diff --git a/MSGCoOp/scripts/base2new_all_unc.sh b/MSGCoOp/scripts/base2new_all_unc.sh
new file mode 100755
index 0000000..1bf7812
--- /dev/null
+++ b/MSGCoOp/scripts/base2new_all_unc.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+TRAINER=$1
+KG_WEIGHT=$2
+MP_WEIGHT=$3
+UNC_TEMPERATURE=$4
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} ucf101 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} ucf101 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} eurosat ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} eurosat ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} oxford_pets ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} oxford_pets ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} food101 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} food101 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} oxford_flowers ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} oxford_flowers ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} dtd ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} dtd ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} caltech101 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} caltech101 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} fgvc_aircraft ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} fgvc_aircraft ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} stanford_cars ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} stanford_cars ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} sun397 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} sun397 ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh ${TRAINER} imagenet ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
+CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh ${TRAINER} imagenet ${KG_WEIGHT} ${MP_WEIGHT} ${UNC_TEMPERATURE}
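For reference, a typical invocation of this driver script passes the trainer name followed by the three hyperparameters. The concrete values below are illustrative assumptions, not defaults prescribed by the repository:

```bash
# Hypothetical end-to-end run over all eleven datasets: trainer MSGCoOp,
# knowledge-guidance weight 8.0, multi-prompt diversity weight 0.5, and
# uncertainty temperature 1.0 (all four arguments are placeholder values).
bash scripts/base2new_all_unc.sh MSGCoOp 8.0 0.5 1.0
```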
diff --git a/MSGCoOp/scripts/base2new_test_unc.sh b/MSGCoOp/scripts/base2new_test_unc.sh
new file mode 100755
index 0000000..be6d5fb
--- /dev/null
+++ b/MSGCoOp/scripts/base2new_test_unc.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# custom config
+DATA=~/Datasets/CoOp
+TRAINER=$1
+DATASET=$2
+N_PROMPTS=4
+KG_WEIGHT=$3
+MP_WEIGHT=$4
+UNC_TEMPERATURE=$5
+#CFG=rn50_ep100  # config file
+CFG=vit_b16_ep100_ctxv1
+CTP=end  # class token position (end or middle)
+NCTX=4  # number of context tokens
+SHOTS=16  # number of shots (1, 2, 4, 8, 16)
+CSC=False  # class-specific context (False or True)
+
+LOADEP=100
+SUB=new
+
+for SEED in 1 2 3
+do
+    COMMON_DIR=${DATASET}/shots_${SHOTS}_${KG_WEIGHT}_unc${UNC_TEMPERATURE}/${TRAINER}/${CFG}/seed${SEED}
+    MODEL_DIR=output/base2new/train_base/${COMMON_DIR}
+    DIR=output/base2new/test_${SUB}/${COMMON_DIR}
+
+
+    if [ -d "$DIR" ]; then
+        echo "Results are available in ${DIR}. Skip this job"
+    else
+        echo "Run this job and save the output to ${DIR}"
+        python train.py \
+        --root ${DATA} \
+        --seed ${SEED} \
+        --trainer ${TRAINER} \
+        --dataset-config-file configs/datasets/${DATASET}.yaml \
+        --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
+        --output-dir ${DIR} \
+        --model-dir ${MODEL_DIR} \
+        --load-epoch ${LOADEP} \
+        --eval-only \
+        TRAINER.COOP.N_PROMPTS ${N_PROMPTS} \
+        TRAINER.COOP.N_CTX ${NCTX} \
+        TRAINER.COOP.CSC ${CSC} \
+        TRAINER.COOP.CLASS_TOKEN_POSITION ${CTP} \
+        DATASET.NUM_SHOTS ${SHOTS} \
+        DATASET.SUBSAMPLE_CLASSES ${SUB} \
+        TRAINER.COOP.UNC_ENABLED True \
+        TRAINER.COOP.UNC_TEMPERATURE ${UNC_TEMPERATURE}
+    fi
+done
diff --git a/MSGCoOp/scripts/base2new_train_unc.sh b/MSGCoOp/scripts/base2new_train_unc.sh
new file mode 100755
index 0000000..ef6f65e
--- /dev/null
+++ b/MSGCoOp/scripts/base2new_train_unc.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+# custom config
+DATA=~/Datasets/CoOp
+TRAINER=$1
+DATASET=$2
+KG_WEIGHT=$3
+MP_WEIGHT=$4
+UNC_TEMPERATURE=$5
+N_PROMPTS=4
+#CFG=rn50_ep100  # config file
+CFG=vit_b16_ep100_ctxv1
+CTP=end  # class token position (end or middle)
+NCTX=4  # number of context tokens
+SHOTS=16  # number of shots (1, 2, 4, 8, 16)
+CSC=False  # class-specific context (False or True)
+
+for SEED in 1 2 3
+do
+    DIR=output/base2new/train_base/${DATASET}/shots_${SHOTS}_${KG_WEIGHT}_unc${UNC_TEMPERATURE}/${TRAINER}/${CFG}/seed${SEED}
+    if [ -d "$DIR" ]; then
+        echo "Results are available in ${DIR}. Skip this job"
+    else
+        echo "Run this job and save the output to ${DIR}"
+        python train.py \
+        --root ${DATA} \
+        --seed ${SEED} \
+        --trainer ${TRAINER} \
+        --dataset-config-file configs/datasets/${DATASET}.yaml \
+        --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
+        --output-dir ${DIR} \
+        TRAINER.COOP.N_CTX ${NCTX} \
+        TRAINER.COOP.CSC ${CSC} \
+        TRAINER.COOP.W ${KG_WEIGHT} \
+        TRAINER.COOP.CLASS_TOKEN_POSITION ${CTP} \
+        DATASET.NUM_SHOTS ${SHOTS} \
+        DATASET.SUBSAMPLE_CLASSES base \
+        TRAINER.COOP.N_PROMPTS ${N_PROMPTS} \
+        TRAINER.COOP.DIV_WEIGHT ${MP_WEIGHT} \
+        TRAINER.COOP.UNC_ENABLED True \
+        TRAINER.COOP.UNC_TEMPERATURE ${UNC_TEMPERATURE}
+    fi
+done
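A single dataset can also be trained and evaluated without the driver script. The trainer name and weight values below are again placeholder assumptions; the output paths follow the `shots_${SHOTS}_${KG_WEIGHT}_unc${UNC_TEMPERATURE}` pattern the two scripts build:

```bash
# Hypothetical single-dataset run (placeholder arguments):
CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_train_unc.sh MSGCoOp caltech101 8.0 0.5 1.0
CUDA_VISIBLE_DEVICES=0 bash scripts/base2new_test_unc.sh MSGCoOp caltech101 8.0 0.5 1.0
# With these arguments, results land under (seeds 1-3):
#   output/base2new/train_base/caltech101/shots_16_8.0_unc1.0/MSGCoOp/vit_b16_ep100_ctxv1/seed1
#   output/base2new/test_new/caltech101/shots_16_8.0_unc1.0/MSGCoOp/vit_b16_ep100_ctxv1/seed1
```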
Skip this job" + else + echo "Run this job and save the output to ${DIR}" + python train.py \ + --root ${DATA} \ + --seed ${SEED} \ + --trainer ${TRAINER} \ + --dataset-config-file configs/datasets/${DATASET}.yaml \ + --config-file configs/trainers/${TRAINER}/${CFG}.yaml \ + --output-dir ${DIR} \ + TRAINER.COOP.N_CTX ${NCTX} \ + TRAINER.COOP.CSC ${CSC} \ + TRAINER.COOP.W ${KG_WEIGHT} \ + TRAINER.COOP.CLASS_TOKEN_POSITION ${CTP} \ + DATASET.NUM_SHOTS ${SHOTS} \ + DATASET.SUBSAMPLE_CLASSES base \ + TRAINER.COOP.N_PROMPTS ${N_PROMPTS} \ + TRAINER.COOP.DIV_WEIGHT ${MP_WEIGHT} \ + TRAINER.COOP.UNC_ENABLED True \ + TRAINER.COOP.UNC_TEMPERATURE ${UNC_TEMPERATURE} + fi +done diff --git a/MSGCoOp/train.py b/MSGCoOp/train.py index 1258a76..6cbff82 100644 --- a/MSGCoOp/train.py +++ b/MSGCoOp/train.py @@ -105,6 +105,10 @@ def extend_cfg(cfg): cfg.TRAINER.COCOOP.PREC = "fp16" # fp16, fp32, amp cfg.TRAINER.COOP.DIV_WEIGHT = 0.1 cfg.TRAINER.COOP.N_PROMPTS = 3 + + # 不确定性集成配置 + cfg.TRAINER.COOP.UNC_ENABLED = False # 是否启用基于熵的不确定性集成 + cfg.TRAINER.COOP.UNC_TEMPERATURE = 1.0 # 控制权重分布的平滑度 cfg.DATASET.SUBSAMPLE_CLASSES = "all" # all, base or new """ diff --git a/MSGCoOp/trainers/msgcoop.py b/MSGCoOp/trainers/msgcoop.py index 0047355..9e64dff 100644 --- a/MSGCoOp/trainers/msgcoop.py +++ b/MSGCoOp/trainers/msgcoop.py @@ -223,6 +223,41 @@ class Adapter(nn.Module): x = self.fc(x) return x +class UncertaintyPromptIntegrator(nn.Module): + def __init__(self, temperature=1.0): + """ + 基于预测熵的不确定性加权集成器 + + Args: + temperature: 控制权重分布的平滑度,值越大权重分布越平均 + """ + super().__init__() + self.temperature = temperature + + def forward(self, all_logits): + """ + Args: + all_logits: [n_prompts, batch_size, n_classes] + + Returns: + integrated_logits: [batch_size, n_classes] + prompt_weights: [n_prompts, batch_size] + entropy: [n_prompts, batch_size] + """ + n_prompts, batch_size, n_classes = all_logits.shape + + log_probs = F.log_softmax(all_logits, dim=-1) + probs = log_probs.exp() + + entropy = -(probs * log_probs).sum(dim=-1) + + temperature = max(self.temperature, 1e-8) + weights = F.softmax(-entropy / temperature, dim=0) + + integrated_logits = torch.einsum('pb,pbc->bc', weights, all_logits) + + return integrated_logits, weights, entropy + class CustomCLIP(nn.Module): def __init__(self, cfg, classnames, clip_model): super().__init__() @@ -236,6 +271,14 @@ class CustomCLIP(nn.Module): self.dtype = clip_model.dtype self.meta_net = self.prompt_learner.meta_net self.adapter = Adapter(512, 4).to(clip_model.dtype) + + self.use_uncertainty_integration = cfg.TRAINER.COOP.get('UNC_ENABLED', False) + self.unc_temperature = cfg.TRAINER.COOP.get('UNC_TEMPERATURE', 1.0) + + if self.use_uncertainty_integration: + self.unc_integrator = UncertaintyPromptIntegrator( + temperature=self.unc_temperature + ) def compute_diversity_loss(self, text_features): if self.n_prompts == 1: @@ -283,8 +326,15 @@ class CustomCLIP(nn.Module): text_features_i = text_features_i / text_features_i.norm(dim=-1, keepdim=True) logits_i = logit_scale * image_features @ text_features_i.t() all_logits.append(logits_i) - - logits = torch.stack(all_logits).mean(dim=0) + + all_logits = torch.stack(all_logits) + + if self.use_uncertainty_integration: + logits, prompt_weights, entropy = self.unc_integrator(all_logits) + self.last_prompt_weights = prompt_weights.detach() + self.last_entropy = entropy.detach() + else: + logits = all_logits.mean(dim=0) return logits, score, diversity_loss diff --git a/readme.md b/readme.md index ce24d7d..9278432 100644 --- a/readme.md +++ 
diff --git a/readme.md b/readme.md
index ce24d7d..9278432 100644
--- a/readme.md
+++ b/readme.md
@@ -1,5 +1,8 @@
 # MSGCoOp: Visual-Language Prompt Tuning with Knowledge-guided Context Optimization
 
+[![Paper](https://img.shields.io/badge/arXiv-Paper-brightgreen.svg)](https://arxiv.org/abs/2507.21786)
+[![Code](https://img.shields.io/badge/Code-GitHub-blueviolet.svg)](https://github.com/Rain-Bus/MSGCoOp)
+
 ## Overview of MSGCoOp
 
 We introduce **Multi-prompt Semantic-Guided Context Optimization (MSGCoOp)**, a novel framework that advances CLIP-based prompt tuning for few-shot learning. MSGCoOp addresses the challenge of generalizing to novel classes efficiently, without heavy architectural modifications or expensive computation.