Compare commits

2 Commits

Author SHA1 Message Date
7fcf319dcf fix conf 2026-02-05 12:12:11 +08:00
1925ddfc86 dual and softmax conf 2026-02-05 12:11:29 +08:00
15 changed files with 268 additions and 151 deletions

View File

@@ -39,4 +39,5 @@ TRAINER:
PROMPT_DEPTH_TEXT: 9
TEXT_LOSS_WEIGHT: 25
IMAGE_LOSS_WEIGHT: 10
LAST_K: 5
GPA_MEAN: 15
GPA_STD: 1

View File

@@ -23,7 +23,6 @@ OPTIM:
WARMUP_CONS_LR: 1e-5
TRAIN:
CHECKPOINT_FREQ: 5
PRINT_FREQ: 20
MODEL:
@@ -40,4 +39,5 @@ TRAINER:
PROMPT_DEPTH_TEXT: 3
TEXT_LOSS_WEIGHT: 25
IMAGE_LOSS_WEIGHT: 10
LAST_K: 5
GPA_MEAN: 6
GPA_STD: 10

View File

@@ -16,7 +16,7 @@ INPUT:
OPTIM:
NAME: "sgd"
LR: 0.0025
MAX_EPOCH: 5
MAX_EPOCH: 50
LR_SCHEDULER: "cosine"
WARMUP_EPOCH: 1
WARMUP_TYPE: "constant"
@@ -35,8 +35,13 @@ TRAINER:
N_CTX_TEXT: 4
CTX_INIT: "a photo of a"
PREC: "fp16"
PROMPT_DEPTH_VISION: 3
PROMPT_DEPTH_TEXT: 3
PROMPT_DEPTH_VISION: 9
PROMPT_DEPTH_TEXT: 9
TEXT_LOSS_WEIGHT: 25
IMAGE_LOSS_WEIGHT: 10
LAST_K: 5
# Use the below configuration for: ImageNet, Caltech101, OxfordPets, Food101, UCF101 and SUN397
GPA_MEAN: 30
GPA_STD: 30
# Use the below configuration for: StanfordCars, Flowers102, FGVCAircraft, DTD and EuroSAT
# GPA_MEAN: 45
# GPA_STD: 5

View File

@@ -11,7 +11,7 @@ Training PromptSRC on ImageNet for 20 epochs takes around 6 hours for a single s
## PromptSRC
#### (1) Base-to-Novel class generalization setting
The base-to-novel PromptSRC configuration is provided in config file at `configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml`. All hyper-parameters such as LAST_K, SCL loss weights coefficients, prompt length and prompt depth etc., can be modified using this config file.
The base-to-novel PromptSRC configuration is provided in config file at `configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml`. All hyper-parameters such as GPA STD, GPA Mean, SCL loss weights coefficients, prompt length and prompt depth etc., can be modified using this config file.
Run the commands below to train PromptSRC on ImageNet.

View File

@@ -10,7 +10,7 @@ datasets=(
"fgvc_aircraft"
"stanford_cars"
"sun397"
"imagenet"
# "imagenet"
)
for dataset in "${datasets[@]}"; do

View File

@@ -0,0 +1,27 @@
#!/bin/bash
# custom config
DATA="/path/to/dataset/folder"
TRAINER=PromptSRC
DATASET=$1
CFG=vit_b16_c2_ep50_batch4_4+4ctx_few_shot
SHOTS=$2
for SEED in 1 2 3
do
DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED}
if [ -d "$DIR" ]; then
echo " The results exist at ${DIR}"
else
echo "Run this job and save the output to ${DIR}"
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
DATASET.NUM_SHOTS ${SHOTS}
fi
done

View File

@@ -0,0 +1,54 @@
#!/bin/bash
# custom config
DATA="/path/to/dataset/folder"
TRAINER=PromptSRC
DATASET=$1
SEED=$2
WEIGHTSPATH=$3
CFG=vit_b16_c2_ep20_batch4_4+4ctx
SHOTS=16
LOADEP=20
SUB_base=base
SUB_novel=new
COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
MODEL_DIR=${WEIGHTSPATH}/base/seed${SEED}
DIR_base=output/base2new/test_${SUB_base}/${COMMON_DIR}
DIR_novel=output/base2new/test_${SUB_novel}/${COMMON_DIR}
if [ -d "$DIR" ]; then
echo "Results are already available in ${DIR}. Skipping..."
else
echo "Evaluating model"
echo "Runing the first phase job and save the output to ${DIR}"
# Evaluate on base classes
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR_base} \
--model-dir ${MODEL_DIR} \
--load-epoch ${LOADEP} \
--eval-only \
DATASET.NUM_SHOTS ${SHOTS} \
DATASET.SUBSAMPLE_CLASSES ${SUB_base}
# Evaluate on novel classes
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR_novel} \
--model-dir ${MODEL_DIR} \
--load-epoch ${LOADEP} \
--eval-only \
DATASET.NUM_SHOTS ${SHOTS} \
DATASET.SUBSAMPLE_CLASSES ${SUB_novel}
fi

View File

@@ -0,0 +1,34 @@
#!/bin/bash
# custom config
DATA="/path/to/dataset/folder"
TRAINER=PromptSRC
DATASET=$1
SHOTS=$2
WEIGHTSPATH=$3
CFG=vit_b16_c2_ep50_batch4_4+4ctx_few_shot
LOADEP=50
for SEED in 1 2 3
do
MODEL_DIR=${WEIGHTSPATH}/${SHOTS}shot/seed${SEED}
DIR=output/few_shot/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED}
if [ -d "$DIR" ]; then
echo " The results exist at ${DIR}"
else
echo "Run this job and save the output to ${DIR}"
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--model-dir ${MODEL_DIR} \
--load-epoch ${LOADEP} \
--eval-only \
DATASET.NUM_SHOTS ${SHOTS}
fi
done

View File

@@ -0,0 +1,36 @@
#!/bin/bash
# custom config
DATA="/path/to/dataset/folder"
TRAINER=PromptSRC
DATASET=$1
SEED=$2
WEIGHTSPATH=$3
CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
SHOTS=16
LOADEP=20
MODEL_DIR=${WEIGHTSPATH}/seed${SEED}
DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are already available in ${DIR}. Skipping..."
else
echo "Evaluating model"
echo "Runing the first phase job and save the output to ${DIR}"
# Evaluate on evaluation datasets
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--model-dir ${MODEL_DIR} \
--load-epoch ${LOADEP} \
--eval-only \
DATASET.NUM_SHOTS ${SHOTS} \
fi

View File

@@ -0,0 +1,31 @@
#!/bin/bash
# custom config
DATA="/path/to/dataset/folder"
TRAINER=PromptSRC
DATASET=$1
SEED=$2
CFG=vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets
SHOTS=16
DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are available in ${DIR}. Skip this job"
else
echo "Run this job and save the output to ${DIR}"
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \
--load-epoch 20 \
--eval-only
fi

View File

@@ -1,30 +1,29 @@
#!/bin/bash
DATA=" ~/Datasets/CoOp"
# custom config
DATA="/path/to/dataset/folder"
TRAINER=PromptSRC
SRC_DATASETS=imagenet
DATASET=$1
SEED=$2
CFG=vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets
SHOTS=16
CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
for SEED in 1 2 3
do
DIR=output_xd/base2new/train_base/${SRC_DATASETS}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are available in ${DIR}. Skip this job"
else
DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are available in ${DIR}."
else
echo "Run this job and save the output to ${DIR}"
CUDA_VISIBLE_DEVICES=0 python train.py \
python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${SRC_DATASETS}.yaml \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
DATASET.NUM_SHOTS ${SHOTS}
fi
done
fi

View File

@@ -1,46 +0,0 @@
#!/bin/bash
# custom config
DATA=" ~/Datasets/CoOp"
TRAINER=PromptSRC
SRC_DATASETS=imagenet
SHOTS=16
CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
LOADEP=20
DATASETS=(dtd eurosat fgvc_aircraft food101 oxford_flowers oxford_pets stanford_cars ucf101 caltech101 sun397)
SEEDS=(1 2 3)
for DATASET in "${DATASETS[@]}"
do
for SEED in "${SEEDS[@]}"
do
MODEL_DIR=output_xd/base2new/train_base/${SRC_DATASETS}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
DIR=output_xd/base2new/test_new/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are available in ${DIR}. Skip this job"
else
echo "Run this job and save the output to ${DIR}"
echo "Loading model from ${MODEL_DIR}"
CUDA_VISIBLE_DEVICES=0 python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--model-dir ${MODEL_DIR} \
--load-epoch ${LOADEP} \
--eval-only
fi
done
done

View File

@@ -1,46 +0,0 @@
#!/bin/bash
# custom config
DATA=" ~/Datasets/CoOp"
TRAINER=PromptSRC
SRC_DATASETS=imagenet
SHOTS=16
CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
LOADEP=20
DATASETS=(imagenetv2 imagenet_sketch imagenet_a imagenet_r)
SEEDS=(1 2 3)
for DATASET in "${DATASETS[@]}"
do
for SEED in "${SEEDS[@]}"
do
MODEL_DIR=output_xd/base2new/train_base/${SRC_DATASETS}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
DIR=output_xd/base2new/test_new/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
if [ -d "$DIR" ]; then
echo "Results are available in ${DIR}. Skip this job"
else
echo "Run this job and save the output to ${DIR}"
echo "Loading model from ${MODEL_DIR}"
CUDA_VISIBLE_DEVICES=0 python train.py \
--root ${DATA} \
--seed ${SEED} \
--trainer ${TRAINER} \
--dataset-config-file configs/datasets/${DATASET}.yaml \
--config-file configs/trainers/${TRAINER}/${CFG}.yaml \
--output-dir ${DIR} \
--model-dir ${MODEL_DIR} \
--load-epoch ${LOADEP} \
--eval-only
fi
done
done

View File

@@ -122,7 +122,11 @@ def extend_cfg(cfg):
cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT_STRONG = 25 # lambda2: strong text constraint weight
cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT_WEAK = 2.5 # lambda3: weak text constraint weight
cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT = 10
cfg.TRAINER.PROMPTSRC.LAST_K = 5
cfg.TRAINER.PROMPTSRC.GPA_MEAN = 15
cfg.TRAINER.PROMPTSRC.GPA_STD = 1
cfg.TRAINER.PROMPTSRC.CONFIDENCE_TYPE = "max_margin" # entropy, max_prob, margin, max_margin
cfg.TRAINER.PROMPTSRC.CONFIDENCE_TEMPERATURE = 2.0 # temperature for confidence calculation
cfg.TRAINER.PROMPTSRC.CONFIDENCE_MOMENTUM = 0.95 # momentum for running confidence
cfg.DATASET.SUBSAMPLE_CLASSES = "all" # all, base or new
# Config for independent Vision Language prompting (independent-vlp)

View File

@@ -106,6 +106,7 @@ class VLPromptLearner(nn.Module):
cfg_imsize = cfg.INPUT.SIZE[0]
assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})"
# Strong constraint branch initialization
if ctx_init and n_ctx <= 4:
ctx_init = ctx_init.replace("_", " ")
prompt = clip.tokenize(ctx_init)
@@ -118,6 +119,7 @@ class VLPromptLearner(nn.Module):
nn.init.normal_(ctx_vectors_strong, std=0.02)
prompt_prefix_strong = " ".join(["X"] * n_ctx)
# Weak constraint branch - random initialization
ctx_vectors_weak = torch.empty(n_ctx, ctx_dim, dtype=dtype)
nn.init.normal_(ctx_vectors_weak, std=0.02)
prompt_prefix_weak = " ".join(["X"] * n_ctx)
@@ -246,10 +248,7 @@ class CustomCLIP(nn.Module):
logits_strong = logit_scale * image_features @ text_features_strong.t()
logits_weak = logit_scale * image_features @ text_features_weak.t()
zs_probs = F.softmax(zero_shot_logits, dim=1)
confidence = zs_probs.max(dim=1).values
alpha = confidence.unsqueeze(1)
alpha = 0.5
logits_final = alpha * logits_strong + (1 - alpha) * logits_weak
@@ -311,8 +310,12 @@ class PromptSRC(TrainerX):
# Cosine scheduler
self.total_epochs = cfg.OPTIM.MAX_EPOCH
self.step_counter = 1
self.max_k = cfg.TRAINER.PROMPTSRC.LAST_K
self.last_k_models = []
N = cfg.OPTIM.MAX_EPOCH
mean = cfg.TRAINER.PROMPTSRC.GPA_MEAN
stdev = cfg.TRAINER.PROMPTSRC.GPA_STD
gauss = self.get_gauss(mean, stdev)
self.gauss = np.array([gauss(a) for a in range(1, N + 1)])
self.gauss = self.gauss / sum(self.gauss)
self.scaler = GradScaler() if cfg.TRAINER.PROMPTSRC.PREC == "amp" else None
# Note that multi-gpu training could be slow because CLIP's size is
# big, which slows down the copy operation in DataParallel
@@ -320,6 +323,8 @@ class PromptSRC(TrainerX):
if device_count > 1:
print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!")
self.model = nn.DataParallel(self.model)
# Keep model with GPA
self.previous_model_gpa = None
def forward_backward(self, batch):
image, label = self.parse_batch_train(batch)
@@ -365,32 +370,45 @@ class PromptSRC(TrainerX):
if (self.batch_idx + 1) == self.num_batches:
self.update_lr()
# Means one epoch is completed, perform GPA
self.step_counter = self.step_counter + 1
current_epoch_weight = self.gauss[self.step_counter - 2]
current_model_weights = copy.deepcopy(model.state_dict())
for key in current_model_weights:
current_model_weights[key] = current_model_weights[key].cpu()
self.last_k_models.append(current_model_weights)
if len(self.last_k_models) > self.max_k:
self.last_k_models.pop(0)
torch.cuda.empty_cache()
weighted_state_dict = self.state_dict_weighting(current_model_weights, current_epoch_weight)
if self.previous_model_gpa is None:
self.previous_model_gpa = weighted_state_dict
else:
self.previous_model_gpa = self.state_dict_add(weighted_state_dict, self.previous_model_gpa)
if self.step_counter == self.model.total_epochs + 1:
print(f"Using Last-K Averaging (K={len(self.last_k_models)}) model for final inference...")
averaged_state_dict = self._average_last_k_models()
for key in averaged_state_dict:
averaged_state_dict[key] = averaged_state_dict[key].cuda()
model.load_state_dict(averaged_state_dict)
self.model.load_state_dict(averaged_state_dict)
print("Using GPA model for final inference...")
model.load_state_dict(self.previous_model_gpa)
self.model.load_state_dict(self.previous_model_gpa)
return loss_summary
def _average_last_k_models(self):
if not self.last_k_models:
return {}
averaged_dict = {}
for key in self.last_k_models[0]:
stacked = torch.stack([model_state[key] for model_state in self.last_k_models])
averaged_dict[key] = torch.mean(stacked, dim=0)
return averaged_dict
def state_dict_weighting(self, main_dict, weightage, prompt_only=False):
# Average all parameters
updated_dict = copy.deepcopy(main_dict)
if not prompt_only:
for key in main_dict:
updated_dict[key] = main_dict[key] * weightage
return updated_dict
else:
return main_dict * weightage
def state_dict_add(self, dict1, dict2, prompt_only=False):
# Average all parameters
if not prompt_only:
modified_dict = dict2
for key in dict1:
modified_dict[key] = (modified_dict[key] + dict1[key])
return modified_dict
else:
return dict1 + dict2
def get_gauss(self, mu, sigma):
gauss = lambda x: (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
return gauss
def parse_batch_train(self, batch):
input = batch["img"]