hyper-param change

rename distillation variables
rename GPA to EWA
2026-02-26 13:03:59 +08:00 · 2026-02-25 21:15:41 +08:00 · 2026-02-25 17:36:27 +08:00 · 2026-02-24 20:35:56 +08:00 · 2026-02-06 17:38:54 +08:00 · 2026-02-05 18:46:37 +08:00
20 changed files with 301 additions and 360 deletions
--- a/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml
+++ b/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml
@@ -1,4 +1,4 @@
-# PromptSRC: Prompting with Self-regularizing constraints
+# DZGCoOp: Dual-branch Zero-shot Guidance CoOp
 DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
@@ -30,14 +30,15 @@ MODEL:
    NAME: "ViT-B/16"

 TRAINER:
-  PROMPTSRC:
+  DZGCOOP:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH_VISION: 9
    PROMPT_DEPTH_TEXT: 9
-    TEXT_LOSS_WEIGHT: 25
-    IMAGE_LOSS_WEIGHT: 10
-    GPA_MEAN: 15
-    GPA_STD: 1
+    IMAGE_LOSS_WEIGHT: 8
+    TEXT_LOSS_WEIGHT_STRONG: 24
+    TEXT_LOSS_WEIGHT_WEAK: 8
+    EWA_MEAN: 15
+    EWA_STD: 1
--- a/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets.yaml
+++ b/configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets.yaml
@@ -1,4 +1,4 @@
-# PromptSRC: Prompting with Self-regularizing constraints
+# DZGCoOp: Dual-branch Zero-shot Guidance CoOp
 DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
@@ -23,6 +23,7 @@ OPTIM:
  WARMUP_CONS_LR: 1e-5

 TRAIN:
+  CHECKPOINT_FREQ: 5
  PRINT_FREQ: 20

 MODEL:
@@ -30,7 +31,7 @@ MODEL:
    NAME: "ViT-B/16"

 TRAINER:
-  PROMPTSRC:
+  DZGCOOP:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
@@ -39,5 +40,5 @@ TRAINER:
    PROMPT_DEPTH_TEXT: 3
    TEXT_LOSS_WEIGHT: 25
    IMAGE_LOSS_WEIGHT: 10
-    GPA_MEAN: 6
-    GPA_STD: 10
+    EWA_MEAN: 6
+    EWA_STD: 10
--- a/configs/trainers/DZGCoOp/vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets.yaml
+++ b/configs/trainers/DZGCoOp/vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets.yaml
@@ -1,4 +1,4 @@
-# PromptSRC: Prompting with Self-regularizing constraints
+# DZGCoOp: Dual-branch Zero-shot Guidance CoOp
 DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
@@ -16,7 +16,7 @@ INPUT:
 OPTIM:
  NAME: "sgd"
  LR: 0.0025
-  MAX_EPOCH: 50
+  MAX_EPOCH: 5
  LR_SCHEDULER: "cosine"
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
@@ -30,18 +30,14 @@ MODEL:
    NAME: "ViT-B/16"

 TRAINER:
-  PROMPTSRC:
+  DZGCOOP:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
-    PROMPT_DEPTH_VISION: 9
-    PROMPT_DEPTH_TEXT: 9
+    PROMPT_DEPTH_VISION: 3
+    PROMPT_DEPTH_TEXT: 3
    TEXT_LOSS_WEIGHT: 25
    IMAGE_LOSS_WEIGHT: 10
-# Use the below configuration for: ImageNet, Caltech101, OxfordPets, Food101, UCF101 and  SUN397
-    GPA_MEAN: 30
-    GPA_STD: 30
-# Use the below configuration for: StanfordCars, Flowers102, FGVCAircraft, DTD and EuroSAT
-#    GPA_MEAN: 45
-#    GPA_STD: 5
+    EWA_MEAN: 6
+    EWA_STD: 10
--- a/docs/TRAIN.md
+++ b/docs/TRAIN.md
@@ -11,7 +11,7 @@ Training PromptSRC on ImageNet for 20 epochs takes around 6 hours for a single s
 ## PromptSRC

 #### (1) Base-to-Novel class generalization setting
-The base-to-novel PromptSRC configuration is provided in config file at `configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml`. All hyper-parameters such as GPA STD, GPA Mean, SCL loss weights coefficients, prompt length and prompt depth etc., can be modified using this config file.
+The base-to-novel PromptSRC configuration is provided in the config file at `configs/trainers/PromptSRC/vit_b16_c2_ep20_batch4_4+4ctx.yaml`. All hyper-parameters, such as the EWA STD, EWA mean, SCL loss weight coefficients, prompt length, and prompt depth, can be modified using this config file.

 Run the commands below to train PromptSRC on ImageNet.

--- a/extract_acc.py
+++ b/extract_acc.py
@@ -109,7 +109,7 @@ def print_model_results(results, model_name):

 def main():
    root_dir = 'output'  # 修改为你的output目录路径
-    target_model = 'PromptSRC'  # 指定要分析的模型
+    target_model = 'DZGCoOp'  # 指定要分析的模型
    
    results = collect_model_results(root_dir, target_model)
    print_model_results(results, target_model)
--- a/scripts/dzgcoop/base2new_all.sh
+++ b/scripts/dzgcoop/base2new_all.sh
@@ -0,0 +1,22 @@
+seeds=(1 2 3)
+datasets=(
+    "ucf101"
+    "eurosat"
+    "oxford_pets"
+    "food101"
+    "oxford_flowers"
+    "dtd"
+    "caltech101"
+    "fgvc_aircraft"
+    "stanford_cars"
+    "sun397"
+    # "imagenet"
+)
+
+for dataset in "${datasets[@]}"; do
+    for seed in "${seeds[@]}"; do
+        bash scripts/dzgcoop/base2new_train.sh "$dataset" "$seed"
+        bash scripts/dzgcoop/base2new_test.sh "$dataset" "$seed"
+    done
+done
+
--- a/scripts/promptsrc/base2new_test.sh
+++ b/scripts/promptsrc/base2new_test.sh
@@ -3,7 +3,7 @@

 # custom config
 DATA="~/Datasets/CoOp"
-TRAINER=PromptSRC
+TRAINER=DZGCoOp

 DATASET=$1
 SEED=$2
--- a/scripts/promptsrc/base2new_train.sh
+++ b/scripts/promptsrc/base2new_train.sh
@@ -2,7 +2,7 @@

 # custom config
 DATA="~/Datasets/CoOp"
-TRAINER=PromptSRC
+TRAINER=DZGCoOp

 DATASET=$1
 SEED=$2
--- a/scripts/dzgcoop/xd_train.sh
+++ b/scripts/dzgcoop/xd_train.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+
+DATA=" ~/Datasets/CoOp"
+TRAINER=DZGCoOp
+SRC_DATASETS=imagenet
+SHOTS=16
+CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
+
+
+for SEED in 1 2 3
+do
+    DIR=output_xd/base2new/train_base/${SRC_DATASETS}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
+
+    if [ -d "$DIR" ]; then
+        echo "Results are available in ${DIR}. Skip this job"
+    else
+        echo "Run this job and save the output to ${DIR}"
+
+        CUDA_VISIBLE_DEVICES=0 python train.py \
+        --root ${DATA} \
+        --seed ${SEED} \
+        --trainer ${TRAINER} \
+        --dataset-config-file configs/datasets/${SRC_DATASETS}.yaml \
+        --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
+        --output-dir ${DIR} \
+	DATASET.NUM_SHOTS ${SHOTS}
+    fi
+done
+
--- a/scripts/dzgcoop/xda_test.sh
+++ b/scripts/dzgcoop/xda_test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+
+# custom config
+DATA=" ~/Datasets/CoOp"
+TRAINER=DZGCoOp
+
+
+SRC_DATASETS=imagenet 
+SHOTS=16
+CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
+LOADEP=20
+
+
+DATASETS=(dtd eurosat fgvc_aircraft food101 oxford_flowers oxford_pets stanford_cars ucf101 caltech101 sun397)
+SEEDS=(1 2 3)
+
+
+for DATASET in "${DATASETS[@]}"
+do
+    for SEED in "${SEEDS[@]}"
+    do
+        MODEL_DIR=output_xd/base2new/train_base/${SRC_DATASETS}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
+        
+        DIR=output_xd/base2new/test_new/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
+
+        if [ -d "$DIR" ]; then
+            echo "Results are available in ${DIR}. Skip this job"
+        else
+            echo "Run this job and save the output to ${DIR}"
+            echo "Loading model from ${MODEL_DIR}"
+
+            CUDA_VISIBLE_DEVICES=0 python train.py \
+            --root ${DATA} \
+            --seed ${SEED} \
+            --trainer ${TRAINER} \
+            --dataset-config-file configs/datasets/${DATASET}.yaml \
+            --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
+            --output-dir ${DIR} \
+            --model-dir ${MODEL_DIR} \
+            --load-epoch ${LOADEP} \
+            --eval-only
+        fi
+    done
+done
+
--- a/scripts/dzgcoop/xdo_test.sh
+++ b/scripts/dzgcoop/xdo_test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+
+# custom config
+DATA=" ~/Datasets/CoOp"
+TRAINER=DZGCoOp
+
+
+SRC_DATASETS=imagenet 
+SHOTS=16
+CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
+LOADEP=20
+
+
+DATASETS=(imagenetv2 imagenet_sketch imagenet_a imagenet_r)
+SEEDS=(1 2 3)
+
+
+for DATASET in "${DATASETS[@]}"
+do
+    for SEED in "${SEEDS[@]}"
+    do
+        MODEL_DIR=output_xd/base2new/train_base/${SRC_DATASETS}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
+        
+        DIR=output_xd/base2new/test_new/${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
+
+        if [ -d "$DIR" ]; then
+            echo "Results are available in ${DIR}. Skip this job"
+        else
+            echo "Run this job and save the output to ${DIR}"
+            echo "Loading model from ${MODEL_DIR}"
+
+            CUDA_VISIBLE_DEVICES=0 python train.py \
+            --root ${DATA} \
+            --seed ${SEED} \
+            --trainer ${TRAINER} \
+            --dataset-config-file configs/datasets/${DATASET}.yaml \
+            --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
+            --output-dir ${DIR} \
+            --model-dir ${MODEL_DIR} \
+            --load-epoch ${LOADEP} \
+            --eval-only
+        fi
+    done
+done
+
--- a/scripts/promptsrc/base2new_all.sh
+++ b/scripts/promptsrc/base2new_all.sh
@@ -1,22 +0,0 @@
-seeds=(1 2 3)
-datasets=(
-    # "ucf101"
-    # "eurosat"
-    # "oxford_pets"
-    # "food101"
-    # "oxford_flowers"
-    # "dtd"
-    # "caltech101"
-    # "fgvc_aircraft"
-    # "stanford_cars"
-    # "sun397"
-    "imagenet"
-)
-
-for dataset in "${datasets[@]}"; do
-    for seed in "${seeds[@]}"; do
-        bash scripts/promptsrc/base2new_train.sh "$dataset" "$seed"
-        bash scripts/promptsrc/base2new_test.sh "$dataset" "$seed"
-    done
-done
-
--- a/scripts/promptsrc/few_shot.sh
+++ b/scripts/promptsrc/few_shot.sh
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-# custom config
-DATA="/path/to/dataset/folder"
-TRAINER=PromptSRC
-
-DATASET=$1
-CFG=vit_b16_c2_ep50_batch4_4+4ctx_few_shot
-SHOTS=$2
-
-for SEED in 1 2 3
-do
-    DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED}
-    if [ -d "$DIR" ]; then
-        echo " The results exist at ${DIR}"
-    else
-        echo "Run this job and save the output to ${DIR}"
-        python train.py \
-        --root ${DATA} \
-        --seed ${SEED} \
-        --trainer ${TRAINER} \
-        --dataset-config-file configs/datasets/${DATASET}.yaml \
-        --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-        --output-dir ${DIR} \
-        DATASET.NUM_SHOTS ${SHOTS}
-    fi
-done
--- a/scripts/promptsrc/reproduce_base2novel_setting.sh
+++ b/scripts/promptsrc/reproduce_base2novel_setting.sh
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-# custom config
-DATA="/path/to/dataset/folder"
-TRAINER=PromptSRC
-
-DATASET=$1
-SEED=$2
-WEIGHTSPATH=$3
-
-CFG=vit_b16_c2_ep20_batch4_4+4ctx
-SHOTS=16
-LOADEP=20
-SUB_base=base
-SUB_novel=new
-
-COMMON_DIR=${DATASET}/shots_${SHOTS}/${TRAINER}/${CFG}/seed${SEED}
-MODEL_DIR=${WEIGHTSPATH}/base/seed${SEED}
-DIR_base=output/base2new/test_${SUB_base}/${COMMON_DIR}
-DIR_novel=output/base2new/test_${SUB_novel}/${COMMON_DIR}
-if [ -d "$DIR" ]; then
-    echo "Results are already available in ${DIR}. Skipping..."
-else
-    echo "Evaluating model"
-    echo "Runing the first phase job and save the output to ${DIR}"
-    # Evaluate on base classes
-    python train.py \
-    --root ${DATA} \
-    --seed ${SEED} \
-    --trainer ${TRAINER} \
-    --dataset-config-file configs/datasets/${DATASET}.yaml \
-    --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-    --output-dir ${DIR_base} \
-    --model-dir ${MODEL_DIR} \
-    --load-epoch ${LOADEP} \
-    --eval-only \
-    DATASET.NUM_SHOTS ${SHOTS} \
-    DATASET.SUBSAMPLE_CLASSES ${SUB_base}
-
-    # Evaluate on novel classes
-    python train.py \
-    --root ${DATA} \
-    --seed ${SEED} \
-    --trainer ${TRAINER} \
-    --dataset-config-file configs/datasets/${DATASET}.yaml \
-    --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-    --output-dir ${DIR_novel} \
-    --model-dir ${MODEL_DIR} \
-    --load-epoch ${LOADEP} \
-    --eval-only \
-    DATASET.NUM_SHOTS ${SHOTS} \
-    DATASET.SUBSAMPLE_CLASSES ${SUB_novel}
-
-fi
--- a/scripts/promptsrc/reproduce_few_shot.sh
+++ b/scripts/promptsrc/reproduce_few_shot.sh
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-# custom config
-DATA="/path/to/dataset/folder"
-TRAINER=PromptSRC
-
-DATASET=$1
-SHOTS=$2
-WEIGHTSPATH=$3
-
-CFG=vit_b16_c2_ep50_batch4_4+4ctx_few_shot
-LOADEP=50
-
-for SEED in 1 2 3
-do
-    MODEL_DIR=${WEIGHTSPATH}/${SHOTS}shot/seed${SEED}
-    DIR=output/few_shot/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED}
-    if [ -d "$DIR" ]; then
-        echo " The results exist at ${DIR}"
-    else
-        echo "Run this job and save the output to ${DIR}"
-        python train.py \
-        --root ${DATA} \
-        --seed ${SEED} \
-        --trainer ${TRAINER} \
-        --dataset-config-file configs/datasets/${DATASET}.yaml \
-        --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-        --output-dir ${DIR} \
-        --model-dir ${MODEL_DIR} \
-        --load-epoch ${LOADEP} \
-        --eval-only \
-        DATASET.NUM_SHOTS ${SHOTS}
-    fi
-done
--- a/scripts/promptsrc/reproduce_xd.sh
+++ b/scripts/promptsrc/reproduce_xd.sh
@@ -1,36 +0,0 @@
-#!/bin/bash
-
-# custom config
-DATA="/path/to/dataset/folder"
-TRAINER=PromptSRC
-
-DATASET=$1
-SEED=$2
-WEIGHTSPATH=$3
-
-CFG=vit_b16_c2_ep20_batch4_4+4ctx_cross_datasets
-SHOTS=16
-LOADEP=20
-
-MODEL_DIR=${WEIGHTSPATH}/seed${SEED}
-
-DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED}
-if [ -d "$DIR" ]; then
-    echo "Results are already available in ${DIR}. Skipping..."
-else
-    echo "Evaluating model"
-    echo "Runing the first phase job and save the output to ${DIR}"
-    # Evaluate on evaluation datasets
-    python train.py \
-    --root ${DATA} \
-    --seed ${SEED} \
-    --trainer ${TRAINER} \
-    --dataset-config-file configs/datasets/${DATASET}.yaml \
-    --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-    --output-dir ${DIR} \
-    --model-dir ${MODEL_DIR} \
-    --load-epoch ${LOADEP} \
-    --eval-only \
-    DATASET.NUM_SHOTS ${SHOTS} \
-
-fi
--- a/scripts/promptsrc/xd_test.sh
+++ b/scripts/promptsrc/xd_test.sh
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-
-# custom config
-DATA="/path/to/dataset/folder"
-TRAINER=PromptSRC
-
-DATASET=$1
-SEED=$2
-
-CFG=vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets
-SHOTS=16
-
-
-DIR=output/evaluation/${TRAINER}/${CFG}_${SHOTS}shots/${DATASET}/seed${SEED}
-if [ -d "$DIR" ]; then
-    echo "Results are available in ${DIR}. Skip this job"
-else
-    echo "Run this job and save the output to ${DIR}"
-
-    python train.py \
-    --root ${DATA} \
-    --seed ${SEED} \
-    --trainer ${TRAINER} \
-    --dataset-config-file configs/datasets/${DATASET}.yaml \
-    --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-    --output-dir ${DIR} \
-    --model-dir output/imagenet/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED} \
-    --load-epoch 20 \
-    --eval-only
-fi
--- a/scripts/promptsrc/xd_train.sh
+++ b/scripts/promptsrc/xd_train.sh
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-
-# custom config
-DATA="/path/to/dataset/folder"
-TRAINER=PromptSRC
-
-DATASET=$1
-SEED=$2
-
-CFG=vit_b16_c2_ep5_batch4_4+4ctx_cross_datasets
-SHOTS=16
-
-
-DIR=output/${DATASET}/${TRAINER}/${CFG}_${SHOTS}shots/seed${SEED}
-if [ -d "$DIR" ]; then
-    echo "Results are available in ${DIR}."
-else
-    echo "Run this job and save the output to ${DIR}"
-
-    python train.py \
-    --root ${DATA} \
-    --seed ${SEED} \
-    --trainer ${TRAINER} \
-    --dataset-config-file configs/datasets/${DATASET}.yaml \
-    --config-file configs/trainers/${TRAINER}/${CFG}.yaml \
-    --output-dir ${DIR} \
-    DATASET.NUM_SHOTS ${SHOTS}
-fi
--- a/train.py
+++ b/train.py
@@ -28,7 +28,7 @@ import trainers.cocoop
 import trainers.zsclip
 import trainers.maple
 import trainers.independentVL
-import trainers.promptsrc
+import trainers.dzgcoop


 def print_args(args, cfg):
@@ -110,18 +110,19 @@ def extend_cfg(cfg):
    cfg.TRAINER.MAPLE.PROMPT_DEPTH = 9  # Max 12, minimum 0, for 1 it will act as shallow MaPLe (J=1)
    cfg.DATASET.SUBSAMPLE_CLASSES = "all"  # all, base or new

-    # Config for PromptSRC
-    cfg.TRAINER.PROMPTSRC = CN()
-    cfg.TRAINER.PROMPTSRC.N_CTX_VISION = 4  # number of context vectors at the vision branch
-    cfg.TRAINER.PROMPTSRC.N_CTX_TEXT = 4  # number of context vectors at the language branch
-    cfg.TRAINER.PROMPTSRC.CTX_INIT = "a photo of a"  # initialization words
-    cfg.TRAINER.PROMPTSRC.PREC = "fp16"  # fp16, fp32, amp
-    cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_VISION = 9  # Max 12, minimum 0, for 0 it will be using shallow IVLP prompting (J=1)
-    cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_TEXT = 9  # Max 12, minimum 0, for 0 it will be using shallow IVLP prompting (J=1)
-    cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT = 25
-    cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT = 10
-    cfg.TRAINER.PROMPTSRC.GPA_MEAN = 15
-    cfg.TRAINER.PROMPTSRC.GPA_STD = 1
+    # Config for DZGCoOp
+    cfg.TRAINER.DZGCOOP = CN()
+    cfg.TRAINER.DZGCOOP.N_CTX_VISION = 4  # number of context vectors at the vision branch
+    cfg.TRAINER.DZGCOOP.N_CTX_TEXT = 4  # number of context vectors at the language branch
+    cfg.TRAINER.DZGCOOP.CTX_INIT = "a photo of a"  # initialization words
+    cfg.TRAINER.DZGCOOP.PREC = "fp16"  # fp16, fp32, amp
+    cfg.TRAINER.DZGCOOP.PROMPT_DEPTH_VISION = 9  # Max 12, minimum 0, for 0 it will be using shallow IVLP prompting (J=1)
+    cfg.TRAINER.DZGCOOP.PROMPT_DEPTH_TEXT = 9  # Max 12, minimum 0, for 0 it will be using shallow IVLP prompting (J=1)
+    cfg.TRAINER.DZGCOOP.TEXT_LOSS_WEIGHT_STRONG = 25  # lambda2: strong text constraint weight
+    cfg.TRAINER.DZGCOOP.TEXT_LOSS_WEIGHT_WEAK = 10  # lambda3: weak text constraint weight
+    cfg.TRAINER.DZGCOOP.IMAGE_LOSS_WEIGHT = 10
+    cfg.TRAINER.DZGCOOP.EWA_MEAN = 15
+    cfg.TRAINER.DZGCOOP.EWA_STD = 1
    cfg.DATASET.SUBSAMPLE_CLASSES = "all"  # all, base or new

    # Config for independent Vision Language prompting (independent-vlp)
--- a/trainers/promptsrc.py
+++ b/trainers/promptsrc.py
@@ -51,10 +51,10 @@ def load_clip_to_cpu(cfg, zero_shot_model=False):
        state_dict = torch.load(model_path, map_location="cpu")
    if not zero_shot_model:
        design_details = {"trainer": 'IVLP',
-                          "vision_depth": cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_VISION,
-                          "language_depth": cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_TEXT,
-                          "vision_ctx": cfg.TRAINER.PROMPTSRC.N_CTX_VISION,
-                          "language_ctx": cfg.TRAINER.PROMPTSRC.N_CTX_TEXT}
+                           "vision_depth": cfg.TRAINER.DZGCOOP.PROMPT_DEPTH_VISION,
+                           "language_depth": cfg.TRAINER.DZGCOOP.PROMPT_DEPTH_TEXT,
+                           "vision_ctx": cfg.TRAINER.DZGCOOP.N_CTX_VISION,
+                           "language_ctx": cfg.TRAINER.DZGCOOP.N_CTX_TEXT}
        model = clip.build_model(state_dict or model.state_dict(), design_details)
    else:
        # Return original CLIP model for generating frozen VL features
@@ -95,11 +95,11 @@ class VLPromptLearner(nn.Module):
        super().__init__()
        n_cls = len(classnames)
        # Make sure Language depth >= 1
-        assert cfg.TRAINER.PROMPTSRC.PROMPT_DEPTH_TEXT >= 1, "In Independent VL prompting, Language prompt depth should be >=1" \
+        assert cfg.TRAINER.DZGCOOP.PROMPT_DEPTH_TEXT >= 1, "In Independent VL prompting, Language prompt depth should be >=1" \
                                                        "\nPlease use VPT trainer if you want to learn only vision " \
                                                        "branch"
-        n_ctx = cfg.TRAINER.PROMPTSRC.N_CTX_TEXT
-        ctx_init = cfg.TRAINER.PROMPTSRC.CTX_INIT
+        n_ctx = cfg.TRAINER.DZGCOOP.N_CTX_TEXT
+        ctx_init = cfg.TRAINER.DZGCOOP.CTX_INIT
        dtype = clip_model.dtype
        ctx_dim = clip_model.ln_final.weight.shape[0]
        clip_imsize = clip_model.visual.input_resolution
@@ -107,28 +107,32 @@ class VLPromptLearner(nn.Module):
        assert cfg_imsize == clip_imsize, f"cfg_imsize ({cfg_imsize}) must equal to clip_imsize ({clip_imsize})"

        if ctx_init and n_ctx <= 4:
-            # use given words to initialize context vectors
            ctx_init = ctx_init.replace("_", " ")
-            n_ctx = n_ctx
            prompt = clip.tokenize(ctx_init)
            with torch.no_grad():
                embedding = clip_model.token_embedding(prompt).type(dtype)
-            ctx_vectors = embedding[0, 1: 1 + n_ctx, :]
-            prompt_prefix = ctx_init
+            ctx_vectors_strong = embedding[0, 1: 1 + n_ctx, :]
+            prompt_prefix_strong = ctx_init
        else:
-            # random initialization
-            ctx_vectors = torch.empty(n_ctx, ctx_dim, dtype=dtype)
-            nn.init.normal_(ctx_vectors, std=0.02)
-            prompt_prefix = " ".join(["X"] * n_ctx)
-        print(f"Independent V-L design")
-        print(f'Initial text context: "{prompt_prefix}"')
+            ctx_vectors_strong = torch.empty(n_ctx, ctx_dim, dtype=dtype)
+            nn.init.normal_(ctx_vectors_strong, std=0.02)
+            prompt_prefix_strong = " ".join(["X"] * n_ctx)
+
+        ctx_vectors_weak = torch.empty(n_ctx, ctx_dim, dtype=dtype)
+        nn.init.normal_(ctx_vectors_weak, std=0.02)
+        prompt_prefix_weak = " ".join(["X"] * n_ctx)
+
+        print(f"Independent V-L design with Dual Prompt Branches")
+        print(f'Strong branch initial text context: "{prompt_prefix_strong}"')
+        print(f'Weak branch initial text context: "{prompt_prefix_weak}"')
        print(f"Number of context words (tokens) for Language prompting: {n_ctx}")
-        print(f"Number of context words (tokens) for Vision prompting: {cfg.TRAINER.PROMPTSRC.N_CTX_VISION}")
-        self.ctx = nn.Parameter(ctx_vectors)
+        print(f"Number of context words (tokens) for Vision prompting: {cfg.TRAINER.DZGCOOP.N_CTX_VISION}")
+        self.ctx_strong = nn.Parameter(ctx_vectors_strong)
+        self.ctx_weak = nn.Parameter(ctx_vectors_weak)

        classnames = [name.replace("_", " ") for name in classnames]
        name_lens = [len(_tokenizer.encode(name)) for name in classnames]
-        prompts = [prompt_prefix + " " + name + "." for name in classnames]
+        prompts = [prompt_prefix_strong + " " + name + "." for name in classnames]

        tokenized_prompts = torch.cat([clip.tokenize(p) for p in prompts])  # (n_cls, n_tkn)
        # Also create frozen CLIP
@@ -138,7 +142,7 @@ class VLPromptLearner(nn.Module):
            embedding = clip_model.token_embedding(tokenized_prompts).type(dtype)
            self.ZS_image_encoder = clip_model_temp_image.visual
            # Now pre-compute the frozen VL embeddings from LLM descriptions
-            all_teacher_features = []
+            semantic_guidance_features = []
            desc_file = f"./desc/{DESC_LLM}/descriptions_top{DESC_TOPK}/{cfg.DATASET.NAME}.json"
            with open(desc_file, "r") as f:
                all_desc = json.load(f)
@@ -151,9 +155,9 @@ class VLPromptLearner(nn.Module):
                    cls_feature = clip_model_temp.encode_text(cls_token)
                    cls_feature = cls_feature / cls_feature.norm(dim=-1, keepdim=True)
                    cls_feature = torch.mean(cls_feature, dim=0)
-                all_teacher_features.append(cls_feature)
+                semantic_guidance_features.append(cls_feature)

-        self.fixed_embeddings = torch.stack(all_teacher_features)
+        self.semantic_embeddings = torch.stack(semantic_guidance_features)
        print(f"Using LLM descriptions from: {desc_file}")
        # These token vectors will be saved when in save_model(),
        # but they should be ignored in load_model() as we want to use
@@ -188,15 +192,19 @@ class VLPromptLearner(nn.Module):
        return prompts

    def forward(self):
-        ctx = self.ctx
-        if ctx.dim() == 2:
-            ctx = ctx.unsqueeze(0).expand(self.n_cls, -1, -1)
+        ctx_strong = self.ctx_strong
+        ctx_weak = self.ctx_weak
+        
+        if ctx_strong.dim() == 2:
+            ctx_strong = ctx_strong.unsqueeze(0).expand(self.n_cls, -1, -1)
+            ctx_weak = ctx_weak.unsqueeze(0).expand(self.n_cls, -1, -1)

        prefix = self.token_prefix
        suffix = self.token_suffix
-        prompts = self.construct_prompts(ctx, prefix, suffix)
+        prompts_strong = self.construct_prompts(ctx_strong, prefix, suffix)
+        prompts_weak = self.construct_prompts(ctx_weak, prefix, suffix)

-        return prompts
+        return prompts_strong, prompts_weak


 class CustomCLIP(nn.Module):
@@ -215,35 +223,47 @@ class CustomCLIP(nn.Module):
        tokenized_prompts = self.tokenized_prompts
        logit_scale = self.logit_scale.exp()

-        prompts = self.prompt_learner()
-        # Compute the prompted image and text features
-        text_features = self.text_encoder(prompts, tokenized_prompts)
+        prompts_strong, prompts_weak = self.prompt_learner()
+
+        with torch.no_grad():
+            zero_shot_features = self.prompt_learner.ZS_image_encoder(image.type(self.dtype))
+            zero_shot_features = zero_shot_features / zero_shot_features.norm(dim=-1, keepdim=True)
+
        image_features = self.image_encoder(image.type(self.dtype))
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
-        text_features = text_features / text_features.norm(dim=-1, keepdim=True)
-        # Compute the prompted logits
-        logits = logit_scale * image_features @ text_features.t()
-        if self.prompt_learner.training:
-            # Now calculate the frozen pre-trained features
-            fixed_embeddings = self.prompt_learner.fixed_embeddings  # precomputed pre-trained frozen textual features
-            fixed_embeddings = fixed_embeddings / fixed_embeddings.norm(dim=-1, keepdim=True)
-            with torch.no_grad():
-                zero_shot_features = self.prompt_learner.ZS_image_encoder(image.type(self.dtype))
-                zero_shot_features = zero_shot_features / zero_shot_features.norm(dim=-1, keepdim=True)
-                # Compute pre-trained frozen visual features
-                zero_shot_logits = logit_scale * zero_shot_features.cuda() @ fixed_embeddings.half().cuda().t()

-            return F.cross_entropy(logits,
-                                   label), text_features, fixed_embeddings, zero_shot_features, \
-                   image_features, zero_shot_logits, logits
+        text_features_strong = self.text_encoder(prompts_strong, tokenized_prompts)
+        text_features_strong = text_features_strong / text_features_strong.norm(dim=-1, keepdim=True)
+
+        text_features_weak = self.text_encoder(prompts_weak, tokenized_prompts)
+        text_features_weak = text_features_weak / text_features_weak.norm(dim=-1, keepdim=True)
+
+        semantic_embeddings = self.prompt_learner.semantic_embeddings
+        semantic_embeddings = semantic_embeddings / semantic_embeddings.norm(dim=-1, keepdim=True)
+
+        zero_shot_logits = logit_scale * zero_shot_features.cuda() @ semantic_embeddings.half().cuda().t()
+
+        logits_strong = logit_scale * image_features @ text_features_strong.t()
+        logits_weak = logit_scale * image_features @ text_features_weak.t()
+
+        zs_probs = F.softmax(zero_shot_logits, dim=1)
+        confidence = zs_probs.max(dim=1).values
+
+        alpha = confidence.unsqueeze(1)
+
+        logits_final = alpha * logits_strong + (1 - alpha) * logits_weak
+
+        if self.prompt_learner.training:
+            loss_ce = F.cross_entropy(logits_final, label)
+            return loss_ce, text_features_strong, text_features_weak, semantic_embeddings, zero_shot_features, image_features, zero_shot_logits, logits_strong, logits_weak, logits_final
        else:
-            return logits
+            return logits_final


@TRAINER_REGISTRY.register()
-class PromptSRC(TrainerX):
+class DZGCoOp(TrainerX):
    def check_cfg(self, cfg):
-        assert cfg.TRAINER.PROMPTSRC.PREC in ["fp16", "fp32", "amp"]
+        assert cfg.TRAINER.DZGCOOP.PREC in ["fp16", "fp32", "amp"]

    def build_model(self):
        cfg = self.cfg
@@ -252,7 +272,7 @@ class PromptSRC(TrainerX):
        print(f"Loading CLIP (backbone: {cfg.MODEL.BACKBONE.NAME})")
        clip_model = load_clip_to_cpu(cfg)

-        if cfg.TRAINER.PROMPTSRC.PREC == "fp32" or cfg.TRAINER.PROMPTSRC.PREC == "amp":
+        if cfg.TRAINER.DZGCOOP.PREC == "fp32" or cfg.TRAINER.DZGCOOP.PREC == "amp":
            # CLIP's default precision is fp16
            clip_model.float()

@@ -292,20 +312,20 @@ class PromptSRC(TrainerX):
        self.total_epochs = cfg.OPTIM.MAX_EPOCH
        self.step_counter = 1
        N = cfg.OPTIM.MAX_EPOCH
-        mean = cfg.TRAINER.PROMPTSRC.GPA_MEAN
-        stdev = cfg.TRAINER.PROMPTSRC.GPA_STD
-        gauss = self.get_gauss(mean, stdev)
-        self.gauss = np.array([gauss(a) for a in range(1, N + 1)])
-        self.gauss = self.gauss / sum(self.gauss)
-        self.scaler = GradScaler() if cfg.TRAINER.PROMPTSRC.PREC == "amp" else None
+        mean = cfg.TRAINER.DZGCOOP.EWA_MEAN
+        stdev = cfg.TRAINER.DZGCOOP.EWA_STD
+        normal = self.get_normal(mean, stdev)
+        self.normal_weights = np.array([normal(a) for a in range(1, N + 1)])
+        self.normal_weights = self.normal_weights / sum(self.normal_weights)
+        self.scaler = GradScaler() if cfg.TRAINER.DZGCOOP.PREC == "amp" else None
        # Note that multi-gpu training could be slow because CLIP's size is
        # big, which slows down the copy operation in DataParallel
        device_count = torch.cuda.device_count()
        if device_count > 1:
            print(f"Multiple GPUs detected (n_gpus={device_count}), use all of them!")
            self.model = nn.DataParallel(self.model)
-        # Keep model with GPA
-        self.previous_model_gpa = None
+        # Keep model with EWA
+        self.previous_model_ewa = None

    def forward_backward(self, batch):
        image, label = self.parse_batch_train(batch)
@@ -314,7 +334,7 @@ class PromptSRC(TrainerX):
        optim = self.optim
        scaler = self.scaler

-        prec = self.cfg.TRAINER.PROMPTSRC.PREC
+        prec = self.cfg.TRAINER.DZGCOOP.PREC
        if prec == "amp":
            with autocast():
                loss = model(image, label)
@@ -323,23 +343,26 @@ class PromptSRC(TrainerX):
            scaler.step(optim)
            scaler.update()
        else:
-            loss_ce, normalized_text_features, zs_clip_text_embeddings, zs_image_embedd, image_ft, \
-            zero_shot_logits, logits = model(image, label)
-            # Calculate the L_SCL_text loss
-            loss_scl_text = F.l1_loss(normalized_text_features, zs_clip_text_embeddings.cuda(),
-                                      reduction='mean') * self.cfg.TRAINER.PROMPTSRC.TEXT_LOSS_WEIGHT
-            # Calculate the L_SCL_image loss
-            loss_scl_image = F.l1_loss(image_ft, zs_image_embedd.cuda(),
-                                       reduction='mean') * self.cfg.TRAINER.PROMPTSRC.IMAGE_LOSS_WEIGHT
-            # Now calculate L_SCL_logits
-            L_SCL_logits = F.kl_div(
-                F.log_softmax(logits / 1, dim=1),
+            loss_ce, text_features_strong, text_features_weak, semantic_embeddings, zs_image_embedd, image_ft, \
+            zero_shot_logits, logits_strong, logits_weak, logits_final = model(image, label)
+
+            lambda1 = self.cfg.TRAINER.DZGCOOP.IMAGE_LOSS_WEIGHT
+            lambda2 = self.cfg.TRAINER.DZGCOOP.TEXT_LOSS_WEIGHT_STRONG
+            lambda3 = self.cfg.TRAINER.DZGCOOP.TEXT_LOSS_WEIGHT_WEAK
+
+            L_zvg = F.l1_loss(image_ft, zs_image_embedd.cuda(), reduction='mean') * lambda1
+            L_sg_strong = F.l1_loss(text_features_strong, semantic_embeddings.cuda(), reduction='mean') * lambda2
+            L_sg_weak = F.l1_loss(text_features_weak, semantic_embeddings.cuda(), reduction='mean') * lambda3
+
+            L_zpg = F.kl_div(
+                F.log_softmax(logits_final / 1, dim=1),
                F.log_softmax(zero_shot_logits / 1, dim=1),
                reduction='sum',
                log_target=True
-            ) * (1 * 1) / logits.numel()
-            L_SCL = (L_SCL_logits + loss_scl_text + loss_scl_image)
-            loss = (loss_ce + L_SCL)
+            ) * (1 * 1) / logits_final.numel()
+
+            L_zg = (L_zpg + L_sg_strong + L_sg_weak + L_zvg)
+            loss = (loss_ce + L_zg)
            optim.zero_grad()
            loss.backward()
            optim.step()
@@ -348,20 +371,22 @@ class PromptSRC(TrainerX):

        if (self.batch_idx + 1) == self.num_batches:
            self.update_lr()
-            # Means one epoch is completed, perform GPA
+            # One epoch has completed: weight this epoch's parameters and fold them into the EWA accumulator
            self.step_counter = self.step_counter + 1
-            current_epoch_weight = self.gauss[self.step_counter - 2]
+            current_epoch_weight = self.normal_weights[self.step_counter - 2]
            current_model_weights = copy.deepcopy(model.state_dict())
+            for key in current_model_weights:
+                current_model_weights[key] = current_model_weights[key].cpu()
            weighted_state_dict = self.state_dict_weighting(current_model_weights, current_epoch_weight)
-            if self.previous_model_gpa is None:
-                self.previous_model_gpa = weighted_state_dict
+            if self.previous_model_ewa is None:
+                self.previous_model_ewa = weighted_state_dict
            else:
-                self.previous_model_gpa = self.state_dict_add(weighted_state_dict, self.previous_model_gpa)
+                self.previous_model_ewa = self.state_dict_add(weighted_state_dict, self.previous_model_ewa)

        if self.step_counter == self.model.total_epochs + 1:
-            print("Using GPA model for final inference...")
-            model.load_state_dict(self.previous_model_gpa)
-            self.model.load_state_dict(self.previous_model_gpa)
+            print("Using EWA model for final inference...")
+            model.load_state_dict(self.previous_model_ewa)
+            self.model.load_state_dict(self.previous_model_ewa)
        return loss_summary

    def state_dict_weighting(self, main_dict, weightage, prompt_only=False):
@@ -369,24 +394,24 @@ class PromptSRC(TrainerX):
        updated_dict = copy.deepcopy(main_dict)
        if not prompt_only:
            for key in main_dict:
-                updated_dict[key] = main_dict[key] * weightage
+                updated_dict[key] = main_dict[key].cpu() * weightage
            return updated_dict
        else:
-            return main_dict * weightage
+            return main_dict.cpu() * weightage

    def state_dict_add(self, dict1, dict2, prompt_only=False):
        # Add dict1 into dict2 key by key (dict2 is updated in place and returned)
        if not prompt_only:
            modified_dict = dict2
            for key in dict1:
-                modified_dict[key] = (modified_dict[key] + dict1[key])
+                modified_dict[key] = modified_dict[key].cpu() + dict1[key].cpu()
            return modified_dict
        else:
-            return dict1 + dict2
+            return dict1.cpu() + dict2.cpu()

-    def get_gauss(self, mu, sigma):
-        gauss = lambda x: (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
-        return gauss
+    def get_normal(self, mu, sigma):
+        normal = lambda x: (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
+        return normal

    def parse_batch_train(self, batch):
        input = batch["img"]
@@ -425,6 +450,12 @@ class PromptSRC(TrainerX):
            if "prompt_learner.token_suffix" in state_dict:
                del state_dict["prompt_learner.token_suffix"]

+            # Backward compatibility: if an old checkpoint stores a single "prompt_learner.ctx", use a copy of it to initialize both ctx_strong and ctx_weak
+            if "prompt_learner.ctx" in state_dict:
+                ctx = state_dict.pop("prompt_learner.ctx")
+                state_dict["prompt_learner.ctx_strong"] = ctx.clone()
+                state_dict["prompt_learner.ctx_weak"] = ctx.clone()
+
            print("Loading weights to {} " 'from "{}" (epoch = {})'.format(name, model_path, epoch))
            # Load with strict=False so that keys removed above (e.g. token_suffix) do not raise a missing-key error
-            self._models[name].load_state_dict(state_dict, strict=False)
+            self._models[name].load_state_dict(state_dict, strict=False)
Author	SHA1	Message	Date
rain-bus	0b6eb7ce5e	hyper-param change	2026-02-26 13:03:59 +08:00
rain-bus	fa3afbcae1	rename distill variable	2026-02-25 21:15:41 +08:00
rain-bus	f26f793937	rename ewa	2026-02-25 17:36:27 +08:00
rain-bus	61864e192a	rename to dzgcoop	2026-02-24 20:35:56 +08:00
rain-bus	f3a7993665	xda xdo script	2026-02-06 17:38:54 +08:00
rain-bus	91e873c365	dual and softmax conf	2026-02-05 18:46:37 +08:00