Release of PromptSRC with pretrained models.

This commit is contained in:
uzair khattak
2023-07-13 23:43:31 +05:00
commit 8be7dcff6b
132 changed files with 106641 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "Caltech101"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "DescribableTextures"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "EuroSAT"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "FGVCAircraft"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "Food101"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "ImageNet"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "ImageNetA"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "ImageNetR"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "ImageNetSketch"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "ImageNetV2"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "OxfordFlowers"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "OxfordPets"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "StanfordCars"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "SUN397"

View File

@@ -0,0 +1,2 @@
# Dataset config: selects the dataset named below.
DATASET:
  NAME: "UCF101"

View File

@@ -0,0 +1,35 @@
# Trainer config: TRAINER.COCOOP on a ViT-B/16 backbone.
# N_CTX=16, empty CTX_INIT, 10 epochs, train batch size 1.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 1
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 10
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  COCOOP:
    N_CTX: 16
    CTX_INIT: ""
    PREC: "fp16"

View File

@@ -0,0 +1,35 @@
# Trainer config: TRAINER.COCOOP on a ViT-B/16 backbone.
# N_CTX=4, empty CTX_INIT, 10 epochs, train batch size 1.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 1
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 10
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  COCOOP:
    N_CTX: 4
    CTX_INIT: ""
    PREC: "fp16"

View File

@@ -0,0 +1,35 @@
# Trainer config: TRAINER.COCOOP on a ViT-B/16 backbone.
# N_CTX=4, CTX_INIT "a photo of a", 10 epochs, train batch size 1.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 1
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 10
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  COCOOP:
    N_CTX: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"

View File

@@ -0,0 +1,35 @@
# Trainer config: TRAINER.COCOOP on a ViT-B/16 backbone.
# N_CTX=8, empty CTX_INIT, 10 epochs, train batch size 1.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 1
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 10
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  COCOOP:
    N_CTX: 8
    CTX_INIT: ""
    PREC: "fp16"

View File

@@ -0,0 +1,29 @@
# Training config: RN101 backbone, SGD (LR 0.002, cosine scheduler),
# 200 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 200
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN101"

View File

@@ -0,0 +1,29 @@
# Training config: RN101 backbone, SGD (LR 0.002, cosine scheduler),
# 50 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN101"

View File

@@ -0,0 +1,29 @@
# Training config: RN50 backbone, SGD (LR 0.002, cosine scheduler),
# 200 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 200
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN50"

View File

@@ -0,0 +1,33 @@
# Training config: RN50 backbone, SGD (LR 0.002, cosine scheduler),
# 200 epochs, train batch size 32, TRAINER.COOP.CTX_INIT "a photo of a".
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 200
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN50"

TRAINER:
  COOP:
    CTX_INIT: "a photo of a"

View File

@@ -0,0 +1,29 @@
# Training config: RN50 backbone, SGD (LR 0.002, cosine scheduler),
# 100 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 100
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN50"

View File

@@ -0,0 +1,29 @@
# Training config: RN50 backbone, SGD (LR 0.002, cosine scheduler),
# 50 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN50"

View File

@@ -0,0 +1,33 @@
# Training config: RN50 backbone, SGD (LR 0.002, cosine scheduler),
# 50 epochs, train batch size 32, TRAINER.COOP.CTX_INIT "a photo of a".
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "RN50"

TRAINER:
  COOP:
    CTX_INIT: "a photo of a"

View File

@@ -0,0 +1,17 @@
# Config with only data-loading, input and backbone settings (no OPTIM or
# TRAIN section): RN50 backbone, batch size 200 for both train and test.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 200
  TEST:
    BATCH_SIZE: 200
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

MODEL:
  BACKBONE:
    NAME: "RN50"

View File

@@ -0,0 +1,29 @@
# Training config: ViT-B/16 backbone, SGD (LR 0.002, cosine scheduler),
# 200 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 200
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

View File

@@ -0,0 +1,29 @@
# Training config: ViT-B/16 backbone, SGD (LR 0.002, cosine scheduler),
# 100 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 100
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

View File

@@ -0,0 +1,29 @@
# Training config: ViT-B/16 backbone, SGD (LR 0.002, cosine scheduler),
# 50 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

View File

@@ -0,0 +1,29 @@
# Training config: ViT-B/32 backbone, SGD (LR 0.002, cosine scheduler),
# 200 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 200
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "ViT-B/32"

View File

@@ -0,0 +1,29 @@
# Training config: ViT-B/32 backbone, SGD (LR 0.002, cosine scheduler),
# 50 epochs, train batch size 32.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 32
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.002
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 5

MODEL:
  BACKBONE:
    NAME: "ViT-B/32"

View File

@@ -0,0 +1,39 @@
# Independent Vision Language Prompting (TRAINER.IVLP)
# ViT-B/16 backbone, SGD (LR 0.0025, cosine scheduler), 20 epochs,
# train batch size 4; 4 vision + 4 text context tokens, prompt depth 9 on both sides.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.0025
  MAX_EPOCH: 20
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  IVLP:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH_VISION: 9
    PROMPT_DEPTH_TEXT: 9

View File

@@ -0,0 +1,36 @@
# Trainer config: TRAINER.MAPLE on a ViT-B/16 backbone.
# SGD (LR 0.0035, cosine scheduler), MAX_EPOCH 2, train batch size 4,
# N_CTX=2, PROMPT_DEPTH=9.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.0035
  MAX_EPOCH: 2
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  MAPLE:
    N_CTX: 2
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH: 9

View File

@@ -0,0 +1,36 @@
# Trainer config: TRAINER.MAPLE on a ViT-B/16 backbone.
# SGD (LR 0.0026, cosine scheduler), MAX_EPOCH 2, train batch size 4,
# N_CTX=2, PROMPT_DEPTH=3.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.0026
  MAX_EPOCH: 2
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  MAPLE:
    N_CTX: 2
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH: 3

View File

@@ -0,0 +1,43 @@
# PromptSRC: Prompting with Self-regularizing constraints
# ViT-B/16 backbone, SGD (LR 0.0025, cosine scheduler), 20 epochs,
# train batch size 4; prompt depth 9 on both sides, GPA_MEAN 15 / GPA_STD 1.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.0025
  MAX_EPOCH: 20
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  PROMPTSRC:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH_VISION: 9
    PROMPT_DEPTH_TEXT: 9
    # Weights of the text and image loss terms.
    TEXT_LOSS_WEIGHT: 25
    IMAGE_LOSS_WEIGHT: 10
    # NOTE(review): presumably the mean/std of a Gaussian over epochs used to
    # weight prompt aggregation — confirm against the trainer code.
    GPA_MEAN: 15
    GPA_STD: 1

View File

@@ -0,0 +1,43 @@
# PromptSRC: Prompting with Self-regularizing constraints
# ViT-B/16 backbone, SGD (LR 0.0025, cosine scheduler), 20 epochs,
# train batch size 4; prompt depth 3 on both sides, GPA_MEAN 6 / GPA_STD 10.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.0025
  MAX_EPOCH: 20
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  PROMPTSRC:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH_VISION: 3
    PROMPT_DEPTH_TEXT: 3
    # Weights of the text and image loss terms.
    TEXT_LOSS_WEIGHT: 25
    IMAGE_LOSS_WEIGHT: 10
    # NOTE(review): presumably the mean/std of a Gaussian over epochs used to
    # weight prompt aggregation — confirm against the trainer code.
    GPA_MEAN: 6
    GPA_STD: 10

View File

@@ -0,0 +1,47 @@
# PromptSRC: Prompting with Self-regularizing constraints
# ViT-B/16 backbone, SGD (LR 0.0025, cosine scheduler), 50 epochs,
# train batch size 4; prompt depth 9 on both sides.
# The GPA_MEAN / GPA_STD pair is dataset-dependent: see the two options at the bottom.
DATALOADER:
  TRAIN_X:
    BATCH_SIZE: 4
  TEST:
    BATCH_SIZE: 100
  NUM_WORKERS: 8

INPUT:
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Statistics for the "normalize" transform.
  # NOTE(review): these look like CLIP's preprocessing constants — confirm.
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]

OPTIM:
  NAME: "sgd"
  LR: 0.0025
  MAX_EPOCH: 50
  LR_SCHEDULER: "cosine"
  # Warm-up settings: 1 epoch, constant type, LR 1e-5.
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TRAINER:
  PROMPTSRC:
    N_CTX_VISION: 4
    N_CTX_TEXT: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH_VISION: 9
    PROMPT_DEPTH_TEXT: 9
    # Weights of the text and image loss terms.
    TEXT_LOSS_WEIGHT: 25
    IMAGE_LOSS_WEIGHT: 10
    # Use the configuration below for: ImageNet, Caltech101, OxfordPets, Food101, UCF101 and SUN397
    GPA_MEAN: 30
    GPA_STD: 30
    # Use the configuration below instead (comment out the pair above) for:
    # StanfordCars, OxfordFlowers (Flowers102), FGVCAircraft, DescribableTextures (DTD) and EuroSAT
    # GPA_MEAN: 45
    # GPA_STD: 5