# Training / evaluation configuration.
# NOTE(review): this file was restored from a single collapsed line; the
# nesting below is reconstructed from key order and naming. Confirm it
# against the schema of the config loader that consumes this file.

DATASET:
  SELECTION_BATCH_SIZE: 50
  SUBSAMPLE_CLASSES: base

DATALOADER:
  RETURN_IMG0: true
  TRAIN_X:
    BATCH_SIZE: 1
  TEST:
    BATCH_SIZE: 256
  NUM_WORKERS: 4

INPUT:
  # Plain scalar: YAML loads "(224, 224)" as a string, not a sequence.
  # Presumably the consumer converts it to a tuple; verify before changing.
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]
  # Disabled options, kept for reference:
  # CUTOUT_N: 1
  # CUTOUT_LEN: 128

OPTIM:
  NAME: "sgd"
  LR: 0.0035
  MAX_EPOCH: 5
  LR_SCHEDULER: "cosine"
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  # NOTE(review): without a decimal point, YAML 1.1 resolvers (e.g. PyYAML)
  # load 1e-5 as a string rather than a float. Left unchanged; confirm the
  # consumer coerces it to a number.
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TEST:
  PER_CLASS_RESULT: false
  FINAL_MODEL: "best_val"

TRAINER:
  MAPLE:
    N_CTX: 2
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH: 9