# Experiment configuration for the MAPLEG trainer section defined at the bottom.
# NOTE(review): this file had been collapsed onto a single line; the nesting
# below was reconstructed from key order and section names. Confirm the
# hierarchy against the consuming config schema before relying on it.

DATASET:
  SELECTION_BATCH_SIZE: 50
  SUBSAMPLE_CLASSES: all

DATALOADER:
  RETURN_IMG0: true
  TRAIN_X:
    BATCH_SIZE: 16
  TEST:
    BATCH_SIZE: 64
  NUM_WORKERS: 2

INPUT:
  # Kept as a plain scalar on purpose: YAML has no tuple type, so the loader
  # sees the string "(224, 224)". Presumably the consumer evaluates it into a
  # tuple -- TODO confirm.
  SIZE: (224, 224)
  INTERPOLATION: "bicubic"
  # Per-channel normalization statistics (three values each).
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073]
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711]
  TRANSFORMS: ["random_resized_crop", "random_flip", "normalize"]
  # Disabled options, kept for reference:
  # CUTOUT_N: 1
  # CUTOUT_LEN: 128

OPTIM:
  NAME: "sgd"
  # Original author note: "0.0035 0.0026 for crossdata".
  LR: 0.0026
  MAX_EPOCH: 5
  LR_SCHEDULER: "cosine"
  WARMUP_EPOCH: 1
  WARMUP_TYPE: "constant"
  # NOTE(review): "1e-5" has no decimal point, so YAML 1.1 loaders (e.g.
  # PyYAML) read it as a string, not a float. Left unchanged to preserve the
  # original value; verify the consumer converts it.
  WARMUP_CONS_LR: 1e-5

TRAIN:
  PRINT_FREQ: 20
  CHECKPOINT_FREQ: 1

MODEL:
  BACKBONE:
    NAME: "ViT-B/16"

TEST:
  PER_CLASS_RESULT: false
  FINAL_MODEL: "best_val"

TRAINER:
  MAPLEG:
    N_CTX: 4
    CTX_INIT: "a photo of a"
    PREC: "fp16"
    PROMPT_DEPTH: 9