Release of PromptSRC with pretrained models.

This commit is contained in:
uzair khattak
2023-07-13 23:43:31 +05:00
commit 8be7dcff6b
132 changed files with 106641 additions and 0 deletions

0
datasets/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

59
datasets/caltech101.py Normal file
View File

@@ -0,0 +1,59 @@
import os
import pickle
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
from .dtd import DescribableTextures as DTD
# Caltech-101 folders excluded from the dataset: the background-clutter
# folder and the duplicate easy-faces category.
IGNORED = ["BACKGROUND_Google", "Faces_easy"]

# Map raw folder names to human-readable class names used in prompts.
NEW_CNAMES = {
    "airplanes": "airplane",
    "Faces": "face",
    "Leopards": "leopard",
    "Motorbikes": "motorbike",
}
@DATASET_REGISTRY.register()
class Caltech101(DatasetBase):
    """Caltech-101 object classification dataset.

    Reuses the cached CoOp-style json split when present, otherwise builds
    and caches one from the folder structure. Supports few-shot sampling
    and base/new class subsampling via the config.
    """

    # Folder name under cfg.DATASET.ROOT.
    dataset_dir = "caltech-101"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "101_ObjectCategories")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_Caltech101.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # First run: build a split from the folder structure and cache it.
            train, val, test = DTD.read_and_split_data(self.image_dir, ignored=IGNORED, new_cnames=NEW_CNAMES)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

95
datasets/dtd.py Normal file
View File

@@ -0,0 +1,95 @@
import os
import pickle
import random
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import listdir_nohidden, mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class DescribableTextures(DatasetBase):
    """Describable Textures Dataset (DTD).

    Also provides the generic ``read_and_split_data`` helper that other
    folder-organized datasets (Caltech101, EuroSAT, Food101) reuse.
    """

    dataset_dir = "dtd"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_DescribableTextures.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # First run: build a split from the folder structure and cache it.
            train, val, test = self.read_and_split_data(self.image_dir)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    @staticmethod
    def read_and_split_data(image_dir, p_trn=0.5, p_val=0.2, ignored=[], new_cnames=None):
        """Scan *image_dir* and split each class into train/val/test.

        Args:
            image_dir (str): root folder with one sub-folder per class.
            p_trn (float): fraction of each class used for training.
            p_val (float): fraction of each class used for validation;
                the remainder goes to test.
            ignored (list): folder names to skip entirely.
            new_cnames (dict or None): optional folder-name -> class-name map.

        NOTE(review): the mutable default ``ignored=[]`` is shared across
        calls; it is never mutated here, but confirm before changing.
        """
        # The data are supposed to be organized into the following structure
        # =============
        # images/
        #     dog/
        #     cat/
        #     horse/
        # =============
        categories = listdir_nohidden(image_dir)
        categories = [c for c in categories if c not in ignored]
        categories.sort()

        p_tst = 1 - p_trn - p_val
        print(f"Splitting into {p_trn:.0%} train, {p_val:.0%} val, and {p_tst:.0%} test")

        def _collate(ims, y, c):
            # Wrap each image path into a Datum with the given label/classname.
            items = []
            for im in ims:
                item = Datum(impath=im, label=y, classname=c)  # is already 0-based
                items.append(item)
            return items

        train, val, test = [], [], []
        for label, category in enumerate(categories):
            category_dir = os.path.join(image_dir, category)
            images = listdir_nohidden(category_dir)
            images = [os.path.join(category_dir, im) for im in images]
            # Split membership depends on the global RNG state (seeded by caller).
            random.shuffle(images)
            n_total = len(images)
            n_train = round(n_total * p_trn)
            n_val = round(n_total * p_val)
            n_test = n_total - n_train - n_val
            assert n_train > 0 and n_val > 0 and n_test > 0
            if new_cnames is not None and category in new_cnames:
                # Rename for prompts; the numeric label is unchanged.
                category = new_cnames[category]
            train.extend(_collate(images[:n_train], label, category))
            val.extend(_collate(images[n_train : n_train + n_val], label, category))
            test.extend(_collate(images[n_train + n_val :], label, category))

        return train, val, test

73
datasets/eurosat.py Normal file
View File

@@ -0,0 +1,73 @@
import os
import pickle
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
from .dtd import DescribableTextures as DTD
# Map raw EuroSAT folder names to descriptive class names used in prompts.
NEW_CNAMES = {
    "AnnualCrop": "Annual Crop Land",
    "Forest": "Forest",
    "HerbaceousVegetation": "Herbaceous Vegetation Land",
    "Highway": "Highway or Road",
    "Industrial": "Industrial Buildings",
    "Pasture": "Pasture Land",
    "PermanentCrop": "Permanent Crop Land",
    "Residential": "Residential Buildings",
    "River": "River",
    "SeaLake": "Sea or Lake",
}
@DATASET_REGISTRY.register()
class EuroSAT(DatasetBase):
    """EuroSAT satellite-image classification dataset (10 land-use classes).

    Reuses the cached json split when present, otherwise builds and caches
    one from the folder structure, renaming classes via NEW_CNAMES.
    """

    dataset_dir = "eurosat"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "2750")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_EuroSAT.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # First run: build a split from the folder structure and cache it.
            train, val, test = DTD.read_and_split_data(self.image_dir, new_cnames=NEW_CNAMES)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def update_classname(self, dataset_old):
        """Return a copy of *dataset_old* with class names mapped through NEW_CNAMES.

        Bug fix: the original referenced the undefined name ``NEW_CLASSNAMES``,
        so calling this method always raised NameError. The module-level
        mapping is ``NEW_CNAMES``.
        """
        dataset_new = []
        for item_old in dataset_old:
            cname_old = item_old.classname
            cname_new = NEW_CNAMES[cname_old]  # was NEW_CLASSNAMES (NameError)
            item_new = Datum(impath=item_old.impath, label=item_old.label, classname=cname_new)
            dataset_new.append(item_new)
        return dataset_new

71
datasets/fgvc_aircraft.py Normal file
View File

@@ -0,0 +1,71 @@
import os
import pickle
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class FGVCAircraft(DatasetBase):
    """FGVC-Aircraft fine-grained classification dataset.

    Uses the official images_variant_{train,val,test}.txt splits shipped
    with the dataset.
    """

    dataset_dir = "fgvc_aircraft"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        # variants.txt lists one aircraft variant per line; the line order
        # defines the label indices.
        classnames = []
        with open(os.path.join(self.dataset_dir, "variants.txt"), "r") as f:
            lines = f.readlines()
            for line in lines:
                classnames.append(line.strip())
        cname2lab = {c: i for i, c in enumerate(classnames)}

        train = self.read_data(cname2lab, "images_variant_train.txt")
        val = self.read_data(cname2lab, "images_variant_val.txt")
        test = self.read_data(cname2lab, "images_variant_test.txt")

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, cname2lab, split_file):
        """Parse a split file whose lines are "<image id> <variant name>"."""
        filepath = os.path.join(self.dataset_dir, split_file)
        items = []

        with open(filepath, "r") as f:
            lines = f.readlines()
            for line in lines:
                line = line.strip().split(" ")
                imname = line[0] + ".jpg"
                # Variant names may contain spaces, so rejoin everything
                # after the first token.
                classname = " ".join(line[1:])
                impath = os.path.join(self.image_dir, imname)
                label = cname2lab[classname]
                item = Datum(impath=impath, label=label, classname=classname)
                items.append(item)

        return items

51
datasets/food101.py Normal file
View File

@@ -0,0 +1,51 @@
import os
import pickle
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
from .dtd import DescribableTextures as DTD
@DATASET_REGISTRY.register()
class Food101(DatasetBase):
    """Food-101 dataset (101 food categories, one folder per class)."""

    dataset_dir = "food-101"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_Food101.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # First run: build a split from the folder structure and cache it.
            train, val, test = DTD.read_and_split_data(self.image_dir)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

91
datasets/imagenet.py Normal file
View File

@@ -0,0 +1,91 @@
import os
import pickle
from collections import OrderedDict
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import listdir_nohidden, mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class ImageNet(DatasetBase):
    """ImageNet-1k.

    The full train/test item lists are cached in preprocessed.pkl to avoid
    re-scanning the image folders on every run. The val folder doubles as
    the test set.
    """

    dataset_dir = "imagenet"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")
        self.preprocessed = os.path.join(self.dataset_dir, "preprocessed.pkl")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.preprocessed):
            # Fast path: load cached Datum lists instead of re-scanning disk.
            with open(self.preprocessed, "rb") as f:
                preprocessed = pickle.load(f)
                train = preprocessed["train"]
                test = preprocessed["test"]
        else:
            text_file = os.path.join(self.dataset_dir, "classnames.txt")
            classnames = self.read_classnames(text_file)
            train = self.read_data(classnames, "train")
            # Follow standard practice to perform evaluation on the val set
            # Also used as the val set (so evaluate the last-step model)
            test = self.read_data(classnames, "val")

            preprocessed = {"train": train, "test": test}
            with open(self.preprocessed, "wb") as f:
                pickle.dump(preprocessed, f, protocol=pickle.HIGHEST_PROTOCOL)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair. Note:
            # ``preprocessed`` is reused here as a *path* string, shadowing
            # the dict above. Only the train split is subsampled.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train = data["train"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                data = {"train": train}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, test = OxfordPets.subsample_classes(train, test, subsample=subsample)

        super().__init__(train_x=train, val=test, test=test)

    @staticmethod
    def read_classnames(text_file):
        """Return a dictionary containing
        key-value pairs of <folder name>: <class name>.
        """
        classnames = OrderedDict()
        with open(text_file, "r") as f:
            lines = f.readlines()
            for line in lines:
                line = line.strip().split(" ")
                folder = line[0]
                # Class names may contain spaces; rejoin the remainder.
                classname = " ".join(line[1:])
                classnames[folder] = classname
        return classnames

    def read_data(self, classnames, split_dir):
        """Scan <image_dir>/<split_dir> (one sorted folder per class) and
        build one Datum per image; labels follow the sorted folder order."""
        split_dir = os.path.join(self.image_dir, split_dir)
        folders = sorted(f.name for f in os.scandir(split_dir) if f.is_dir())
        items = []

        for label, folder in enumerate(folders):
            imnames = listdir_nohidden(os.path.join(split_dir, folder))
            classname = classnames[folder]
            for imname in imnames:
                impath = os.path.join(split_dir, folder, imname)
                item = Datum(impath=impath, label=label, classname=classname)
                items.append(item)

        return items

46
datasets/imagenet_a.py Normal file
View File

@@ -0,0 +1,46 @@
import os
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import listdir_nohidden
from .imagenet import ImageNet
# Entries in the image directory that are not class folders.
TO_BE_IGNORED = ["README.txt"]


@DATASET_REGISTRY.register()
class ImageNetA(DatasetBase):
    """ImageNet-A(dversarial).

    This dataset is used for testing only.
    """

    dataset_dir = "imagenet-adversarial"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "imagenet-a")

        text_file = os.path.join(self.dataset_dir, "classnames.txt")
        classnames = ImageNet.read_classnames(text_file)

        data = self.read_data(classnames)

        # The same item list serves as both train and test splits.
        super().__init__(train_x=data, test=data)

    def read_data(self, classnames):
        """Build one Datum per image; labels follow the sorted folder order."""
        image_dir = self.image_dir
        all_entries = listdir_nohidden(image_dir, sort=True)
        folders = [name for name in all_entries if name not in TO_BE_IGNORED]

        items = []
        for label, folder in enumerate(folders):
            classname = classnames[folder]
            folder_path = os.path.join(image_dir, folder)
            for imname in listdir_nohidden(folder_path):
                items.append(
                    Datum(
                        impath=os.path.join(folder_path, imname),
                        label=label,
                        classname=classname,
                    )
                )
        return items

46
datasets/imagenet_r.py Normal file
View File

@@ -0,0 +1,46 @@
import os
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import listdir_nohidden
from .imagenet import ImageNet
# Entries in the image directory that are not class folders.
TO_BE_IGNORED = ["README.txt"]


@DATASET_REGISTRY.register()
class ImageNetR(DatasetBase):
    """ImageNet-R(endition).

    This dataset is used for testing only.
    """

    dataset_dir = "imagenet-rendition"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "imagenet-r")

        text_file = os.path.join(self.dataset_dir, "classnames.txt")
        classnames = ImageNet.read_classnames(text_file)

        data = self.read_data(classnames)

        # The same item list serves as both train and test splits.
        super().__init__(train_x=data, test=data)

    def read_data(self, classnames):
        """Build one Datum per image; labels follow the sorted folder order."""
        image_dir = self.image_dir
        all_entries = listdir_nohidden(image_dir, sort=True)
        folders = [name for name in all_entries if name not in TO_BE_IGNORED]

        items = []
        for label, folder in enumerate(folders):
            classname = classnames[folder]
            folder_path = os.path.join(image_dir, folder)
            for imname in listdir_nohidden(folder_path):
                items.append(
                    Datum(
                        impath=os.path.join(folder_path, imname),
                        label=label,
                        classname=classname,
                    )
                )
        return items

View File

@@ -0,0 +1,43 @@
import os
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import listdir_nohidden
from .imagenet import ImageNet
@DATASET_REGISTRY.register()
class ImageNetSketch(DatasetBase):
    """ImageNet-Sketch.

    This dataset is used for testing only.
    """

    dataset_dir = "imagenet-sketch"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")

        text_file = os.path.join(self.dataset_dir, "classnames.txt")
        classnames = ImageNet.read_classnames(text_file)

        data = self.read_data(classnames)

        # The same item list serves as both train and test splits.
        super().__init__(train_x=data, test=data)

    def read_data(self, classnames):
        """Build one Datum per image; labels follow the sorted folder order."""
        image_dir = self.image_dir
        folders = listdir_nohidden(image_dir, sort=True)

        items = []
        for label, folder in enumerate(folders):
            classname = classnames[folder]
            folder_path = os.path.join(image_dir, folder)
            items.extend(
                Datum(
                    impath=os.path.join(folder_path, imname),
                    label=label,
                    classname=classname,
                )
                for imname in listdir_nohidden(folder_path)
            )
        return items

46
datasets/imagenetv2.py Normal file
View File

@@ -0,0 +1,46 @@
import os
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import listdir_nohidden
from .imagenet import ImageNet
@DATASET_REGISTRY.register()
class ImageNetV2(DatasetBase):
    """ImageNetV2.

    This dataset is used for testing only.
    """

    dataset_dir = "imagenetv2"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        variant = "imagenetv2-matched-frequency-format-val"
        self.image_dir = os.path.join(self.dataset_dir, variant)

        text_file = os.path.join(self.dataset_dir, "classnames.txt")
        classnames = ImageNet.read_classnames(text_file)

        data = self.read_data(classnames)

        # The same item list serves as both train and test splits.
        super().__init__(train_x=data, test=data)

    def read_data(self, classnames):
        """Images live in folders named "0".."999" by class index; class
        names are looked up through the ordered classnames mapping."""
        image_dir = self.image_dir
        folders = list(classnames.keys())

        items = []
        for label in range(1000):
            folder = folders[label]
            classname = classnames[folder]
            class_dir = os.path.join(image_dir, str(label))
            items.extend(
                Datum(
                    impath=os.path.join(class_dir, imname),
                    label=label,
                    classname=classname,
                )
                for imname in listdir_nohidden(class_dir)
            )
        return items

View File

@@ -0,0 +1,89 @@
import os
import pickle
import random
from scipy.io import loadmat
from collections import defaultdict
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import read_json, mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class OxfordFlowers(DatasetBase):
    """Oxford Flowers-102 fine-grained classification dataset.

    Labels come from imagelabels.mat (1-based) and class names from
    cat_to_name.json.
    """

    dataset_dir = "oxford_flowers"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "jpg")
        self.label_file = os.path.join(self.dataset_dir, "imagelabels.mat")
        self.lab2cname_file = os.path.join(self.dataset_dir, "cat_to_name.json")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_OxfordFlowers.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # First run: build a 50/20/30 split from the .mat labels and cache it.
            train, val, test = self.read_data()
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self):
        """Group images by their 1-based .mat label, then split each class
        50/20/30 into train/val/test (labels become 0-based in the output)."""
        tracker = defaultdict(list)
        label_file = loadmat(self.label_file)["labels"][0]
        for i, label in enumerate(label_file):
            # Images are named image_00001.jpg, image_00002.jpg, ...
            imname = f"image_{str(i + 1).zfill(5)}.jpg"
            impath = os.path.join(self.image_dir, imname)
            label = int(label)
            tracker[label].append(impath)

        print("Splitting data into 50% train, 20% val, and 30% test")

        def _collate(ims, y, c):
            items = []
            for im in ims:
                item = Datum(impath=im, label=y - 1, classname=c)  # convert to 0-based label
                items.append(item)
            return items

        lab2cname = read_json(self.lab2cname_file)
        train, val, test = [], [], []
        for label, impaths in tracker.items():
            # Split membership depends on the global RNG state (seeded by caller).
            random.shuffle(impaths)
            n_total = len(impaths)
            n_train = round(n_total * 0.5)
            n_val = round(n_total * 0.2)
            n_test = n_total - n_train - n_val
            assert n_train > 0 and n_val > 0 and n_test > 0
            # The json maps the 1-based label (as a string) to the class name.
            cname = lab2cname[str(label)]
            train.extend(_collate(impaths[:n_train], label, cname))
            val.extend(_collate(impaths[n_train : n_train + n_val], label, cname))
            test.extend(_collate(impaths[n_train + n_val :], label, cname))

        return train, val, test

186
datasets/oxford_pets.py Normal file
View File

@@ -0,0 +1,186 @@
import os
import pickle
import math
import random
from collections import defaultdict
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import read_json, write_json, mkdir_if_missing
@DATASET_REGISTRY.register()
class OxfordPets(DatasetBase):
    """Oxford-IIIT Pets dataset (cat/dog breeds).

    Also hosts the split helpers (read_split, save_split, split_trainval,
    subsample_classes) that the other dataset classes in this package reuse.
    """

    dataset_dir = "oxford_pets"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")
        self.anno_dir = os.path.join(self.dataset_dir, "annotations")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_OxfordPets.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = self.read_split(self.split_path, self.image_dir)
        else:
            # First run: derive train/val from the official trainval list
            # and cache the resulting split as json.
            trainval = self.read_data(split_file="trainval.txt")
            test = self.read_data(split_file="test.txt")
            train, val = self.split_trainval(trainval)
            self.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = self.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, split_file):
        """Parse an annotation file with lines "<imname> <label> <species> ...";
        the breed name is recovered from the image file name."""
        filepath = os.path.join(self.anno_dir, split_file)
        items = []

        with open(filepath, "r") as f:
            lines = f.readlines()
            for line in lines:
                line = line.strip()
                imname, label, species, _ = line.split(" ")
                # Drop the trailing image index, e.g. "Abyssinian_12" -> "abyssinian".
                breed = imname.split("_")[:-1]
                breed = "_".join(breed)
                breed = breed.lower()
                imname += ".jpg"
                impath = os.path.join(self.image_dir, imname)
                label = int(label) - 1  # convert to 0-based index
                item = Datum(impath=impath, label=label, classname=breed)
                items.append(item)

        return items

    @staticmethod
    def split_trainval(trainval, p_val=0.2):
        """Randomly split *trainval* into train/val, sending p_val of each
        class to val (stratified per label)."""
        p_trn = 1 - p_val
        print(f"Splitting trainval into {p_trn:.0%} train and {p_val:.0%} val")
        # Group item indices by label so the split is per-class.
        tracker = defaultdict(list)
        for idx, item in enumerate(trainval):
            label = item.label
            tracker[label].append(idx)

        train, val = [], []
        for label, idxs in tracker.items():
            n_val = round(len(idxs) * p_val)
            assert n_val > 0
            # Split membership depends on the global RNG state (seeded by caller).
            random.shuffle(idxs)
            for n, idx in enumerate(idxs):
                item = trainval[idx]
                if n < n_val:
                    val.append(item)
                else:
                    train.append(item)

        return train, val

    @staticmethod
    def save_split(train, val, test, filepath, path_prefix):
        """Write the three splits to *filepath* as json, storing image paths
        relative to *path_prefix* so the split is portable across machines."""
        def _extract(items):
            out = []
            for item in items:
                impath = item.impath
                label = item.label
                classname = item.classname
                impath = impath.replace(path_prefix, "")
                if impath.startswith("/"):
                    impath = impath[1:]
                out.append((impath, label, classname))
            return out

        train = _extract(train)
        val = _extract(val)
        test = _extract(test)

        split = {"train": train, "val": val, "test": test}

        write_json(split, filepath)
        print(f"Saved split to {filepath}")

    @staticmethod
    def read_split(filepath, path_prefix):
        """Load a json split written by save_split(), re-rooting the relative
        image paths under *path_prefix*."""
        def _convert(items):
            out = []
            for impath, label, classname in items:
                impath = os.path.join(path_prefix, impath)
                item = Datum(impath=impath, label=int(label), classname=classname)
                out.append(item)
            return out

        print(f"Reading split from {filepath}")
        split = read_json(filepath)
        train = _convert(split["train"])
        val = _convert(split["val"])
        test = _convert(split["test"])

        return train, val, test

    @staticmethod
    def subsample_classes(*args, subsample="all"):
        """Divide classes into two groups. The first group
        represents base classes while the second group represents
        new classes.

        Args:
            args: a list of datasets, e.g. train, val and test.
            subsample (str): what classes to subsample.
        """
        assert subsample in ["all", "base", "new"]

        if subsample == "all":
            return args

        # The label set is derived from the FIRST dataset passed in.
        dataset = args[0]
        labels = set()
        for item in dataset:
            labels.add(item.label)
        labels = list(labels)
        labels.sort()
        n = len(labels)
        # Divide classes into two halves
        m = math.ceil(n / 2)

        print(f"SUBSAMPLE {subsample.upper()} CLASSES!")
        if subsample == "base":
            selected = labels[:m]  # take the first half
        else:
            selected = labels[m:]  # take the second half
        # Remap kept labels to 0..len(selected)-1 so the label space stays contiguous.
        relabeler = {y: y_new for y_new, y in enumerate(selected)}

        output = []
        for dataset in args:
            dataset_new = []
            for item in dataset:
                if item.label not in selected:
                    continue
                item_new = Datum(
                    impath=item.impath,
                    label=relabeler[item.label],
                    classname=item.classname
                )
                dataset_new.append(item_new)
            output.append(dataset_new)

        return output

75
datasets/stanford_cars.py Normal file
View File

@@ -0,0 +1,75 @@
import os
import pickle
from scipy.io import loadmat
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class StanfordCars(DatasetBase):
    """Stanford Cars fine-grained classification dataset."""

    dataset_dir = "stanford_cars"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_StanfordCars.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            # Reuse the cached train/val/test split for reproducibility.
            train, val, test = OxfordPets.read_split(self.split_path, self.dataset_dir)
        else:
            # First run: build the split from the devkit .mat annotations.
            trainval_file = os.path.join(self.dataset_dir, "devkit", "cars_train_annos.mat")
            test_file = os.path.join(self.dataset_dir, "cars_test_annos_withlabels.mat")
            meta_file = os.path.join(self.dataset_dir, "devkit", "cars_meta.mat")
            trainval = self.read_data("cars_train", trainval_file, meta_file)
            test = self.read_data("cars_test", test_file, meta_file)
            train, val = OxfordPets.split_trainval(trainval)
            OxfordPets.save_split(train, val, test, self.split_path, self.dataset_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            # Few-shot subsets are cached per (shots, seed) pair.
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                    train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation set is capped at 4 shots per class.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        # Optionally keep only the "base" or "new" half of the classes.
        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, image_dir, anno_file, meta_file):
        """Build Datum items from a cars annotation .mat file.

        Class names are reworded by moving the trailing model year to the
        front (e.g. "Make Model 2012" -> "2012 Make Model").
        """
        anno_file = loadmat(anno_file)["annotations"][0]
        meta_file = loadmat(meta_file)["class_names"][0]
        items = []

        for i in range(len(anno_file)):
            imname = anno_file[i]["fname"][0]
            impath = os.path.join(self.dataset_dir, image_dir, imname)
            label = anno_file[i]["class"][0, 0]
            label = int(label) - 1  # convert to 0-based index
            classname = meta_file[label][0]
            names = classname.split(" ")
            year = names.pop(-1)  # the last token is the model year
            names.insert(0, year)
            classname = " ".join(names)
            item = Datum(impath=impath, label=label, classname=classname)
            items.append(item)

        return items

80
datasets/sun397.py Normal file
View File

@@ -0,0 +1,80 @@
import os
import pickle
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class SUN397(DatasetBase):
    """SUN397 scene-recognition dataset.

    Expects images under ``<root>/sun397/SUN397``. Reuses the cached
    Zhou et al. split JSON when present; otherwise derives train/val/test
    from the official ``Training_01.txt`` / ``Testing_01.txt`` lists and
    saves the split for later runs. Few-shot subsets are cached per
    (num_shots, seed) pair under ``split_fewshot``.
    """

    dataset_dir = "sun397"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "SUN397")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_SUN397.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        # Load the persisted split if it exists; otherwise build it once
        # from the official lists and persist it.
        if os.path.exists(self.split_path):
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            with open(os.path.join(self.dataset_dir, "ClassName.txt"), "r") as f:
                # Each line looks like "/a/abbey"; drop the leading "/".
                classnames = [ln.strip()[1:] for ln in f.readlines()]
            cname2lab = {name: idx for idx, name in enumerate(classnames)}
            trainval = self.read_data(cname2lab, "Training_01.txt")
            test = self.read_data(cname2lab, "Testing_01.txt")
            train, val = OxfordPets.split_trainval(trainval)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            cache_path = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")
            if os.path.exists(cache_path):
                print(f"Loading preprocessed few-shot data from {cache_path}")
                with open(cache_path, "rb") as fh:
                    cache = pickle.load(fh)
                train, val = cache["train"], cache["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation is capped at 4 shots regardless of num_shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                cache = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {cache_path}")
                with open(cache_path, "wb") as fh:
                    pickle.dump(cache, fh, protocol=pickle.HIGHEST_PROTOCOL)

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, cname2lab, text_file):
        """Parse a SUN397 image-list file into Datum items.

        Args:
            cname2lab (dict): raw class path (e.g. "a/abbey") -> label index.
            text_file (str): list file name relative to the dataset root.
        """
        text_file = os.path.join(self.dataset_dir, text_file)
        items = []
        with open(text_file, "r") as f:
            for raw in f.readlines():
                imname = raw.strip()[1:]  # drop the leading "/"
                classname = os.path.dirname(imname)
                label = cname2lab[classname]
                impath = os.path.join(self.image_dir, imname)
                # Discard the single-letter bucket directory and reverse the
                # remaining words so modifiers like indoor/outdoor come first.
                words = classname.split("/")[1:]
                classname = " ".join(words[::-1])
                items.append(Datum(impath=impath, label=label, classname=classname))
        return items

84
datasets/ucf101.py Normal file
View File

@@ -0,0 +1,84 @@
import os
import pickle
import re
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
from dassl.utils import mkdir_if_missing
from .oxford_pets import OxfordPets
@DATASET_REGISTRY.register()
class UCF101(DatasetBase):
    """UCF101 action-recognition dataset (middle-frame images).

    Expects frames under ``<root>/ucf101/UCF-101-midframes``. Reuses the
    cached Zhou et al. split JSON when present; otherwise builds the split
    from the official ``ucfTrainTestlist`` files and saves it. Few-shot
    subsets are cached per (num_shots, seed) pair under ``split_fewshot``.
    """

    dataset_dir = "ucf101"

    def __init__(self, cfg):
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "UCF-101-midframes")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_UCF101.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        # Load the persisted split if it exists; otherwise build it once
        # from the official train/test lists and persist it.
        if os.path.exists(self.split_path):
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            cname2lab = {}
            filepath = os.path.join(self.dataset_dir, "ucfTrainTestlist/classInd.txt")
            with open(filepath, "r") as f:
                for raw in f.readlines():
                    # Each line looks like "1 ApplyEyeMakeup".
                    label, classname = raw.strip().split(" ")
                    cname2lab[classname] = int(label) - 1  # convert to 0-based index
            trainval = self.read_data(cname2lab, "ucfTrainTestlist/trainlist01.txt")
            test = self.read_data(cname2lab, "ucfTrainTestlist/testlist01.txt")
            train, val = OxfordPets.split_trainval(trainval)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            cache_path = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")
            if os.path.exists(cache_path):
                print(f"Loading preprocessed few-shot data from {cache_path}")
                with open(cache_path, "rb") as fh:
                    cache = pickle.load(fh)
                train, val = cache["train"], cache["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # Validation is capped at 4 shots regardless of num_shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                cache = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {cache_path}")
                with open(cache_path, "wb") as fh:
                    pickle.dump(cache, fh, protocol=pickle.HIGHEST_PROTOCOL)

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, cname2lab, text_file):
        """Parse a UCF101 video-list file into Datum items.

        Args:
            cname2lab (dict): CamelCase action name -> label index.
            text_file (str): list file path relative to the dataset root.
        """
        text_file = os.path.join(self.dataset_dir, text_file)
        items = []
        with open(text_file, "r") as f:
            for raw in f.readlines():
                # trainlist lines carry "path label"; keep only the path.
                relpath = raw.strip().split(" ")[0]
                action, filename = relpath.split("/")
                label = cname2lab[action]
                # Split the CamelCase action name into underscore-joined
                # words to match the on-disk directory layout.
                renamed_action = "_".join(re.findall("[A-Z][^A-Z]*", action))
                filename = filename.replace(".avi", ".jpg")
                impath = os.path.join(self.image_dir, renamed_action, filename)
                items.append(Datum(impath=impath, label=label, classname=renamed_action))
        return items