Release of PromptSRC with pretrained models.
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,59 @@
|
||||
import os
|
||||
import pickle
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
from .dtd import DescribableTextures as DTD
|
||||
|
||||
# Category folders that are left out when a fresh split is generated.
IGNORED = ["BACKGROUND_Google", "Faces_easy"]

# Folder name -> class name substituted when a fresh split is generated.
NEW_CNAMES = {
    "airplanes": "airplane",
    "Faces": "face",
    "Leopards": "leopard",
    "Motorbikes": "motorbike",
}


@DATASET_REGISTRY.register()
class Caltech101(DatasetBase):
    """Dataset stored in the ``caltech-101`` folder under ``cfg.DATASET.ROOT``.

    The train/val/test split is read from ``split_zhou_Caltech101.json`` when
    that file exists; otherwise it is generated from the image folders and
    written to that path. Few-shot subsets are cached as pickle files inside
    ``split_fewshot``.
    """

    dataset_dir = "caltech-101"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "101_ObjectCategories")
        self.split_path = os.path.join(base_dir, "split_zhou_Caltech101.json")
        self.split_fewshot_dir = os.path.join(base_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.split_path):
            # No stored split yet: create one and persist it for later runs.
            train, val, test = DTD.read_and_split_data(self.image_dir, ignored=IGNORED, new_cnames=NEW_CNAMES)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)
        else:
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)

        shots = cfg.DATASET.NUM_SHOTS
        if shots >= 1:
            # One cache file per (shots, seed) combination.
            cache_file = os.path.join(self.split_fewshot_dir, f"shot_{shots}-seed_{cfg.SEED}.pkl")

            if not os.path.exists(cache_file):
                train = self.generate_fewshot_dataset(train, num_shots=shots)
                # The validation subset never uses more than 4 shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(shots, 4))
                payload = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {cache_file}")
                with open(cache_file, "wb") as handle:
                    pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {cache_file}")
                with open(cache_file, "rb") as handle:
                    payload = pickle.load(handle)
                train = payload["train"]
                val = payload["val"]

        train, val, test = OxfordPets.subsample_classes(
            train, val, test, subsample=cfg.DATASET.SUBSAMPLE_CLASSES
        )

        super().__init__(train_x=train, val=val, test=test)
|
||||
@@ -0,0 +1,95 @@
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import listdir_nohidden, mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class DescribableTextures(DatasetBase):
    """Dataset stored in the ``dtd`` folder under ``cfg.DATASET.ROOT``.

    The train/val/test split is read from
    ``split_zhou_DescribableTextures.json`` when that file exists; otherwise
    it is generated with :meth:`read_and_split_data` and written to that path.
    Few-shot subsets are cached as pickle files inside ``split_fewshot``.
    """

    dataset_dir = "dtd"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "images")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_DescribableTextures.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # No stored split yet: create one and persist it for later runs.
            train, val, test = self.read_and_split_data(self.image_dir)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            # One cache file per (shots, seed) combination.
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # The validation subset never uses more than 4 shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    @staticmethod
    def read_and_split_data(image_dir, p_trn=0.5, p_val=0.2, ignored=None, new_cnames=None):
        """Split a folder-per-class image directory into train/val/test lists.

        The data are supposed to be organized into the following structure::

            images/
                dog/
                cat/
                horse/

        Args:
            image_dir: directory whose sub-folders are the categories.
            p_trn: fraction of each category assigned to train.
            p_val: fraction of each category assigned to val; the remainder
                goes to test.
            ignored: category folder names to skip. ``None`` (the default)
                skips nothing.
            new_cnames: optional mapping from folder name to the class name
                stored on the items.

        Returns:
            A ``(train, val, test)`` tuple of lists of ``Datum``.

        Raises:
            AssertionError: if any category ends up with an empty train, val
                or test portion.
        """
        # A None sentinel replaces the former mutable ``[]`` default.
        ignored = [] if ignored is None else ignored

        categories = sorted(c for c in listdir_nohidden(image_dir) if c not in ignored)

        p_tst = 1 - p_trn - p_val
        print(f"Splitting into {p_trn:.0%} train, {p_val:.0%} val, and {p_tst:.0%} test")

        def _collate(ims, y, c):
            # The label comes from enumerate() below, so it is already 0-based.
            return [Datum(impath=im, label=y, classname=c) for im in ims]

        train, val, test = [], [], []
        for label, category in enumerate(categories):
            category_dir = os.path.join(image_dir, category)
            images = [os.path.join(category_dir, im) for im in listdir_nohidden(category_dir)]
            random.shuffle(images)

            n_total = len(images)
            n_train = round(n_total * p_trn)
            n_val = round(n_total * p_val)
            n_test = n_total - n_train - n_val
            assert n_train > 0 and n_val > 0 and n_test > 0

            # Rename only after the folder name has been used to build paths.
            if new_cnames is not None and category in new_cnames:
                category = new_cnames[category]

            train.extend(_collate(images[:n_train], label, category))
            val.extend(_collate(images[n_train : n_train + n_val], label, category))
            test.extend(_collate(images[n_train + n_val :], label, category))

        return train, val, test
|
||||
@@ -0,0 +1,73 @@
|
||||
import os
|
||||
import pickle
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
from .dtd import DescribableTextures as DTD
|
||||
|
||||
# Folder name -> class name; used when a fresh split is generated and by
# EuroSAT.update_classname.
NEW_CNAMES = {
    "AnnualCrop": "Annual Crop Land",
    "Forest": "Forest",
    "HerbaceousVegetation": "Herbaceous Vegetation Land",
    "Highway": "Highway or Road",
    "Industrial": "Industrial Buildings",
    "Pasture": "Pasture Land",
    "PermanentCrop": "Permanent Crop Land",
    "Residential": "Residential Buildings",
    "River": "River",
    "SeaLake": "Sea or Lake",
}


@DATASET_REGISTRY.register()
class EuroSAT(DatasetBase):
    """Dataset stored in the ``eurosat`` folder under ``cfg.DATASET.ROOT``.

    The train/val/test split is read from ``split_zhou_EuroSAT.json`` when that
    file exists; otherwise it is generated from the ``2750`` image folder and
    written to that path. Few-shot subsets are cached as pickle files inside
    ``split_fewshot``.
    """

    dataset_dir = "eurosat"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "2750")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_EuroSAT.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if os.path.exists(self.split_path):
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)
        else:
            # No stored split yet: create one and persist it for later runs.
            train, val, test = DTD.read_and_split_data(self.image_dir, new_cnames=NEW_CNAMES)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            # One cache file per (shots, seed) combination.
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if os.path.exists(preprocessed):
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                train, val = data["train"], data["val"]
            else:
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                # The validation subset never uses more than 4 shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def update_classname(self, dataset_old):
        """Return copies of the items with class names mapped through ``NEW_CNAMES``.

        Args:
            dataset_old: iterable of items exposing ``impath``, ``label`` and
                ``classname``.

        Returns:
            A new list of ``Datum`` with the same paths and labels.

        Raises:
            KeyError: if an item's class name is not a key of ``NEW_CNAMES``.
        """
        dataset_new = []
        for item_old in dataset_old:
            # The original looked up ``NEW_CLASSNAMES``, a name that is not
            # defined in this module; the mapping is called ``NEW_CNAMES``.
            cname_new = NEW_CNAMES[item_old.classname]
            dataset_new.append(Datum(impath=item_old.impath, label=item_old.label, classname=cname_new))
        return dataset_new
|
||||
@@ -0,0 +1,71 @@
|
||||
import os
|
||||
import pickle
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class FGVCAircraft(DatasetBase):
    """Dataset stored in the ``fgvc_aircraft`` folder under ``cfg.DATASET.ROOT``.

    Class names come from ``variants.txt`` and the train/val/test items from
    the three ``images_variant_*.txt`` files. Few-shot subsets are cached as
    pickle files inside ``split_fewshot``.
    """

    dataset_dir = "fgvc_aircraft"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "images")
        self.split_fewshot_dir = os.path.join(base_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        # One class name per line; the line position becomes the label.
        with open(os.path.join(self.dataset_dir, "variants.txt"), "r") as handle:
            classnames = [row.strip() for row in handle.readlines()]
        cname2lab = {}
        for index, cname in enumerate(classnames):
            cname2lab[cname] = index

        train = self.read_data(cname2lab, "images_variant_train.txt")
        val = self.read_data(cname2lab, "images_variant_val.txt")
        test = self.read_data(cname2lab, "images_variant_test.txt")

        shots = cfg.DATASET.NUM_SHOTS
        if shots >= 1:
            # One cache file per (shots, seed) combination.
            cache_file = os.path.join(self.split_fewshot_dir, f"shot_{shots}-seed_{cfg.SEED}.pkl")

            if not os.path.exists(cache_file):
                train = self.generate_fewshot_dataset(train, num_shots=shots)
                # The validation subset never uses more than 4 shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(shots, 4))
                payload = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {cache_file}")
                with open(cache_file, "wb") as handle:
                    pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {cache_file}")
                with open(cache_file, "rb") as handle:
                    payload = pickle.load(handle)
                train = payload["train"]
                val = payload["val"]

        train, val, test = OxfordPets.subsample_classes(
            train, val, test, subsample=cfg.DATASET.SUBSAMPLE_CLASSES
        )

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, cname2lab, split_file):
        """Parse one split file into a list of ``Datum``.

        Each line holds an image name followed by the class name, separated by
        single spaces; ``.jpg`` is appended to the image name.

        Args:
            cname2lab: mapping from class name to integer label.
            split_file: file name relative to ``self.dataset_dir``.

        Returns:
            List of ``Datum`` in file order.
        """
        records = []

        with open(os.path.join(self.dataset_dir, split_file), "r") as handle:
            for row in handle.readlines():
                # First token is the image stem; the rest is the class name,
                # which may itself contain spaces.
                stem, *name_parts = row.strip().split(" ")
                imname = stem + ".jpg"
                classname = " ".join(name_parts)
                impath = os.path.join(self.image_dir, imname)
                records.append(Datum(impath=impath, label=cname2lab[classname], classname=classname))

        return records
|
||||
@@ -0,0 +1,51 @@
|
||||
import os
|
||||
import pickle
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
from .dtd import DescribableTextures as DTD
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class Food101(DatasetBase):
    """Dataset stored in the ``food-101`` folder under ``cfg.DATASET.ROOT``.

    The train/val/test split is read from ``split_zhou_Food101.json`` when that
    file exists; otherwise it is generated from the ``images`` folder and
    written to that path. Few-shot subsets are cached as pickle files inside
    ``split_fewshot``.
    """

    dataset_dir = "food-101"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "images")
        self.split_path = os.path.join(base_dir, "split_zhou_Food101.json")
        self.split_fewshot_dir = os.path.join(base_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.split_path):
            # No stored split yet: create one and persist it for later runs.
            train, val, test = DTD.read_and_split_data(self.image_dir)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)
        else:
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)

        shots = cfg.DATASET.NUM_SHOTS
        if shots >= 1:
            # One cache file per (shots, seed) combination.
            cache_file = os.path.join(self.split_fewshot_dir, f"shot_{shots}-seed_{cfg.SEED}.pkl")

            if not os.path.exists(cache_file):
                train = self.generate_fewshot_dataset(train, num_shots=shots)
                # The validation subset never uses more than 4 shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(shots, 4))
                payload = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {cache_file}")
                with open(cache_file, "wb") as handle:
                    pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {cache_file}")
                with open(cache_file, "rb") as handle:
                    payload = pickle.load(handle)
                train = payload["train"]
                val = payload["val"]

        train, val, test = OxfordPets.subsample_classes(
            train, val, test, subsample=cfg.DATASET.SUBSAMPLE_CLASSES
        )

        super().__init__(train_x=train, val=val, test=test)
|
||||
@@ -0,0 +1,91 @@
|
||||
import os
|
||||
import pickle
|
||||
from collections import OrderedDict
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import listdir_nohidden, mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class ImageNet(DatasetBase):
    """Dataset stored in the ``imagenet`` folder under ``cfg.DATASET.ROOT``.

    The full train/test item lists are cached in ``preprocessed.pkl``; few-shot
    training subsets are cached as pickle files inside ``split_fewshot``. The
    same item list is passed to ``DatasetBase`` as both ``val`` and ``test``.
    """

    dataset_dir = "imagenet"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "images")
        self.preprocessed = os.path.join(base_dir, "preprocessed.pkl")
        self.split_fewshot_dir = os.path.join(base_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.preprocessed):
            names_file = os.path.join(self.dataset_dir, "classnames.txt")
            classnames = self.read_classnames(names_file)
            train = self.read_data(classnames, "train")
            # Follow standard practice to perform evaluation on the val set
            # Also used as the val set (so evaluate the last-step model)
            test = self.read_data(classnames, "val")

            full_cache = {"train": train, "test": test}
            with open(self.preprocessed, "wb") as handle:
                pickle.dump(full_cache, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            with open(self.preprocessed, "rb") as handle:
                full_cache = pickle.load(handle)
            train = full_cache["train"]
            test = full_cache["test"]

        shots = cfg.DATASET.NUM_SHOTS
        if shots >= 1:
            # One cache file per (shots, seed) combination; only train is cached.
            cache_file = os.path.join(self.split_fewshot_dir, f"shot_{shots}-seed_{cfg.SEED}.pkl")

            if not os.path.exists(cache_file):
                train = self.generate_fewshot_dataset(train, num_shots=shots)
                payload = {"train": train}
                print(f"Saving preprocessed few-shot data to {cache_file}")
                with open(cache_file, "wb") as handle:
                    pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {cache_file}")
                with open(cache_file, "rb") as handle:
                    payload = pickle.load(handle)
                train = payload["train"]

        train, test = OxfordPets.subsample_classes(train, test, subsample=cfg.DATASET.SUBSAMPLE_CLASSES)

        super().__init__(train_x=train, val=test, test=test)

    @staticmethod
    def read_classnames(text_file):
        """Return an ordered mapping of <folder name>: <class name>.

        Each line of ``text_file`` is split on single spaces: the first token
        is the folder name and the remaining tokens, re-joined with spaces,
        form the class name. Entries keep file order.
        """
        mapping = OrderedDict()
        with open(text_file, "r") as handle:
            for row in handle.readlines():
                folder, *name_parts = row.strip().split(" ")
                mapping[folder] = " ".join(name_parts)
        return mapping

    def read_data(self, classnames, split_dir):
        """List every image under ``<image_dir>/<split_dir>/<folder>/``.

        Args:
            classnames: mapping from folder name to class name.
            split_dir: sub-directory of ``self.image_dir`` to scan.

        Returns:
            List of ``Datum``; labels are the positions of the folders in
            sorted order.
        """
        split_root = os.path.join(self.image_dir, split_dir)
        folders = [entry.name for entry in os.scandir(split_root) if entry.is_dir()]
        folders.sort()

        records = []
        for label, folder in enumerate(folders):
            imnames = listdir_nohidden(os.path.join(split_root, folder))
            classname = classnames[folder]
            records.extend(
                Datum(impath=os.path.join(split_root, folder, imname), label=label, classname=classname)
                for imname in imnames
            )

        return records
|
||||
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import listdir_nohidden
|
||||
|
||||
from .imagenet import ImageNet
|
||||
|
||||
# Entries of the image directory that are not class folders.
TO_BE_IGNORED = ["README.txt"]


@DATASET_REGISTRY.register()
class ImageNetA(DatasetBase):
    """ImageNet-A(dversarial).

    This dataset is used for testing only.
    """

    dataset_dir = "imagenet-adversarial"

    def __init__(self, cfg):
        """Load all items and pass the same list as ``train_x`` and ``test``.

        Reads ``cfg.DATASET.ROOT`` only.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "imagenet-a")

        names_file = os.path.join(self.dataset_dir, "classnames.txt")
        items = self.read_data(ImageNet.read_classnames(names_file))

        super().__init__(train_x=items, test=items)

    def read_data(self, classnames):
        """List every image under ``self.image_dir/<folder>/``.

        Args:
            classnames: mapping from folder name to class name.

        Returns:
            List of ``Datum``; labels are the positions of the folders in the
            sorted listing after ``TO_BE_IGNORED`` entries are removed.
        """
        root_dir = self.image_dir
        folders = [name for name in listdir_nohidden(root_dir, sort=True) if name not in TO_BE_IGNORED]

        records = []
        for label, folder in enumerate(folders):
            imnames = listdir_nohidden(os.path.join(root_dir, folder))
            classname = classnames[folder]
            records.extend(
                Datum(impath=os.path.join(root_dir, folder, imname), label=label, classname=classname)
                for imname in imnames
            )

        return records
|
||||
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import listdir_nohidden
|
||||
|
||||
from .imagenet import ImageNet
|
||||
|
||||
# Entries of the image directory that are not class folders.
TO_BE_IGNORED = ["README.txt"]


@DATASET_REGISTRY.register()
class ImageNetR(DatasetBase):
    """ImageNet-R(endition).

    This dataset is used for testing only.
    """

    dataset_dir = "imagenet-rendition"

    def __init__(self, cfg):
        """Load all items and pass the same list as ``train_x`` and ``test``.

        Reads ``cfg.DATASET.ROOT`` only.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "imagenet-r")

        names_file = os.path.join(self.dataset_dir, "classnames.txt")
        items = self.read_data(ImageNet.read_classnames(names_file))

        super().__init__(train_x=items, test=items)

    def read_data(self, classnames):
        """List every image under ``self.image_dir/<folder>/``.

        Args:
            classnames: mapping from folder name to class name.

        Returns:
            List of ``Datum``; labels are the positions of the folders in the
            sorted listing after ``TO_BE_IGNORED`` entries are removed.
        """
        root_dir = self.image_dir
        folders = [name for name in listdir_nohidden(root_dir, sort=True) if name not in TO_BE_IGNORED]

        records = []
        for label, folder in enumerate(folders):
            imnames = listdir_nohidden(os.path.join(root_dir, folder))
            classname = classnames[folder]
            records.extend(
                Datum(impath=os.path.join(root_dir, folder, imname), label=label, classname=classname)
                for imname in imnames
            )

        return records
|
||||
@@ -0,0 +1,43 @@
|
||||
import os
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import listdir_nohidden
|
||||
|
||||
from .imagenet import ImageNet
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class ImageNetSketch(DatasetBase):
    """ImageNet-Sketch.

    This dataset is used for testing only.
    """

    dataset_dir = "imagenet-sketch"

    def __init__(self, cfg):
        """Load all items and pass the same list as ``train_x`` and ``test``.

        Reads ``cfg.DATASET.ROOT`` only.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "images")

        names_file = os.path.join(self.dataset_dir, "classnames.txt")
        items = self.read_data(ImageNet.read_classnames(names_file))

        super().__init__(train_x=items, test=items)

    def read_data(self, classnames):
        """List every image under ``self.image_dir/<folder>/``.

        Args:
            classnames: mapping from folder name to class name.

        Returns:
            List of ``Datum``; labels are the positions of the folders in the
            sorted listing.
        """
        root_dir = self.image_dir
        folders = listdir_nohidden(root_dir, sort=True)

        records = []
        for label, folder in enumerate(folders):
            imnames = listdir_nohidden(os.path.join(root_dir, folder))
            classname = classnames[folder]
            records.extend(
                Datum(impath=os.path.join(root_dir, folder, imname), label=label, classname=classname)
                for imname in imnames
            )

        return records
|
||||
@@ -0,0 +1,46 @@
|
||||
import os
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import listdir_nohidden
|
||||
|
||||
from .imagenet import ImageNet
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class ImageNetV2(DatasetBase):
    """ImageNetV2.

    This dataset is used for testing only.
    """

    dataset_dir = "imagenetv2"

    def __init__(self, cfg):
        """Load all items and pass the same list as ``train_x`` and ``test``.

        Reads ``cfg.DATASET.ROOT`` only.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "imagenetv2-matched-frequency-format-val")

        names_file = os.path.join(self.dataset_dir, "classnames.txt")
        items = self.read_data(ImageNet.read_classnames(names_file))

        super().__init__(train_x=items, test=items)

    def read_data(self, classnames):
        """List every image under ``self.image_dir/<label>/`` for labels 0..999.

        Args:
            classnames: mapping from folder name to class name; the label is
                used as an index into its key order to find the class name.

        Returns:
            List of ``Datum`` ordered by label.
        """
        root_dir = self.image_dir
        folder_names = list(classnames.keys())

        records = []
        # Class directories are named by their integer label.
        for label in range(1000):
            class_dir = os.path.join(root_dir, str(label))
            imnames = listdir_nohidden(class_dir)
            classname = classnames[folder_names[label]]
            records.extend(
                Datum(impath=os.path.join(class_dir, imname), label=label, classname=classname)
                for imname in imnames
            )

        return records
|
||||
@@ -0,0 +1,89 @@
|
||||
import os
|
||||
import pickle
|
||||
import random
|
||||
from scipy.io import loadmat
|
||||
from collections import defaultdict
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import read_json, mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class OxfordFlowers(DatasetBase):
    """Dataset stored in the ``oxford_flowers`` folder under ``cfg.DATASET.ROOT``.

    The train/val/test split is read from ``split_zhou_OxfordFlowers.json``
    when that file exists; otherwise it is generated by :meth:`read_data` and
    written to that path. Few-shot subsets are cached as pickle files inside
    ``split_fewshot``.
    """

    dataset_dir = "oxford_flowers"

    def __init__(self, cfg):
        """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
        ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
        requested, ``cfg.SEED``.
        """
        data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        base_dir = os.path.join(data_root, self.dataset_dir)
        self.dataset_dir = base_dir
        self.image_dir = os.path.join(base_dir, "jpg")
        self.label_file = os.path.join(base_dir, "imagelabels.mat")
        self.lab2cname_file = os.path.join(base_dir, "cat_to_name.json")
        self.split_path = os.path.join(base_dir, "split_zhou_OxfordFlowers.json")
        self.split_fewshot_dir = os.path.join(base_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.split_path):
            # No stored split yet: create one and persist it for later runs.
            train, val, test = self.read_data()
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)
        else:
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)

        shots = cfg.DATASET.NUM_SHOTS
        if shots >= 1:
            # One cache file per (shots, seed) combination.
            cache_file = os.path.join(self.split_fewshot_dir, f"shot_{shots}-seed_{cfg.SEED}.pkl")

            if not os.path.exists(cache_file):
                train = self.generate_fewshot_dataset(train, num_shots=shots)
                # The validation subset never uses more than 4 shots.
                val = self.generate_fewshot_dataset(val, num_shots=min(shots, 4))
                payload = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {cache_file}")
                with open(cache_file, "wb") as handle:
                    pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {cache_file}")
                with open(cache_file, "rb") as handle:
                    payload = pickle.load(handle)
                train = payload["train"]
                val = payload["val"]

        train, val, test = OxfordPets.subsample_classes(
            train, val, test, subsample=cfg.DATASET.SUBSAMPLE_CLASSES
        )

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self):
        """Group images by label and split each group 50/20/30.

        Labels come from the ``labels`` array of ``self.label_file``; the i-th
        entry (counting from 1) belongs to ``image_<i, 5 digits>.jpg``. Class
        names are looked up in ``self.lab2cname_file`` by the label's string
        form.

        Returns:
            A ``(train, val, test)`` tuple of lists of ``Datum``.

        Raises:
            AssertionError: if any label ends up with an empty train, val or
                test portion.
        """
        paths_by_label = defaultdict(list)
        raw_labels = loadmat(self.label_file)["labels"][0]
        for number, raw in enumerate(raw_labels, start=1):
            impath = os.path.join(self.image_dir, f"image_{number:05d}.jpg")
            paths_by_label[int(raw)].append(impath)

        print("Splitting data into 50% train, 20% val, and 30% test")

        def _collate(ims, y, c):
            # convert to 0-based label
            return [Datum(impath=im, label=y - 1, classname=c) for im in ims]

        lab2cname = read_json(self.lab2cname_file)
        train, val, test = [], [], []
        for label, impaths in paths_by_label.items():
            random.shuffle(impaths)
            n_total = len(impaths)
            n_train = round(n_total * 0.5)
            n_val = round(n_total * 0.2)
            n_test = n_total - n_train - n_val
            assert n_train > 0 and n_val > 0 and n_test > 0

            cname = lab2cname[str(label)]
            val_end = n_train + n_val
            train.extend(_collate(impaths[:n_train], label, cname))
            val.extend(_collate(impaths[n_train:val_end], label, cname))
            test.extend(_collate(impaths[val_end:], label, cname))

        return train, val, test
|
||||
@@ -0,0 +1,186 @@
|
||||
import os
|
||||
import pickle
|
||||
import math
|
||||
import random
|
||||
from collections import defaultdict
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import read_json, write_json, mkdir_if_missing
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
|
||||
class OxfordPets(DatasetBase):
|
||||
|
||||
dataset_dir = "oxford_pets"
|
||||
|
||||
def __init__(self, cfg):
    """Build the splits selected by ``cfg`` and pass them to ``DatasetBase``.

    The split is read from ``split_zhou_OxfordPets.json`` when that file
    exists; otherwise it is built from ``trainval.txt`` and ``test.txt`` in
    the annotation folder and written to that path. Reads
    ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``,
    ``cfg.DATASET.SUBSAMPLE_CLASSES`` and, when few-shot sampling is
    requested, ``cfg.SEED``.
    """
    data_root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
    base_dir = os.path.join(data_root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.image_dir = os.path.join(base_dir, "images")
    self.anno_dir = os.path.join(base_dir, "annotations")
    self.split_path = os.path.join(base_dir, "split_zhou_OxfordPets.json")
    self.split_fewshot_dir = os.path.join(base_dir, "split_fewshot")
    mkdir_if_missing(self.split_fewshot_dir)

    if not os.path.exists(self.split_path):
        # No stored split yet: create one and persist it for later runs.
        trainval = self.read_data(split_file="trainval.txt")
        test = self.read_data(split_file="test.txt")
        train, val = self.split_trainval(trainval)
        self.save_split(train, val, test, self.split_path, self.image_dir)
    else:
        train, val, test = self.read_split(self.split_path, self.image_dir)

    shots = cfg.DATASET.NUM_SHOTS
    if shots >= 1:
        # One cache file per (shots, seed) combination.
        cache_file = os.path.join(self.split_fewshot_dir, f"shot_{shots}-seed_{cfg.SEED}.pkl")

        if not os.path.exists(cache_file):
            train = self.generate_fewshot_dataset(train, num_shots=shots)
            # The validation subset never uses more than 4 shots.
            val = self.generate_fewshot_dataset(val, num_shots=min(shots, 4))
            payload = {"train": train, "val": val}
            print(f"Saving preprocessed few-shot data to {cache_file}")
            with open(cache_file, "wb") as handle:
                pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            print(f"Loading preprocessed few-shot data from {cache_file}")
            with open(cache_file, "rb") as handle:
                payload = pickle.load(handle)
            train = payload["train"]
            val = payload["val"]

    train, val, test = self.subsample_classes(
        train, val, test, subsample=cfg.DATASET.SUBSAMPLE_CLASSES
    )

    super().__init__(train_x=train, val=val, test=test)
|
||||
|
||||
def read_data(self, split_file):
    """Parse one annotation file into a list of ``Datum``.

    Every line must hold exactly four space-separated fields; the first is
    the image name and the second the 1-based label. The class name is the
    image name with its last ``_``-separated part removed, lower-cased.

    Args:
        split_file: file name relative to ``self.anno_dir``.

    Returns:
        List of ``Datum`` in file order.
    """
    records = []

    with open(os.path.join(self.anno_dir, split_file), "r") as handle:
        for row in handle.readlines():
            name, raw_label, _species, _unused = row.strip().split(" ")
            breed = "_".join(name.split("_")[:-1]).lower()
            impath = os.path.join(self.image_dir, name + ".jpg")
            # convert to 0-based index
            records.append(Datum(impath=impath, label=int(raw_label) - 1, classname=breed))

    return records
|
||||
|
||||
@staticmethod
def split_trainval(trainval, p_val=0.2):
    """Split ``trainval`` into train and val lists, label by label.

    For each label, ``round(count * p_val)`` randomly chosen items go to val
    and the rest to train.

    Args:
        trainval: sequence of items exposing a ``label`` attribute.
        p_val: fraction of each label's items assigned to val.

    Returns:
        A ``(train, val)`` tuple of lists.

    Raises:
        AssertionError: if a label would receive no val items.
    """
    p_trn = 1 - p_val
    print(f"Splitting trainval into {p_trn:.0%} train and {p_val:.0%} val")

    # Positions of the items, grouped by label in first-seen order.
    positions_by_label = defaultdict(list)
    for position, sample in enumerate(trainval):
        positions_by_label[sample.label].append(position)

    train, val = [], []
    for positions in positions_by_label.values():
        n_val = round(len(positions) * p_val)
        assert n_val > 0
        random.shuffle(positions)
        # The first n_val shuffled positions form the val portion.
        val.extend(trainval[p] for p in positions[:n_val])
        train.extend(trainval[p] for p in positions[n_val:])

    return train, val
|
||||
|
||||
@staticmethod
def save_split(train, val, test, filepath, path_prefix):
    """Write the three splits to ``filepath`` as JSON.

    Each item is stored as ``(impath, label, classname)`` with ``path_prefix``
    and one following ``/`` removed from the front of ``impath``.

    Args:
        train, val, test: iterables of items exposing ``impath``, ``label``
            and ``classname``.
        filepath: destination passed to ``write_json``.
        path_prefix: leading directory to strip from every image path.
    """

    def _extract(items):
        out = []
        for item in items:
            impath = item.impath
            # Strip the prefix only where it leads the path; str.replace would
            # also delete any later occurrence of the same text.
            if impath.startswith(path_prefix):
                impath = impath[len(path_prefix):]
            if impath.startswith("/"):
                impath = impath[1:]
            out.append((impath, item.label, item.classname))
        return out

    split = {"train": _extract(train), "val": _extract(val), "test": _extract(test)}

    write_json(split, filepath)
    print(f"Saved split to {filepath}")
|
||||
|
||||
@staticmethod
def read_split(filepath, path_prefix):
    """Load a JSON split file and rebuild the train/val/test item lists.

    The file is read with ``read_json`` and must provide ``"train"``,
    ``"val"`` and ``"test"`` entries, each a sequence of
    ``(impath, label, classname)`` triples. Every ``impath`` is joined onto
    ``path_prefix`` and every label is cast to ``int``.

    Args:
        filepath (str): location of the JSON split file.
        path_prefix (str): directory prepended to each stored image path.

    Returns:
        tuple: ``(train, val, test)`` lists of ``Datum`` objects.
    """

    def _convert(records):
        return [
            Datum(
                impath=os.path.join(path_prefix, rel_path),
                label=int(label),
                classname=classname,
            )
            for rel_path, label, classname in records
        ]

    print(f"Reading split from {filepath}")
    split = read_json(filepath)

    train = _convert(split["train"])
    val = _convert(split["val"])
    test = _convert(split["test"])
    return train, val, test
|
||||
|
||||
@staticmethod
|
||||
def subsample_classes(*args, subsample="all"):
|
||||
"""Divide classes into two groups. The first group
|
||||
represents base classes while the second group represents
|
||||
new classes.
|
||||
|
||||
Args:
|
||||
args: a list of datasets, e.g. train, val and test.
|
||||
subsample (str): what classes to subsample.
|
||||
"""
|
||||
assert subsample in ["all", "base", "new"]
|
||||
|
||||
if subsample == "all":
|
||||
return args
|
||||
|
||||
dataset = args[0]
|
||||
labels = set()
|
||||
for item in dataset:
|
||||
labels.add(item.label)
|
||||
labels = list(labels)
|
||||
labels.sort()
|
||||
n = len(labels)
|
||||
# Divide classes into two halves
|
||||
m = math.ceil(n / 2)
|
||||
|
||||
print(f"SUBSAMPLE {subsample.upper()} CLASSES!")
|
||||
if subsample == "base":
|
||||
selected = labels[:m] # take the first half
|
||||
else:
|
||||
selected = labels[m:] # take the second half
|
||||
relabeler = {y: y_new for y_new, y in enumerate(selected)}
|
||||
|
||||
output = []
|
||||
for dataset in args:
|
||||
dataset_new = []
|
||||
for item in dataset:
|
||||
if item.label not in selected:
|
||||
continue
|
||||
item_new = Datum(
|
||||
impath=item.impath,
|
||||
label=relabeler[item.label],
|
||||
classname=item.classname
|
||||
)
|
||||
dataset_new.append(item_new)
|
||||
output.append(dataset_new)
|
||||
|
||||
return output
|
||||
@@ -0,0 +1,75 @@
|
||||
import os
|
||||
import pickle
|
||||
from scipy.io import loadmat
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class StanfordCars(DatasetBase):
    """Stanford Cars dataset with a cached split and cached few-shot subsets.

    The train/val/test split is read from ``split_zhou_StanfordCars.json``
    when that file exists; otherwise it is built from the ``.mat``
    annotation files and written to that path. Few-shot subsets are cached
    as pickle files under ``split_fewshot``.
    """

    dataset_dir = "stanford_cars"

    def __init__(self, cfg):
        """Build the dataset from ``cfg``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``, ``cfg.SEED``
        and ``cfg.DATASET.SUBSAMPLE_CLASSES``.
        """
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_StanfordCars.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.split_path):
            # No cached split yet: derive it from the annotation files and save it.
            devkit_dir = os.path.join(self.dataset_dir, "devkit")
            trainval_file = os.path.join(devkit_dir, "cars_train_annos.mat")
            test_file = os.path.join(self.dataset_dir, "cars_test_annos_withlabels.mat")
            meta_file = os.path.join(devkit_dir, "cars_meta.mat")
            trainval = self.read_data("cars_train", trainval_file, meta_file)
            test = self.read_data("cars_test", test_file, meta_file)
            train, val = OxfordPets.split_trainval(trainval)
            OxfordPets.save_split(train, val, test, self.split_path, self.dataset_dir)
        else:
            train, val, test = OxfordPets.read_split(self.split_path, self.dataset_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if not os.path.exists(preprocessed):
                # generate_fewshot_dataset is not defined in this class;
                # presumably inherited from DatasetBase.
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # only safe if the cache was produced locally.
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                train, val = data["train"], data["val"]

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, image_dir, anno_file, meta_file):
        """Turn a ``.mat`` annotation file into a list of ``Datum`` items.

        Args:
            image_dir (str): image folder, relative to ``self.dataset_dir``.
            anno_file (str): ``.mat`` file providing an ``"annotations"`` entry.
            meta_file (str): ``.mat`` file providing a ``"class_names"`` entry.

        Returns:
            list: one ``Datum`` per annotation, in annotation order.
        """
        annotations = loadmat(anno_file)["annotations"][0]
        class_names = loadmat(meta_file)["class_names"][0]

        items = []
        for anno in annotations:
            impath = os.path.join(self.dataset_dir, image_dir, anno["fname"][0])
            label = int(anno["class"][0, 0]) - 1  # convert to 0-based index
            # Move the last space-separated word of the class name to the front.
            *leading_words, last_word = class_names[label][0].split(" ")
            classname = " ".join([last_word, *leading_words])
            items.append(Datum(impath=impath, label=label, classname=classname))

        return items
|
||||
@@ -0,0 +1,80 @@
|
||||
import os
|
||||
import pickle
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class SUN397(DatasetBase):
    """SUN397 dataset with a cached split and cached few-shot subsets.

    The train/val/test split is read from ``split_zhou_SUN397.json`` when
    that file exists; otherwise it is built from ``ClassName.txt``,
    ``Training_01.txt`` and ``Testing_01.txt`` and written to that path.
    Few-shot subsets are cached as pickle files under ``split_fewshot``.
    """

    dataset_dir = "sun397"

    def __init__(self, cfg):
        """Build the dataset from ``cfg``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``, ``cfg.SEED``
        and ``cfg.DATASET.SUBSAMPLE_CLASSES``.
        """
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "SUN397")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_SUN397.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.split_path):
            # No cached split yet: derive it from the list files and save it.
            with open(os.path.join(self.dataset_dir, "ClassName.txt"), "r") as f:
                # Each entry loses its first character (a leading "/").
                classnames = [line.strip()[1:] for line in f]
            cname2lab = {c: i for i, c in enumerate(classnames)}
            trainval = self.read_data(cname2lab, "Training_01.txt")
            test = self.read_data(cname2lab, "Testing_01.txt")
            train, val = OxfordPets.split_trainval(trainval)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)
        else:
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if not os.path.exists(preprocessed):
                # generate_fewshot_dataset is not defined in this class;
                # presumably inherited from DatasetBase.
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # only safe if the cache was produced locally.
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                train, val = data["train"], data["val"]

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, cname2lab, text_file):
        """Turn an image list file into a list of ``Datum`` items.

        Args:
            cname2lab (dict): maps an image's directory path (as written in
                the list file, minus the leading character) to its label.
            text_file (str): list file name, relative to ``self.dataset_dir``.

        Returns:
            list: one ``Datum`` per listed image, in file order.
        """
        text_file = os.path.join(self.dataset_dir, text_file)
        items = []

        with open(text_file, "r") as f:
            for raw_line in f:
                imname = raw_line.strip()[1:]  # remove /
                folder = os.path.dirname(imname)
                label = cname2lab[folder]
                impath = os.path.join(self.image_dir, imname)

                # Drop the first path component (the 1st letter), then
                # reverse so words like indoor/outdoor come first.
                words = folder.split("/")[1:]
                classname = " ".join(reversed(words))

                items.append(Datum(impath=impath, label=label, classname=classname))

        return items
|
||||
@@ -0,0 +1,84 @@
|
||||
import os
|
||||
import pickle
|
||||
import re
|
||||
|
||||
from dassl.data.datasets import DATASET_REGISTRY, Datum, DatasetBase
|
||||
from dassl.utils import mkdir_if_missing
|
||||
|
||||
from .oxford_pets import OxfordPets
|
||||
|
||||
|
||||
@DATASET_REGISTRY.register()
class UCF101(DatasetBase):
    """UCF101 (middle-frame images) with a cached split and few-shot subsets.

    The train/val/test split is read from ``split_zhou_UCF101.json`` when
    that file exists; otherwise it is built from the list files under
    ``ucfTrainTestlist`` and written to that path. Few-shot subsets are
    cached as pickle files under ``split_fewshot``.
    """

    dataset_dir = "ucf101"

    def __init__(self, cfg):
        """Build the dataset from ``cfg``.

        Reads ``cfg.DATASET.ROOT``, ``cfg.DATASET.NUM_SHOTS``, ``cfg.SEED``
        and ``cfg.DATASET.SUBSAMPLE_CLASSES``.
        """
        root = os.path.abspath(os.path.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = os.path.join(root, self.dataset_dir)
        self.image_dir = os.path.join(self.dataset_dir, "UCF-101-midframes")
        self.split_path = os.path.join(self.dataset_dir, "split_zhou_UCF101.json")
        self.split_fewshot_dir = os.path.join(self.dataset_dir, "split_fewshot")
        mkdir_if_missing(self.split_fewshot_dir)

        if not os.path.exists(self.split_path):
            # No cached split yet: derive it from the list files and save it.
            cname2lab = {}
            filepath = os.path.join(self.dataset_dir, "ucfTrainTestlist/classInd.txt")
            with open(filepath, "r") as f:
                for raw_line in f:
                    label, classname = raw_line.strip().split(" ")
                    cname2lab[classname] = int(label) - 1  # convert to 0-based index

            trainval = self.read_data(cname2lab, "ucfTrainTestlist/trainlist01.txt")
            test = self.read_data(cname2lab, "ucfTrainTestlist/testlist01.txt")
            train, val = OxfordPets.split_trainval(trainval)
            OxfordPets.save_split(train, val, test, self.split_path, self.image_dir)
        else:
            train, val, test = OxfordPets.read_split(self.split_path, self.image_dir)

        num_shots = cfg.DATASET.NUM_SHOTS
        if num_shots >= 1:
            seed = cfg.SEED
            preprocessed = os.path.join(self.split_fewshot_dir, f"shot_{num_shots}-seed_{seed}.pkl")

            if not os.path.exists(preprocessed):
                # generate_fewshot_dataset is not defined in this class;
                # presumably inherited from DatasetBase.
                train = self.generate_fewshot_dataset(train, num_shots=num_shots)
                val = self.generate_fewshot_dataset(val, num_shots=min(num_shots, 4))
                data = {"train": train, "val": val}
                print(f"Saving preprocessed few-shot data to {preprocessed}")
                with open(preprocessed, "wb") as file:
                    pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)
            else:
                print(f"Loading preprocessed few-shot data from {preprocessed}")
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # only safe if the cache was produced locally.
                with open(preprocessed, "rb") as file:
                    data = pickle.load(file)
                train, val = data["train"], data["val"]

        subsample = cfg.DATASET.SUBSAMPLE_CLASSES
        train, val, test = OxfordPets.subsample_classes(train, val, test, subsample=subsample)

        super().__init__(train_x=train, val=val, test=test)

    def read_data(self, cname2lab, text_file):
        """Turn a video list file into a list of ``Datum`` items.

        Only the first space-separated field of each line is used; it must
        have the form ``<action>/<filename>``. The action name is split at
        every capital letter and re-joined with underscores to form both
        the image sub-directory and the class name, and ``.avi`` in the
        file name is replaced by ``.jpg``.

        Args:
            cname2lab (dict): maps the original action name to its label.
            text_file (str): list file path, relative to ``self.dataset_dir``.

        Returns:
            list: one ``Datum`` per listed video, in file order.
        """
        text_file = os.path.join(self.dataset_dir, text_file)
        items = []

        with open(text_file, "r") as f:
            for raw_line in f:
                entry = raw_line.strip().split(" ")[0]  # trainlist: filename, label
                action, filename = entry.split("/")
                label = cname2lab[action]

                # Split the action name at each capital letter and join with "_".
                renamed_action = "_".join(re.findall("[A-Z][^A-Z]*", action))

                image_name = filename.replace(".avi", ".jpg")
                impath = os.path.join(self.image_dir, renamed_action, image_name)

                items.append(Datum(impath=impath, label=label, classname=renamed_action))

        return items
|
||||
Reference in New Issue
Block a user