release code

This commit is contained in:
miunangel
2025-08-16 20:46:31 +08:00
commit 3dc26db3b9
277 changed files with 60106 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
from .svhn import SVHN
from .cifar import CIFAR10, CIFAR100
from .stl10 import STL10

View File

@@ -0,0 +1,108 @@
import math
import random
import os.path as osp
from dassl.utils import listdir_nohidden
from ..build import DATASET_REGISTRY
from ..base_dataset import Datum, DatasetBase
@DATASET_REGISTRY.register()
class CIFAR10(DatasetBase):
    """CIFAR10 for SSL.

    Images are read from ``<cfg.DATASET.ROOT>/cifar10/{train,test}``, with
    one sub-folder per class. A class label is the position of its folder
    name in the sorted list of class folders.

    Reference:
        - Krizhevsky. Learning Multiple Layers of Features
        from Tiny Images. Tech report.
    """

    dataset_dir = "cifar10"

    def __init__(self, cfg):
        """Build the labeled/unlabeled/val/test splits described by ``cfg``.

        Reads ``cfg.DATASET.ROOT``, ``NUM_LABELED``, ``VAL_PERCENT`` and
        ``ALL_AS_UNLABELED``.
        """
        data_root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(data_root, self.dataset_dir)

        assert cfg.DATASET.NUM_LABELED > 0

        train_x, train_u, val = self._read_data_train(
            osp.join(self.dataset_dir, "train"),
            cfg.DATASET.NUM_LABELED,
            cfg.DATASET.VAL_PERCENT,
        )
        test = self._read_data_test(osp.join(self.dataset_dir, "test"))

        if cfg.DATASET.ALL_AS_UNLABELED:
            # The labeled samples are additionally exposed as unlabeled data.
            train_u = train_u + train_x

        if not val:
            # An empty validation split is passed on as "no validation set".
            val = None

        super().__init__(train_x=train_x, train_u=train_u, val=val, test=test)

    def _read_data_train(self, data_dir, num_labeled, val_percent):
        """Split the training folder into labeled, unlabeled and val items.

        Args:
            data_dir: folder containing one sub-folder per class.
            num_labeled: total labeled budget, divided evenly over classes.
            val_percent: fraction of each class held out for validation
                (0 disables the validation split).

        Returns:
            Tuple ``(labeled, unlabeled, val)`` of ``Datum`` lists.
        """
        class_names = sorted(listdir_nohidden(data_dir))
        # May be fractional; only ranks not exceeding it are labeled.
        per_class_quota = num_labeled / len(class_names)

        labeled, unlabeled, held_out = [], [], []

        for label, class_name in enumerate(class_names):
            class_dir = osp.join(data_dir, class_name)
            filenames = listdir_nohidden(class_dir)
            # NOTE(review): the listing is split exactly as returned; whether
            # that order is stable depends on listdir_nohidden -- confirm.

            # Validation takes the leading slice (following Oliver et al.
            # 2018); only the remainder is shuffled, i.e. shuffle after split.
            val_count = math.floor(len(filenames) * val_percent)
            val_names = filenames[:val_count]
            train_names = filenames[val_count:]
            random.shuffle(train_names)

            for rank, fname in enumerate(train_names, start=1):
                datum = Datum(impath=osp.join(class_dir, fname), label=label)
                bucket = labeled if rank <= per_class_quota else unlabeled
                bucket.append(datum)

            held_out.extend(
                Datum(impath=osp.join(class_dir, fname), label=label)
                for fname in val_names
            )

        return labeled, unlabeled, held_out

    def _read_data_test(self, data_dir):
        """Return one ``Datum`` per image found under ``data_dir``.

        Labels follow the sorted order of the class sub-folders.
        """
        class_names = sorted(listdir_nohidden(data_dir))

        items = []
        for label, class_name in enumerate(class_names):
            class_dir = osp.join(data_dir, class_name)
            items.extend(
                Datum(impath=osp.join(class_dir, fname), label=label)
                for fname in listdir_nohidden(class_dir)
            )
        return items
@DATASET_REGISTRY.register()
class CIFAR100(CIFAR10):
    """CIFAR100 for SSL.

    Subclass of ``CIFAR10`` that only overrides the dataset folder name;
    construction is delegated to the parent class.

    Reference:
        - Krizhevsky. Learning Multiple Layers of Features
        from Tiny Images. Tech report.
    """

    dataset_dir = "cifar100"

    def __init__(self, cfg):
        # Nothing specific to set up here: defer entirely to the parent.
        super().__init__(cfg)

View File

@@ -0,0 +1,87 @@
import numpy as np
import os.path as osp
from dassl.utils import listdir_nohidden
from ..build import DATASET_REGISTRY
from ..base_dataset import Datum, DatasetBase
@DATASET_REGISTRY.register()
class STL10(DatasetBase):
    """STL-10 dataset.

    Description:
        - 10 classes: airplane, bird, car, cat, deer, dog, horse,
        monkey, ship, truck.
        - Images are 96x96 pixels, color.
        - 500 training images per class, 800 test images per class.
        - 100,000 unlabeled images for unsupervised learning.

    Images are read from ``<cfg.DATASET.ROOT>/stl10/{train,test,unlabeled}``.
    The label is taken from the file name: the token after the first
    underscore, with the extension removed.

    Reference:
        - Coates et al. An Analysis of Single Layer Networks in
        Unsupervised Feature Learning. AISTATS 2011.
    """

    dataset_dir = "stl10"

    def __init__(self, cfg):
        """Build the labeled, unlabeled and test splits described by ``cfg``.

        Reads ``cfg.DATASET.ROOT``, ``STL10_FOLD`` and ``ALL_AS_UNLABELED``.
        """
        root = osp.abspath(osp.expanduser(cfg.DATASET.ROOT))
        self.dataset_dir = osp.join(root, self.dataset_dir)
        train_dir = osp.join(self.dataset_dir, "train")
        test_dir = osp.join(self.dataset_dir, "test")
        unlabeled_dir = osp.join(self.dataset_dir, "unlabeled")
        fold_file = osp.join(
            self.dataset_dir, "stl10_binary", "fold_indices.txt"
        )

        # Only use the first five splits
        assert 0 <= cfg.DATASET.STL10_FOLD <= 4

        train_x = self._read_data_train(
            train_dir, cfg.DATASET.STL10_FOLD, fold_file
        )
        train_u = self._read_data_all(unlabeled_dir)
        test = self._read_data_all(test_dir)

        if cfg.DATASET.ALL_AS_UNLABELED:
            # The labeled samples are additionally exposed as unlabeled data.
            train_u = train_u + train_x

        super().__init__(train_x=train_x, train_u=train_u, test=test)

    def _read_data_train(self, data_dir, fold, fold_file):
        """Return the labeled training items selected by a fold.

        Args:
            data_dir: folder holding the training images.
            fold: line number (0-based) of ``fold_file`` to use; a negative
                value selects every image in ``data_dir``.
            fold_file: text file with one fold per line, each line being
                whitespace-separated integer positions into the sorted
                listing of ``data_dir``.

        Returns:
            List of ``Datum`` objects, in the order given by the fold line.

        Raises:
            IndexError: if ``fold`` exceeds the number of lines in
                ``fold_file`` or an index exceeds the number of images.
            ValueError: if the fold line contains a non-integer token.
        """
        imnames = listdir_nohidden(data_dir)
        imnames.sort()

        if fold >= 0:
            with open(fold_file, "r") as f:
                fold_line = f.read().splitlines()[fold]
            # Parse as plain Python ints. The previous uint8 parsing could
            # not represent positions above 255, yet the training set
            # described above has 500 images for each of 10 classes.
            list_idx = [int(token) for token in fold_line.split()]
        else:
            list_idx = range(len(imnames))

        items = []
        for i in list_idx:
            imname = imnames[i]
            impath = osp.join(data_dir, imname)
            label = int(osp.splitext(imname)[0].split("_")[1])
            items.append(Datum(impath=impath, label=label))

        return items

    def _read_data_all(self, data_dir):
        """Return one ``Datum`` for every image in ``data_dir``.

        A file whose label token is ``"none"`` gets label ``-1``; any other
        token is converted with ``int``.
        """
        imnames = listdir_nohidden(data_dir)
        items = []

        for imname in imnames:
            impath = osp.join(data_dir, imname)
            label = osp.splitext(imname)[0].split("_")[1]
            label = -1 if label == "none" else int(label)
            items.append(Datum(impath=impath, label=label))

        return items

View File

@@ -0,0 +1,17 @@
from .cifar import CIFAR10
from ..build import DATASET_REGISTRY
@DATASET_REGISTRY.register()
class SVHN(CIFAR10):
    """SVHN for SSL.

    Subclass of ``CIFAR10`` that only overrides the dataset folder name;
    construction is delegated to the parent class.

    Reference:
        - Netzer et al. Reading Digits in Natural Images with
        Unsupervised Feature Learning. NIPS-W 2011.
    """

    dataset_dir = "svhn"

    def __init__(self, cfg):
        # Nothing specific to set up here: defer entirely to the parent.
        super().__init__(cfg)