Upload to Main

This commit is contained in:
张菲
2025-10-07 22:42:55 +08:00
commit d3ddab7c5d
218 changed files with 125815 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
from .cal import *
from .contextualdiversity import *
from .coresetmethod import *
from .craig import *
from .deepfool import *
from .earlytrain import *
from .forgetting import *
from .full import *
from .glister import *
from .grand import *
from .gradmatch import *
from .herding import *
from .kcentergreedy import *
from .submodular import *
from .uncertainty import *
from .uniform import *

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

146
deepcore/methods/cal.py Normal file
View File

@@ -0,0 +1,146 @@
from .earlytrain import EarlyTrain
from .methods_utils.euclidean import euclidean_dist_pair_np
from .methods_utils.cossim import cossim_pair_np
import numpy as np
import torch
from tqdm import tqdm
from .. import nets
from copy import deepcopy
from torchvision import transforms
class Cal(EarlyTrain):
    """Contrastive Active Learning (CAL) coreset selection.

    Each sample is scored by the mean symmetric KL divergence between its
    softmax probabilities and those of its ``neighbors`` nearest neighbours
    in the (mixed image/text) embedding space; samples with the smallest
    divergence are selected.
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance=False, metric="euclidean", neighbors: int = 10, pretrain_model: str = "ResNet18", **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        self.balance = balance
        assert 0 < neighbors < 100
        self.neighbors = neighbors
        if metric == "euclidean":
            self.metric = euclidean_dist_pair_np
        elif metric == "cossim":
            # Negate pairwise cosine similarity so it acts as a distance.
            # The metric is always invoked with a single matrix (pairwise
            # form, see find_knn), so the lambda takes one argument.
            self.metric = lambda a: -1. * cossim_pair_np(a)
        elif callable(metric):
            self.metric = metric
        else:
            # Unknown metric name: fall back to euclidean.
            self.metric = euclidean_dist_pair_np
        self.pretrain_model = pretrain_model

    def num_classes_mismatch(self):
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def mixing_feature(self, img_fea, text_fea, lam=0.5):
        """Convex combination of image and text features (initial heuristic,
        may not be optimal)."""
        return lam * img_fea + (1 - lam) * text_fea

    def find_knn(self):
        """Find k-nearest-neighbor data points with the (pre-)tuned model.

        Side effect: caches per-sample softmax probabilities in ``self.probs``,
        with rows stored at each sample's ORIGINAL dataset position so that
        calc_kl can index them with original indices.

        :return: knn matrix (a list with one entry per class when
            ``self.balance``); values are neighbour positions *within* the
            corresponding sample subset.
        """
        self.model.eval()
        batch_size = self.args.DATASET.SELECTION_BATCH_SIZE
        if self.balance:
            knn = []
            # Full-size probability table, indexed by original sample position.
            probs = np.zeros([self.n_train, self.num_classes])
            for c in tqdm(range(self.num_classes)):
                print(f'Start processing class {c}/{self.num_classes}')
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                embeddings = []
                c_probs = np.zeros([len(class_index), self.num_classes])
                data_loader = self.select_dm(self.dst_train, class_index, is_train=False)
                for i, batch in enumerate(data_loader):
                    image, label = batch['img'].cuda(), batch['label'].cuda()
                    img_f, text_f, logit = self.model(image, label, record=True)
                    final_feature = self.mixing_feature(img_f, text_f)
                    embeddings.append(final_feature.cpu().numpy())
                    c_probs[i * batch_size:(i + 1) * batch_size] = \
                        torch.softmax(logit, dim=1).detach().cpu()
                embeddings = np.concatenate(embeddings, axis=0)
                # Scatter class probabilities back to original positions so the
                # row order matches the indices calc_kl receives.
                probs[class_index] = c_probs
                # Column 0 of argsort is the sample itself; skip it.
                knn.append(np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)])
            self.probs = probs
            return knn
        else:
            embeddings = []
            # Pre-sized array (a plain list would flatten tensor rows on
            # slice-assignment and break the later indexing).
            probs = np.zeros([self.n_train, self.num_classes])
            batch_loader = self.select_dm(self.dst_train, None, is_train=False)
            print('Start processing all class')
            for i, batch in enumerate(tqdm(batch_loader)):
                image, label = batch['img'].cuda(), batch['label'].cuda()
                img_f, text_f, logit = self.model(image, label, record=True)
                final_feature = self.mixing_feature(img_f, text_f)
                embeddings.append(final_feature.cpu().numpy())
                probs[i * batch_size:(i + 1) * batch_size] = \
                    torch.softmax(logit, dim=1).detach().cpu()
            embeddings = np.concatenate(embeddings, axis=0)
            self.probs = probs
            return np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)]

    def calc_kl(self, knn, index=None):
        """Mean symmetric KL divergence between each sample and its knn.

        :param knn: neighbour-position matrix for the subset given by index.
        :param index: original dataset indices of the subset (None = all).
        :return: 1-D score array, one entry per sample in the subset.
        """
        self.model.eval()
        self.model.no_grad = True
        sample_num = self.n_train if index is None else len(index)
        batch_size = self.args.DATASET.SELECTION_BATCH_SIZE
        # self.probs[None] would add a spurious leading axis, so branch explicitly.
        probs = self.probs if index is None else self.probs[index]
        s = np.zeros(sample_num)
        for i in range(0, sample_num, batch_size):
            print("| Calculating KL-divergence for batch [%3d/%3d] with batchsize [%3d]" % (
                i, sample_num, batch_size))
            aa = np.expand_dims(probs[i:(i + batch_size)], 1).repeat(self.neighbors, 1)
            bb = probs[knn[i:(i + batch_size)], :]
            s[i:(i + batch_size)] = np.mean(
                np.sum(0.5 * aa * np.log(aa / bb) + 0.5 * bb * np.log(bb / aa), axis=2), axis=1)
        self.model.no_grad = False
        return s

    def finish_run(self):
        scores = []
        if self.balance:
            selection_result = np.array([], dtype=np.int32)
            for c, knn in zip(range(self.num_classes), self.knn):
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                scores.append(self.calc_kl(knn, class_index))
                # Keep the fraction of samples with the smallest divergence.
                selection_result = np.append(selection_result, class_index[np.argsort(
                    scores[-1])[::1][:round(self.fraction * len(class_index))]])
        else:
            selection_result = np.argsort(self.calc_kl(self.knn))[::1][:self.coreset_size]
        return {"indices": selection_result, "scores": scores}

    def select(self, **kwargs):
        self.knn = self.find_knn()
        selection_result = self.run()
        return selection_result

View File

@@ -0,0 +1,33 @@
from .kcentergreedy import kCenterGreedy
import torch
# Acknowledgement to:
# https://github.com/sharat29ag/CDAL
class ContextualDiversity(kCenterGreedy):
    """k-center-greedy selection where the distance between two samples is
    the pairwise symmetric KL divergence of their softmax outputs (CDAL)."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
                 specific_model=None, balance=True, already_selected=[], torchvision_pretrain: bool = False, **kwargs):
        super(ContextualDiversity, self).__init__(
            dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model,
            balance=balance, already_selected=already_selected,
            torchvision_pretrain=torchvision_pretrain, **kwargs)
        # Replace the distance used by the k-center-greedy algorithm.
        self.metric = self._metric

    def _metric(self, a_output, b_output):
        """Symmetric KL divergence between every row of a_output and every
        row of b_output; returns an [len(a), len(b)] matrix."""
        with torch.no_grad():
            n_a, n_b, dim = a_output.shape[0], b_output.shape[0], a_output.shape[1]
            lhs = a_output.unsqueeze(1).expand(n_a, n_b, dim)
            rhs = b_output.unsqueeze(0).expand(n_a, n_b, dim)
            return torch.sum(0.5 * lhs * torch.log(lhs / rhs) + 0.5 * rhs * torch.log(rhs / lhs), dim=2)

    def construct_matrix(self, index=None):
        """Softmax-probability matrix for the given subset (all data if None)."""
        self.model.eval()
        self.model.no_grad = True
        sample_num = self.n_train if index is None else len(index)
        matrix = torch.zeros([sample_num, self.args.num_classes], requires_grad=False).to(self.args.device)
        dataset = self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index)
        batch_loader = torch.utils.data.DataLoader(dataset, batch_size=self.args.selection_batch,
                                                   num_workers=self.args.workers)
        step = self.args.selection_batch
        for i, (inputs, _) in enumerate(batch_loader):
            lo = i * step
            hi = min(lo + step, sample_num)
            matrix[lo:hi] = torch.nn.functional.softmax(self.model(inputs.to(self.args.device)), dim=1)
        self.model.no_grad = False
        return matrix

View File

@@ -0,0 +1,49 @@
import os

import numpy as np
import torch
class CoresetMethod(object):
    """Base class for coreset selection methods.

    Wraps a data manager (``dst_train``), exposes the training split, its
    labels, and the target coreset size, and provides an optional
    pre-tuning step with checkpointing.
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs):
        if fraction <= 0.0 or fraction > 1.0:
            raise ValueError("Illegal Coreset Size.")
        self.dm = dst_train                          # full data manager
        self.dst_train = dst_train.dataset.train_x   # training samples
        self.num_classes = dst_train.dataset.num_classes
        self.fraction = fraction
        self.random_seed = random_seed
        self.index = []
        self.args = args
        self.dst_train_label = self.get_train_label(self.dst_train)
        self.n_train = len(self.dst_train)
        self.coreset_size = round(self.n_train * fraction)
        self.max_epoch = self.args.OPTIM_SELECTION.MAX_EPOCH

    def select(self, **kwargs):
        """Subclasses return {"indices": ...} plus optional scores/weights."""
        return

    def get_train_label(self, dst_train):
        """Collect the label of every training item into a numpy array."""
        return np.asarray([item.label for item in dst_train])

    def pre_run(self):
        """Pre-tune the model on the full dataset, or load a cached checkpoint.

        NOTE(review): relies on ``self.model``, ``self.epoch`` and the
        before/train/test/after epoch hooks that subclasses such as
        EarlyTrain provide — confirm it is only invoked from those subclasses.
        """
        self.train_indx = np.arange(self.n_train)
        print(f'Start pre-tuning CLIP with all datasets by {self.max_epoch} epoch')
        file_save_name = self.args.DATASET.NAME + '_' + str(self.args.SEED) + '.pth'
        output_checkpoint_dir = os.path.join('checkpoints', file_save_name)
        if self.max_epoch > 0:
            if os.path.exists(output_checkpoint_dir):
                print('The checkpoint exists! Loading it')
                ckpt = torch.load(output_checkpoint_dir)
                self.model.load_state_dict(ckpt)
            else:
                for epoch in range(self.epoch, self.max_epoch):
                    self.before_epoch()
                    self.train(epoch)
                    self.test(epoch)
                    self.after_epoch()
                torch.save(self.model.state_dict(), output_checkpoint_dir)

126
deepcore/methods/craig.py Normal file
View File

@@ -0,0 +1,126 @@
from .earlytrain import EarlyTrain
import torch
from .methods_utils import FacilityLocation, submodular_optimizer
import numpy as np
from .methods_utils.euclidean import euclidean_dist_pair_np
from ..nets.nets_utils import MyDataParallel
from tqdm import tqdm
class Craig(EarlyTrain):
    """CRAIG coreset selection: facility-location submodular maximization
    over pairwise euclidean distances of per-sample last-layer gradients.
    Also returns per-sample weights (number of samples each selected point
    best represents)."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance=True, greedy="LazyGreedy", **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        if greedy not in submodular_optimizer.optimizer_choices:
            raise ModuleNotFoundError("Greedy optimizer not found.")
        self._greedy = greedy
        self.balance = balance

    # --- training hooks: CRAIG needs no extra bookkeeping -------------------
    def before_train(self):
        pass

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        pass

    def before_epoch(self):
        pass

    def after_epoch(self):
        pass

    def before_run(self):
        pass

    def num_classes_mismatch(self):
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def calc_weights(self, matrix, result):
        """Weight each selected sample: 1 + number of samples it best represents.

        :param matrix: similarity matrix over the subset.
        :param result: boolean mask or index array of the selected rows.
        """
        min_sample = np.argmax(matrix[result], axis=0)
        weights = np.ones(np.sum(result) if result.dtype == bool else len(result))
        # np.add.at accumulates correctly for repeated indices.
        np.add.at(weights, min_sample, 1)
        return weights

    def finish_run(self):
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module
        self.model.no_grad = True
        # Pairwise gradient distances, computed once and reused below.
        grad = self.calc_gradient()
        grad_matrix = euclidean_dist_pair_np(grad)
        if self.balance:
            # Do selection by class.
            selection_result = np.array([], dtype=np.int32)
            weights = np.array([])
            for c in tqdm(range(self.num_classes)):
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                # Similarity = negated distance, shifted to be strictly positive.
                matrix = -1. * grad_matrix[class_index[:, None], class_index]
                matrix -= np.min(matrix) - 1e-3
                submod_function = FacilityLocation(index=class_index, similarity_matrix=matrix)
                submod_optimizer = submodular_optimizer.__dict__[self._greedy](
                    args=self.args, index=class_index, budget=round(self.fraction * len(class_index)))
                class_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
                                                       update_state=submod_function.update_state)
                selection_result = np.append(selection_result, class_result)
                weights = np.append(weights, self.calc_weights(matrix, np.isin(class_index, class_result)))
        else:
            # Block-diagonal similarity matrix: only within-class distances.
            matrix = np.zeros([self.n_train, self.n_train])
            all_index = np.arange(self.n_train)
            for c in range(self.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                # Reuse the precomputed distance matrix; calc_gradient returns
                # raw per-sample gradients, not a pairwise distance matrix.
                block = np.ix_(class_index, class_index)
                matrix[block] = -1. * grad_matrix[block]
                matrix[block] -= np.min(matrix[block]) - 1e-3
            submod_function = FacilityLocation(index=all_index, similarity_matrix=matrix)
            submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=all_index,
                                                                           budget=self.coreset_size)
            selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain_batch,
                                                       update_state=submod_function.update_state,
                                                       batch=self.args.selection_batch)
            weights = self.calc_weights(matrix, selection_result)
        self.model.no_grad = False
        return {"indices": selection_result, "weights": weights}

    def select(self, **kwargs):
        selection_result = self.run()
        return selection_result

View File

@@ -0,0 +1,120 @@
from .earlytrain import EarlyTrain
import torch
import numpy as np
class DeepFool(EarlyTrain):
    """Coreset selection via the DeepFool attack: each sample is scored by
    the squared L2 norm of the minimal perturbation that flips the model's
    prediction, and the samples closest to the decision boundary (smallest
    perturbation) are selected."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
                 specific_model=None, balance: bool = False, max_iter: int = 50, **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        self.balance = balance    # per-class selection when True
        self.max_iter = max_iter  # max DeepFool iterations per batch

    def num_classes_mismatch(self):
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        # Periodic loss logging during the pre-training phase.
        if batch_idx % self.args.print_freq == 0:
            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
                epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

    def finish_run(self):
        """Score every training sample with deep_fool, then keep the samples
        with the smallest perturbation norms."""
        self.model.no_grad = False
        # Create a data loader for self.dst_train with batch size self.args.selection_batch
        batch_loader = torch.utils.data.DataLoader(self.dst_train, batch_size=self.args.selection_batch
                                                   , num_workers=self.args.workers)
        r = np.zeros(self.n_train, dtype=np.float32)
        batch_num = len(batch_loader)
        for i, (inputs, targets) in enumerate(batch_loader):
            if i % self.args.print_freq == 0:
                print('| Selecting Batch [%3d/%3d]' % (i + 1, batch_num))
            r[(i * self.args.selection_batch):(i * self.args.selection_batch + targets.shape[0])] = self.deep_fool(
                inputs)
        if self.balance:
            selection_result = np.array([], dtype=np.int64)
            for c in range(self.args.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
                selection_result = np.append(selection_result, class_index[
                    r[class_index].argsort()[:round(len(class_index) * self.fraction)]])
        else:
            selection_result = r.argsort()[:self.coreset_size]
        return {"indices": selection_result, "scores": r}

    def deep_fool(self, inputs):
        """Run DeepFool on one batch; return per-sample squared perturbation
        norms (the selection scores)."""
        # Here, start running DeepFool algorithm.
        self.model.eval()
        # Initialize a boolean mask indicating if selection has been stopped at corresponding positions.
        sample_size = inputs.shape[0]
        boolean_mask = np.ones(sample_size, dtype=bool)
        all_idx = np.arange(sample_size)
        # A matrix to store total perturbations (flattened C*H*W per sample).
        r_tot = np.zeros([sample_size, inputs.shape[1] * inputs.shape[2] * inputs.shape[3]])
        # Set requires_grad for inputs.
        cur_inputs = inputs.requires_grad_(True).to(self.args.device)
        original_shape = inputs.shape[1:]
        # set requires_grad for all parameters in network as False to accelerate autograd
        for p in self.model.parameters():
            p.requires_grad_(False)
        self.model.no_grad = True
        first_preds = self.model(cur_inputs).argmax(dim=1)
        self.model.no_grad = False
        for i in range(self.max_iter):
            f_all = self.model(cur_inputs)
            # Gradient of every class logit w.r.t. the (still active) inputs.
            w_k = []
            for c in range(self.args.num_classes):
                w_k.append(torch.autograd.grad(f_all[:, c].sum(), cur_inputs,
                                               retain_graph=False if c + 1 == self.args.num_classes else True)[
                               0].flatten(1))
            w_k = torch.stack(w_k, dim=0)
            # NOTE(review): boolean_mask[boolean_mask] is an all-True vector of
            # the current active-batch length, used as a positional index —
            # confirm this is the intended idiom (it selects every column).
            w_k = w_k - w_k[first_preds, boolean_mask[boolean_mask]].unsqueeze(0)
            w_k_norm = w_k.norm(dim=2)
            w_k_norm[first_preds, boolean_mask[
                boolean_mask]] = 1.  # Set w_k_norm for preds positions to 1. to avoid division by zero.
            l_all = (f_all - f_all[boolean_mask[boolean_mask], first_preds].unsqueeze(1)).detach().abs() / w_k_norm.T
            l_all[boolean_mask[
                boolean_mask], first_preds] = np.inf  # Set l_k for preds positions to inf, as the argmin for each
            # row will be calculated soon.
            l_hat = l_all.argmin(dim=1)
            # Minimal perturbation step towards the closest class boundary.
            r_i = l_all[boolean_mask[boolean_mask], l_hat].unsqueeze(1) / w_k_norm[
                l_hat, boolean_mask[boolean_mask]].T.unsqueeze(1) * w_k[l_hat, boolean_mask[boolean_mask]]
            # Update r_tot values.
            r_tot[boolean_mask] += r_i.cpu().numpy()
            cur_inputs += r_i.reshape([r_i.shape[0]] + list(original_shape))
            # Re-input the updated sample into the network and get new predictions.
            self.model.no_grad = True
            preds = self.model(cur_inputs).argmax(dim=1)
            self.model.no_grad = False
            # In DeepFool algorithm, the iteration stops when the updated sample produces a different prediction
            # in the model.
            index_unfinished = (preds == first_preds)
            if torch.all(~index_unfinished):
                break
            cur_inputs = cur_inputs[index_unfinished]
            first_preds = first_preds[index_unfinished]
            boolean_mask[all_idx[boolean_mask][~index_unfinished.cpu().numpy()]] = False
        return (r_tot * r_tot).sum(axis=1)

    def select(self, **kwargs):
        selection_result = self.run()
        return selection_result

View File

@@ -0,0 +1,322 @@
from .coresetmethod import CoresetMethod
import torch, time
from torch import nn
import numpy as np
from copy import deepcopy
from .. import nets
from torchvision import transforms
from datasets.data_manager import select_dm_loader
from dassl.utils import MetricMeter, AverageMeter
from torch.cuda.amp import GradScaler, autocast
import datetime
from tqdm import tqdm
import os
class EarlyTrain(CoresetMethod):
    '''
    Core code for training related to coreset selection methods when pre-training is required.

    Holds a (CLIP-style) model together with its optimizer / LR scheduler /
    AMP grad scaler (supplied via **kwargs), runs the pre-tuning loop with
    checkpointing, evaluates on the test loader, exposes no-op hooks for
    subclasses, and provides batch parsing and per-sample gradient helpers.
    '''

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 torchvision_pretrain: bool = False, dst_pretrain_dict: dict = {}, fraction_pretrain=1., dst_test=None,
                 **kwargs):
        super().__init__(dst_train, args, fraction, random_seed)
        self.epochs = epochs
        self.n_train = len(self.dst_train)
        self.coreset_size = round(self.n_train * fraction)
        self.model = specific_model
        self.train_loader = self.dm.train_loader_x
        self.test_loader = self.dm.test_loader
        if kwargs:
            # NOTE(review): assumes 'optim', 'schedule' and 'scar' (GradScaler)
            # are always passed together whenever any kwargs are given — confirm.
            # self.text_feature = kwargs['text_feature']
            self.optim = kwargs['optim']
            self.sche = kwargs['schedule']
            self.scar = kwargs['scar']
        self.start_epoch = self.epoch = 0
        self.max_epoch = self.args.OPTIM_SELECTION.MAX_EPOCH
        if fraction_pretrain <= 0. or fraction_pretrain > 1.:
            raise ValueError("Illegal pretrain fraction value.")
        self.fraction_pretrain = fraction_pretrain
        if dst_pretrain_dict.__len__() != 0:
            dict_keys = dst_pretrain_dict.keys()
            if 'im_size' not in dict_keys or 'channel' not in dict_keys or 'dst_train' not in dict_keys or \
                    'num_classes' not in dict_keys:
                raise AttributeError(
                    'Argument dst_pretrain_dict must contain imszie, channel, dst_train and num_classes.')
            # NOTE(review): both comparisons below use im_size[0]; the second
            # was probably meant to check index [1] — confirm before fixing.
            if dst_pretrain_dict['im_size'][0] != args.im_size[0] or dst_pretrain_dict['im_size'][0] != args.im_size[0]:
                raise ValueError("im_size of pretrain dataset does not match that of the training dataset.")
            if dst_pretrain_dict['channel'] != args.channel:
                raise ValueError("channel of pretrain dataset does not match that of the training dataset.")
            if dst_pretrain_dict['num_classes'] != args.num_classes:
                self.num_classes_mismatch()
        self.dst_pretrain_dict = dst_pretrain_dict
        self.torchvision_pretrain = torchvision_pretrain
        self.if_dst_pretrain = (len(self.dst_pretrain_dict) != 0)
        if torchvision_pretrain:
            # Pretrained models in torchvision only accept 224*224 inputs, therefore we resize current
            # datasets to 224*224.
            if args.im_size[0] != 224 or args.im_size[1] != 224:
                self.dst_train = deepcopy(dst_train)
                self.dst_train.transform = transforms.Compose([self.dst_train.transform, transforms.Resize(224)])
                if self.if_dst_pretrain:
                    self.dst_pretrain_dict['dst_train'] = deepcopy(dst_pretrain_dict['dst_train'])
                    self.dst_pretrain_dict['dst_train'].transform = transforms.Compose(
                        [self.dst_pretrain_dict['dst_train'].transform, transforms.Resize(224)])
        if self.if_dst_pretrain:
            self.n_pretrain = len(self.dst_pretrain_dict['dst_train'])
        self.n_pretrain_size = round(
            self.fraction_pretrain * (self.n_pretrain if self.if_dst_pretrain else self.n_train))
        self.dst_test = dst_test

    def train(self, epoch, list_of_train_idx=None, **kwargs):
        """ Train model for one epoch, calling the after_loss/while_update
        hooks per batch so subclasses can record per-sample statistics. """
        self.before_train()
        self.model.train()
        losses = MetricMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        end = time.time()
        print('\n=> Training Pre-tuning Epoch #%d' % epoch)
        train_loader = select_dm_loader(self.args, self.dst_train, is_train=True)
        self.num_batches = len(train_loader)
        for i, batch in enumerate(train_loader):
            data_time.update(time.time() - end)
            image, label, real_ind = batch['img'].cuda(), batch['label'].cuda(), batch['index'].cuda()
            model = self.model
            optim = self.optim
            scaler = self.scar
            prec = self.args.TRAINER.MAPLE.PREC
            if prec == "amp":
                # Mixed-precision path: scale the loss before backward.
                with autocast():
                    loss, outputs = model(image, label)
                optim.zero_grad()
                scaler.scale(loss).backward()
                scaler.step(optim)
                scaler.update()
            else:
                loss, outputs = model(image, label)
                optim.zero_grad()
                loss.backward()
                optim.step()
            # Subclass hooks (e.g. Forgetting tracks per-sample accuracy here).
            self.after_loss(outputs, loss, label, real_ind, epoch)
            self.while_update(outputs, loss, label, epoch, i, self.args.DATALOADER.TRAIN_X.BATCH_SIZE)
            loss_summary = {"loss": loss.item()}
            if (i + 1) == self.num_batches:
                # Step the LR scheduler once per epoch, on the last batch.
                self.sche.step()
            batch_time.update(time.time() - end)
            losses.update(loss_summary)
            meet_freq = (i + 1) % self.args.TRAIN.PRINT_FREQ == 0
            only_few_batches = self.num_batches < self.args.TRAIN.PRINT_FREQ
            if meet_freq or only_few_batches:
                # ETA over the remaining batches of this and later epochs.
                nb_remain = 0
                nb_remain += self.num_batches - i - 1
                nb_remain += (self.max_epoch - self.epoch - 1) * self.num_batches
                eta_seconds = batch_time.avg * nb_remain
                eta = str(datetime.timedelta(seconds=int(eta_seconds)))
                info = []
                info += [f"epoch [{self.epoch + 1}/{self.max_epoch}]"]
                info += [f"batch [{i + 1}/{self.num_batches}]"]
                info += [f"time {batch_time.val:.3f} ({batch_time.avg:.3f})"]
                info += [f"data {data_time.val:.3f} ({data_time.avg:.3f})"]
                info += [f"{losses}"]
                info += [f"lr {optim.param_groups[0]['lr']:.4e}"]
                info += [f"eta {eta}"]
                print(" ".join(info))
            end = time.time()
        return self.finish_train()

    def run(self):
        """Pre-tune the model (or load a cached checkpoint from
        ``checkpoints/<dataset>_<seed>.pth``), then delegate to finish_run()."""
        self.train_indx = np.arange(self.n_train)
        self.before_run()
        print(f'Start pre-funing CLIP with all datasets by {self.max_epoch} epoch')
        file_save_name = self.args.DATASET.NAME + '_' + str(self.args.SEED) + '.pth'
        output_checkpoint_dir = os.path.join('checkpoints', file_save_name)
        if self.max_epoch > 0:
            if os.path.exists(output_checkpoint_dir):
                print(f'The checkpiont exists! Load that shit')
                ckpt = torch.load(output_checkpoint_dir)
                self.model.load_state_dict(ckpt)
            else:
                for epoch in range(self.epoch, self.max_epoch):
                    self.before_epoch()  # PASS
                    self.train(epoch)
                    self.test(epoch)
                    self.after_epoch()
                torch.save(self.model.state_dict(), output_checkpoint_dir)
        return self.finish_run()

    def test(self, epoch):
        """Evaluate top-1 accuracy on the test loader and print it."""
        self.model.no_grad = True
        self.model.eval()
        correct = 0.
        total = 0.
        print('\n=> Testing Tuning Epoch #%d' % epoch)
        for batch_idx, batch in enumerate(self.test_loader):
            image, target = batch['img'].cuda(), batch['label']
            output = self.model(image, target.cuda())
            predicted = torch.max(output.data, 1).indices.cpu()
            correct += predicted.eq(target).sum().item()
            total += target.size(0)
        print(f'| Test Epoch {epoch} Test Acc: {100. * correct / total:.3f}%')
        self.model.no_grad = False

    # --- hooks overridden by subclasses (default: no-op) --------------------
    def num_classes_mismatch(self):
        pass

    def before_train(self):
        pass

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        pass

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        pass

    def finish_train(self):
        pass

    def before_epoch(self):
        pass

    def after_epoch(self):
        pass

    def before_run(self):
        pass

    def finish_run(self):
        pass

    def select(self, **kwargs):
        selection_result = self.run()
        return selection_result

    def select_without_train(self, **kwargs):
        # Skip pre-tuning entirely; go straight to the selection step.
        return self.finish_run()

    @torch.no_grad()
    def calcluate_clip_probability(self, batch):
        # (sic: method-name typo kept for API compatibility)
        # NOTE(review): uses self.specific_model and self.text_feature, which
        # are not set in __init__ — confirm callers assign them beforehand.
        input = batch["img"].cuda()
        self.specific_model = self.specific_model.cuda()
        image_features = self.specific_model.encode_image(input)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        logit_scale = self.specific_model.logit_scale.exp()
        return logit_scale * image_features @ self.text_feature.t()

    # using the defined select_dm
    def select_dm(self, data, ind=None, is_train=None):
        return select_dm_loader(self.args, data, ind, is_train)

    def parse_batch_test(self, batch):
        input = batch["img"]
        label = batch["label"]
        input = input.cuda()
        label = label.cuda()
        return input, label

    def parse_batch_train(self, batch):
        input = batch["img"].cuda()
        label = batch["label"].cuda()
        domain = batch["index"].cuda()
        return input, label, domain

    def calc_gradient(self, index=None):
        '''
        Calculate gradients matrix on current network for specified training dataset.

        Returns a float32 array of shape [n_samples, num_classes * (1 + embed_dim)]:
        per-sample last-layer bias gradients concatenated with the flattened
        weight gradients (embedding ⊗ bias-gradient outer product).
        '''
        self.model.eval()
        data_loader = self.select_dm(self.dst_train, index, is_train=False)
        # Initialize a matrix to save gradients.
        # (on cpu)
        gradients = []
        lam = 0.5  # NOTE(review): unused; left over from feature-mixing logic
        for i, batch in enumerate(tqdm(data_loader)):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            embed_dim = visual_embedding.shape[-1]
            with torch.no_grad():
                # dL/dlogit gives the bias gradient; the weight gradient is the
                # outer product of the visual embedding with it.
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                weight_parameters_grads = visual_embedding.view(bs_size, 1,
                                                                -1).repeat(1, self.num_classes, 1) * \
                                          bias_parameters_grads.view(bs_size, self.num_classes,
                                                                     1).repeat(1, 1, embed_dim)
                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                           dim=1).cpu().numpy())
        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print('Finish Gradient Calculation')
        self.model.train()
        return gradients

View File

@@ -0,0 +1,99 @@
from .earlytrain import EarlyTrain
import torch, time
from torch import nn
import numpy as np
from datasets.data_manager import select_dm_loader
# Acknowledgement to
# https://github.com/mtoneva/example_forgetting
class Forgetting(EarlyTrain):
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, #default True
dst_test=None, **kwargs):
super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model=specific_model,
dst_test=dst_test,**kwargs)
self.balance = balance
def get_hms(self, seconds):
# Format time for printing purposes
m, s = divmod(seconds, 60)
h, m = divmod(m, 60)
return h, m, s
def before_train(self):
self.train_loss = 0.
self.correct = 0.
self.total = 0.
def after_loss(self, outputs, loss, targets, batch_inds, epoch):
with torch.no_grad():
_, predicted = torch.max(outputs.data, 1)
cur_acc = (predicted == targets).clone().detach().requires_grad_(False).type(torch.float32)
self.forgetting_events[batch_inds.clone().detach()[(self.last_acc[batch_inds]-cur_acc)>0.01]]+=1.
self.last_acc[batch_inds] = cur_acc
def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
pass
# self.train_loss += loss.item()
# self.total += targets.size(0)
# _, predicted = torch.max(outputs.data, 1)
# self.correct += predicted.eq(targets.data).cpu().sum()
#
# if batch_idx % self.args.print_freq == 0:
# print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' % (
# epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item(),
# 100. * self.correct.item() / self.total))
def after_epoch(self):
pass
# epoch_time = time.time() - self.start_time
# self.elapsed_time += epoch_time
# print('| Elapsed time : %d:%02d:%02d' % (self.get_hms(self.elapsed_time)))
def before_run(self):
self.elapsed_time = 0
self.forgetting_events = torch.zeros(self.n_train, requires_grad=False).cuda()
self.test_initial_acc()
# self.last_acc = torch.zeros(self.n_train, requires_grad=False).cuda()
def test_initial_acc(self):
self.model.no_grad = True
self.model.eval()
self.last_acc = torch.zeros(self.n_train, requires_grad=False).cuda()
print('\n=> Testing Initial acc for Forgetting')
train_loader = select_dm_loader(self.args, self.dst_train)
for batch_idx, batch in enumerate(train_loader):
image, target,batch_inds = batch['img'].cuda(), batch['label'].cuda(), batch['index'].cuda()
output = self.model(image, target)
predicted = torch.max(output.data, 1).indices
cur_acc = (predicted == target).clone().detach().requires_grad_(False).type(torch.float32)
self.last_acc[batch_inds] = cur_acc
self.model.no_grad = False
def finish_run(self):
    # No post-training work needed; forgetting counts are consumed in select().
    pass
def select(self, **kwargs):
    """Train the model, then keep the samples with the highest forgetting
    counts (per class when self.balance is set)."""
    self.run()
    scores = self.forgetting_events
    if self.balance:
        # Per-class selection: keep the top-scoring fraction inside each class.
        top_examples = np.array([], dtype=np.int64)
        for c in range(self.num_classes):
            c_indx = self.train_indx[self.dst_train_label == c]
            budget = round(self.fraction * len(c_indx))
            ranked = np.argsort(scores[c_indx].cpu().numpy())[::-1]
            top_examples = np.append(top_examples, c_indx[ranked[:budget]])
    else:
        order = np.argsort(scores.cpu().numpy())
        top_examples = self.train_indx[order][::-1][:self.coreset_size]
    return {"indices": top_examples, "scores": scores}

10
deepcore/methods/full.py Normal file
View File

@@ -0,0 +1,10 @@
import numpy as np
from .coresetmethod import CoresetMethod
class Full(CoresetMethod):
    """Trivial baseline that "selects" the entire training set."""

    def __init__(self, dst_train, args, fraction, random_seed, **kwargs):
        # Intentionally skips the parent initializer: selecting everything
        # needs no fraction/seed bookkeeping, only the dataset size.
        self.n_train = len(dst_train)

    def select(self, **kwargs):
        """Return the indices of all training samples."""
        return {"indices": np.arange(self.n_train)}

210
deepcore/methods/glister.py Normal file
View File

@@ -0,0 +1,210 @@
from .earlytrain import EarlyTrain
from .methods_utils import submodular_optimizer
import torch
import numpy as np
from ..nets.nets_utils import MyDataParallel
from tqdm import tqdm
class Glister(EarlyTrain):
    """GLISTER coreset selection.

    Greedily selects training samples whose last-layer gradients align best
    with the mean validation-set gradient, via a submodular optimizer.
    """
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance: bool = True, greedy="StochasticGreedy", eta=None, dst_val=None, **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        self.balance = balance
        # Step size for the simulated one-step model update on the validation side.
        self.eta = args.OPTIM_SELECTION.LR if eta is None else eta
        # NOTE(review): the dst_val parameter is ignored; the validation split is
        # always taken from dst_train.dataset.val — confirm this is intended.
        self.dst_val = dst_train.dataset.val
        self.dst_val_label = self.get_train_label(self.dst_val)
        self.n_val = len(self.dst_val)
        if greedy not in submodular_optimizer.optimizer_choices:
            raise ModuleNotFoundError("Greedy optimizer not found.")
        self._greedy = greedy

    def calc_gradient(self, index=None, val=False):
        '''
        Calculate gradients matrix on current network for specified training dataset.

        Per-sample gradients are taken w.r.t. the last linear layer only: the
        bias gradient is d(loss)/d(logits); the weight gradient is its outer
        product with the visual embedding. Both are flattened and concatenated
        into one row per sample and collected on CPU as float32 numpy arrays.
        '''
        self.model.eval()
        if val:
            val_str = 'Val'
            data_loader = self.select_dm(self.dst_val, index, is_train=False)
            # self.init_out = []
            # self.init_emb = []
            # self.init_y = []
        else:
            val_str = 'Train'
            data_loader = self.select_dm(self.dst_train, index, is_train=False)
        # Initialize a matrix to save gradients.
        # (on cpu)
        gradients = []
        for i, batch in enumerate(tqdm(data_loader)):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            embed_dim = visual_embedding.shape[-1]
            with torch.no_grad():
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                weight_parameters_grads = visual_embedding.view(bs_size, 1,
                                          -1).repeat(1, self.num_classes, 1) *\
                                          bias_parameters_grads.view(bs_size, self.num_classes,
                                          1).repeat(1, 1, embed_dim)
                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                           dim=1).cpu().numpy())
            # if val:
            #     self.init_out.append(logit.cpu())
            #     self.init_emb.append(visual_embedding.cpu())
            #     self.init_y.append(label.cpu())
        # if val:
        #     with torch.no_grad():
        #         self.init_out = torch.cat(self.init_out,dim=0).numpy().astype(dtype=np.float32)
        #         self.init_emb = torch.cat(self.init_emb,dim=0).numpy().astype(dtype=np.float32)
        #         self.init_y = torch.cat(self.init_y,dim=0).numpy().astype(dtype=np.float32)
        # NOTE: the dtype keyword of np.concatenate requires NumPy >= 1.20.
        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print(f'Finish Gradient Calculation on {val_str} dataset')
        return gradients

    # Legacy DeepCore implementation kept for reference:
    # def calc_gradient(self, index=None, val=False, record_val_detail=False):
    #     '''
    #     Calculate gradients matrix on current network for training or validation dataset.
    #     '''
    #
    #     self.model.eval()
    #
    #     if val:
    #         batch_loader = torch.utils.data.DataLoader(
    #             self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index),
    #             batch_size=self.args.selection_batch, num_workers=self.args.workers)
    #     else:
    #         batch_loader = torch.utils.data.DataLoader(
    #             self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
    #             batch_size=self.args.selection_batch, num_workers=self.args.workers)
    #
    #     self.embedding_dim = self.model.get_last_layer().in_features
    #     gradients = []
    #     if val and record_val_detail:
    #         self.init_out = []
    #         self.init_emb = []
    #         self.init_y = []
    #
    #     for i, (input, targets) in enumerate(batch_loader):
    #         self.model_optimizer.zero_grad()
    #         outputs = self.model(input.to(self.args.device))
    #         loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum()
    #         batch_num = targets.shape[0]
    #         with torch.no_grad():
    #             bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
    #             weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1,
    #                                       self.embedding_dim).repeat(1, self.args.num_classes, 1) *\
    #                                       bias_parameters_grads.view(
    #                                       batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim)
    #             gradients.append(torch.cat(
    #                 [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu())
    #
    #         if val and record_val_detail:
    #             self.init_out.append(outputs.cpu())
    #             self.init_emb.append(self.model.embedding_recorder.embedding.cpu())
    #             self.init_y.append(targets)
    #
    #     gradients = torch.cat(gradients, dim=0)
    #     if val:
    #         self.val_grads = torch.mean(gradients, dim=0)
    #         if self.dst_val == self.dst_train:
    #             # No validation set was provided while instantiating Glister, so self.dst_val == self.dst_train
    #             self.train_grads = gradients
    #     else:
    #         self.train_grads = gradients
    #     if val and record_val_detail:
    #         with torch.no_grad():
    #             self.init_out = torch.cat(self.init_out, dim=0)
    #             self.init_emb = torch.cat(self.init_emb, dim=0)
    #             self.init_y = torch.cat(self.init_y)
    #
    #     self.model.train()
    # PASS, worth discussion
    def update_val_gradients(self, new_selection, selected_for_train):
        """Simulate a one-step update on the selected samples and refresh the
        mean validation gradient accordingly (the "taylor approximation" step
        of GLISTER).

        NOTE(review): this path looks unfinished — it reads self.init_out /
        self.init_emb / self.init_y, which are only produced by commented-out
        code in calc_gradient, and it mixes NumPy arrays (self.train_gradients)
        with torch tensor operations; it would fail if actually invoked.
        """
        sum_selected_train_gradients = np.mean(self.train_gradients[selected_for_train], axis=0)
        new_outputs = self.init_out - self.eta * sum_selected_train_gradients[:self.num_classes].reshape(1,
                      -1).repeat(self.init_out.shape[0], 1) - self.eta * torch.matmul(self.init_emb,
                      sum_selected_train_gradients[self.num_classes:].view(self.num_classes, -1).T)
        sample_num = new_outputs.shape[0]
        gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)], requires_grad=False)
        i = 0
        while i * self.args.selection_batch < sample_num:
            batch_indx = np.arange(sample_num)[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch,
                                                                                 sample_num)]
            new_out_puts_batch = new_outputs[batch_indx].clone().detach().requires_grad_(True)
            loss = self.criterion(new_out_puts_batch, self.init_y[batch_indx])
            batch_num = len(batch_indx)
            bias_parameters_grads = torch.autograd.grad(loss.sum(), new_out_puts_batch, retain_graph=True)[0]
            weight_parameters_grads = self.init_emb[batch_indx].view(batch_num, 1, self.embedding_dim).repeat(1,
                                      self.args.num_classes, 1) * bias_parameters_grads.view(batch_num,
                                      self.args.num_classes, 1).repeat(1, 1, self.embedding_dim)
            gradients[batch_indx] = torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu()
            i += 1
        self.val_grads = torch.mean(gradients, dim=0)

    def finish_run(self):
        """After warm-up training: compute train/val gradient matrices and
        greedily select samples maximizing alignment with the validation
        gradient (optionally per class)."""
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module
        self.model.no_grad = True
        self.train_indx = np.arange(self.n_train)
        self.val_indx = np.arange(self.n_val)
        train_gradients = self.calc_gradient(index=None)
        val_gradients = self.calc_gradient(index=None, val=True)
        if self.balance:
            selection_result = np.array([], dtype=np.int64)
            # weights = np.array([], dtype=np.float32)
            for c in range(self.num_classes):
                c_indx = self.train_indx[self.dst_train_label == c]
                c_val_inx = self.val_indx[self.dst_val_label == c]
                self.train_gradients = train_gradients[c_indx]
                self.val_gradients = val_gradients[c_val_inx].mean(axis=0)
                # self.init_out = self.init_out[c_val_inx]
                # self.init_emb = self.init_emb[c_val_inx]
                # self.init_y = self.init_y[c_val_inx]
                submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=c_indx,
                                                                               budget=round(self.fraction * len(c_indx)))
                # conditional gain uses taylor series approximation
                c_selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected,
                                                             **kwargs: np.dot(self.train_gradients[idx_gain],
                                                                              self.val_gradients.reshape(-1, 1)).
                                                             flatten(), update_state=None)  # self.update val
                selection_result = np.append(selection_result, c_selection_result)
        else:
            self.train_gradients = train_gradients
            self.val_gradients = val_gradients.mean(axis=0)
            submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args,
                                                                           index=np.arange(self.n_train), budget=self.coreset_size)
            # NOTE(review): 'upadate_state' is misspelled (should be
            # 'update_state'), so update_val_gradients is likely never invoked;
            # this branch also applies torch ops (matmul/view) to the NumPy
            # arrays returned by calc_gradient — confirm before relying on it.
            selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected,
                                                       **kwargs: torch.matmul(self.train_gradients[idx_gain],
                                                       self.val_gradients.view(-1, 1)).detach().cpu().numpy().flatten(),
                                                       upadate_state=self.update_val_gradients)
        self.model.no_grad = False
        return {"indices": selection_result}

    def num_classes_mismatch(self):
        # Guard used by EarlyTrain pretraining: class-count mismatch is fatal.
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

View File

@@ -0,0 +1,213 @@
import torch
import numpy as np
from scipy.linalg import lstsq
from scipy.optimize import nnls
from .earlytrain import EarlyTrain
from ..nets.nets_utils import MyDataParallel
# https://github.com/krishnatejakk/GradMatch
class GradMatch(EarlyTrain):
    """GradMatch coreset selection (Killamsetty et al., ICML 2021).

    Uses Orthogonal Matching Pursuit (OMP) to find a weighted subset whose
    summed per-sample gradients approximate the mean (validation or training)
    gradient.

    NOTE(review): this class still follows the original DeepCore interface
    (self.args.device, self.args.num_classes, self.dst_train.targets,
    self.model.embedding_recorder, torch.lstsq) and looks unadapted compared
    with the other methods in this file — confirm before use.
    """
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance=True, dst_val=None, lam: float = 1., **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        self.balance = balance    # per-class selection if True
        self.dst_val = dst_val    # optional validation set to match against

    def num_classes_mismatch(self):
        # Guard used by EarlyTrain pretraining: class-count mismatch is fatal.
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        # Periodic loss logging during the warm-up training epochs.
        if batch_idx % self.args.print_freq == 0:
            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
            epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

    def orthogonal_matching_pursuit(self, A, b, budget: int, lam: float = 1.):
        '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
        Acknowlegement to:
        https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
        Args:
          A: design matrix of size (d, n)
          b: measurement vector of length d
          budget: selection budget
          lam: regularization coef. for the final output vector
        Returns:
           vector of length n
        '''
        with torch.no_grad():
            d, n = A.shape
            if budget <= 0:
                budget = 0
            elif budget > n:
                budget = n
            x = np.zeros(n, dtype=np.float32)
            resid = b.clone()
            indices = []
            boolean_mask = torch.ones(n, dtype=bool, device="cuda")
            all_idx = torch.arange(n, device='cuda')
            for i in range(budget):
                if i % self.args.print_freq == 0:
                    print("| Selecting [%3d/%3d]" % (i + 1, budget))
                # Pick the remaining column most correlated with the residual.
                projections = torch.matmul(A.T, resid)
                index = torch.argmax(projections[boolean_mask])
                index = all_idx[boolean_mask][index]
                indices.append(index.item())
                boolean_mask[index] = False
                if indices.__len__() == 1:
                    A_i = A[:, index]
                    x_i = projections[index] / torch.dot(A_i, A_i).view(-1)
                    A_i = A[:, index].view(1, -1)
                else:
                    A_i = torch.cat((A_i, A[:, index].view(1, -1)), dim=0)
                    temp = torch.matmul(A_i, torch.transpose(A_i, 0, 1)) + lam * torch.eye(A_i.shape[0], device="cuda")
                    # NOTE(review): torch.lstsq was deprecated in PyTorch 1.9 and
                    # removed in 1.13; newer versions need torch.linalg.lstsq
                    # (different argument order and return value).
                    x_i, _ = torch.lstsq(torch.matmul(A_i, b).view(-1, 1), temp)
                resid = b - torch.matmul(torch.transpose(A_i, 0, 1), x_i).view(-1)
            # Final non-negative weights for the selected columns.
            if budget > 1:
                x_i = nnls(temp.cpu().numpy(), torch.matmul(A_i, b).view(-1).cpu().numpy())[0]
                x[indices] = x_i
            elif budget == 1:
                x[indices[0]] = 1.
        return x

    def orthogonal_matching_pursuit_np(self, A, b, budget: int, lam: float = 1.):
        '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
        Acknowlegement to:
        https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
        Args:
          A: design matrix of size (d, n)
          b: measurement vector of length d
          budget: selection budget
          lam: regularization coef. for the final output vector
        Returns:
           vector of length n
        '''
        d, n = A.shape
        if budget <= 0:
            budget = 0
        elif budget > n:
            budget = n
        x = np.zeros(n, dtype=np.float32)
        resid = np.copy(b)
        indices = []
        boolean_mask = np.ones(n, dtype=bool)
        all_idx = np.arange(n)
        for i in range(budget):
            if i % self.args.print_freq == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, budget))
            # Pick the remaining column most correlated with the residual.
            projections = A.T.dot(resid)
            index = np.argmax(projections[boolean_mask])
            index = all_idx[boolean_mask][index]
            indices.append(index.item())
            boolean_mask[index] = False
            if indices.__len__() == 1:
                A_i = A[:, index]
                x_i = projections[index] / A_i.T.dot(A_i)
            else:
                A_i = np.vstack([A_i, A[:, index]])
                x_i = lstsq(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0]
            resid = b - A_i.T.dot(x_i)
        # Final non-negative weights for the selected columns.
        if budget > 1:
            x_i = nnls(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0]
            x[indices] = x_i
        elif budget == 1:
            x[indices[0]] = 1.
        return x

    def calc_gradient(self, index=None, val=False):
        # Per-sample last-layer gradients: bias gradient = d(loss)/d(logits);
        # weight gradient = its outer product with the recorded embedding.
        # Returns one flattened row per sample, kept on self.args.device.
        self.model.eval()
        if val:
            batch_loader = torch.utils.data.DataLoader(
                self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index),
                batch_size=self.args.selection_batch, num_workers=self.args.workers)
            sample_num = len(self.dst_val.targets) if index is None else len(index)
        else:
            batch_loader = torch.utils.data.DataLoader(
                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
                batch_size=self.args.selection_batch, num_workers=self.args.workers)
            sample_num = self.n_train if index is None else len(index)
        self.embedding_dim = self.model.get_last_layer().in_features
        gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)],
                                requires_grad=False, device=self.args.device)
        for i, (input, targets) in enumerate(batch_loader):
            self.model_optimizer.zero_grad()
            outputs = self.model(input.to(self.args.device)).requires_grad_(True)
            loss = self.criterion(outputs, targets.to(self.args.device)).sum()
            batch_num = targets.shape[0]
            with torch.no_grad():
                bias_parameters_grads = torch.autograd.grad(loss, outputs, retain_graph=True)[0].cpu()
                weight_parameters_grads = self.model.embedding_recorder.embedding.cpu().view(batch_num, 1,
                                          self.embedding_dim).repeat(1, self.args.num_classes, 1) *\
                                          bias_parameters_grads.view(batch_num, self.args.num_classes,
                                          1).repeat(1, 1, self.embedding_dim)
                gradients[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] =\
                    torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1)
        return gradients

    def finish_run(self):
        """Compute gradients, then run OMP so that the weighted subset gradient
        matches the mean validation (or training) gradient."""
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module
        self.model.no_grad = True
        with self.model.embedding_recorder:
            if self.dst_val is not None:
                val_num = len(self.dst_val.targets)
            if self.balance:
                selection_result = np.array([], dtype=np.int64)
                weights = np.array([], dtype=np.float32)
                for c in range(self.args.num_classes):
                    class_index = np.arange(self.n_train)[self.dst_train.targets == c]
                    cur_gradients = self.calc_gradient(class_index)
                    if self.dst_val is not None:
                        # Also calculate gradients of the validation set.
                        val_class_index = np.arange(val_num)[self.dst_val.targets == c]
                        cur_val_gradients = torch.mean(self.calc_gradient(val_class_index, val=True), dim=0)
                    else:
                        cur_val_gradients = torch.mean(cur_gradients, dim=0)
                    if self.args.device == "cpu":
                        # Compute OMP on numpy
                        cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T,
                                                                          cur_val_gradients.numpy(),
                                                                          budget=round(len(class_index) * self.fraction))
                    else:
                        cur_weights = self.orthogonal_matching_pursuit(cur_gradients.to(self.args.device).T,
                                                                       cur_val_gradients.to(self.args.device),
                                                                       budget=round(len(class_index) * self.fraction))
                    selection_result = np.append(selection_result, class_index[np.nonzero(cur_weights)[0]])
                    weights = np.append(weights, cur_weights[np.nonzero(cur_weights)[0]])
            else:
                cur_gradients = self.calc_gradient()
                if self.dst_val is not None:
                    # Also calculate gradients of the validation set.
                    cur_val_gradients = torch.mean(self.calc_gradient(val=True), dim=0)
                else:
                    cur_val_gradients = torch.mean(cur_gradients, dim=0)
                if self.args.device == "cpu":
                    # Compute OMP on numpy
                    cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T,
                                                                      cur_val_gradients.numpy(),
                                                                      budget=self.coreset_size)
                else:
                    cur_weights = self.orthogonal_matching_pursuit(cur_gradients.T, cur_val_gradients,
                                                                   budget=self.coreset_size)
                selection_result = np.nonzero(cur_weights)[0]
                weights = cur_weights[selection_result]
        self.model.no_grad = False
        return {"indices": selection_result, "weights": weights}

    def select(self, **kwargs):
        # run() performs warm-up training and ends by calling finish_run().
        selection_result = self.run()
        return selection_result

108
deepcore/methods/grand.py Normal file
View File

@@ -0,0 +1,108 @@
from .earlytrain import EarlyTrain
import torch, time
import numpy as np
from ..nets.nets_utils import MyDataParallel
from tqdm import tqdm
class GraNd(EarlyTrain):
    """GraNd-score selection (Paul et al., "Deep Learning on a Data Diet").

    After warm-up training, each sample is scored by the L2 norm of its
    per-sample gradient w.r.t. the last linear layer; the highest-scoring
    fraction is kept (optionally per class).
    """
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, repeat=1,
                 specific_model=None, balance=False, **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        self.epochs = epochs
        self.n_train = len(self.dst_train)
        self.coreset_size = round(self.n_train * fraction)
        self.specific_model = specific_model
        # Number of independent scoring runs; only a single run is performed below.
        self.repeat = repeat
        self.balance = balance

    # def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    #     if batch_idx % self.args.print_freq == 0:
    #         print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
    #         epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item()))
    def before_run(self):
        # Unwrap DataParallel so direct attribute access below works.
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module

    def calc_gradient(self, index=None):
        '''
        Calculate gradients matrix on current network for specified training dataset.

        Gradients are taken w.r.t. the last linear layer only: the bias
        gradient is d(loss)/d(logits); the weight gradient is its outer
        product with the visual embedding. One flattened row per sample,
        collected on CPU as float32.
        '''
        self.model.eval()
        data_loader = self.select_dm(self.dst_train, index, is_train=False)
        # Initialize a matrix to save gradients.
        # (on cpu)
        gradients = []
        for i, batch in enumerate(tqdm(data_loader)):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            embed_dim = visual_embedding.shape[-1]
            with torch.no_grad():
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                weight_parameters_grads = visual_embedding.view(bs_size, 1,
                                          -1).repeat(1, self.num_classes, 1) * \
                                          bias_parameters_grads.view(bs_size, self.num_classes,
                                          1).repeat(1, 1, embed_dim)
                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                           dim=1).cpu().numpy())
        # NOTE: the dtype keyword of np.concatenate requires NumPy >= 1.20.
        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print('Finish Gradient Calculation')
        self.model.train()
        return gradients

    def finish_run(self):
        # Score = per-sample gradient norm. Only column 0 of norm_matrix is
        # filled; the multi-repeat variant below is currently disabled.
        # self.model.embedding_recorder.record_embedding = True # recording embedding vector
        gradients = self.calc_gradient()
        self.norm_matrix[:, 0] = np.linalg.norm(gradients, axis=1)
        # embedding_dim = self.model.get_last_layer().in_features
        # data_loader = self.select_dm(self.dst_train, None, is_train=False)
        # sample_num = self.n_train
        #
        # for i, batch in enumerate(data_loader):
        #     self.optim.zero_grad()
        #     image, target,batch_inds = batch['img'].cuda(), batch['label'].cuda(), batch['index'].cuda()
        #
        #     outputs = self.model(image)
        #     loss = self.criterion(outputs.requires_grad_(True),
        #                           targets.to(self.args.device)).sum()
        #     batch_num = targets.shape[0]
        #     with torch.no_grad():
        #         bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
        #         self.norm_matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num),
        #         self.cur_repeat] = torch.norm(torch.cat([bias_parameters_grads, (
        #                 self.model.embedding_recorder.embedding.view(batch_num, 1, embedding_dim).repeat(1,
        #                                      self.args.num_classes, 1) * bias_parameters_grads.view(
        #                                      batch_num, self.args.num_classes, 1).repeat(1, 1, embedding_dim)).
        #                                      view(batch_num, -1)], dim=1), dim=1, p=2)
        #
        # self.model.train()
    def select(self, **kwargs):
        """Run warm-up + scoring and return the top-scoring sample indices."""
        # Initialize a matrix to save norms of each sample on independent runs.
        self.norm_matrix = np.zeros([self.n_train, self.repeat])
        # for self.cur_repeat in range(self.repeat):
        self.run()
        # self.random_seed = self.random_seed + 5
        self.norm_mean = np.mean(self.norm_matrix, axis=1)
        if not self.balance:
            top_examples = self.train_indx[np.argsort(self.norm_mean)][::-1][:self.coreset_size]
        else:
            # Per-class top-k to preserve class balance.
            top_examples = np.array([], dtype=np.int64)
            for c in tqdm(range(self.num_classes)):
                c_indx = self.train_indx[self.dst_train_label == c]
                budget = round(self.fraction * len(c_indx))
                top_examples = np.append(top_examples, c_indx[np.argsort(self.norm_mean[c_indx])[::-1][:budget]])
        return {"indices": top_examples, "scores": self.norm_mean}

109
deepcore/methods/herding.py Normal file
View File

@@ -0,0 +1,109 @@
from .earlytrain import EarlyTrain
import torch
import numpy as np
from .methods_utils import euclidean_dist
from ..nets.nets_utils import MyDataParallel
class Herding(EarlyTrain):
    """Herding selection: greedily picks samples so the running sum of selected
    features tracks the dataset's mean feature vector.

    Features are mixed image/text embeddings (see mixing_feature).
    """
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
                 specific_model="ResNet18", balance: bool = False, metric="euclidean", **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, **kwargs)
        if metric == "euclidean":
            self.metric = euclidean_dist
        elif callable(metric):
            self.metric = metric
        else:
            # Unknown metric names silently fall back to euclidean.
            self.metric = euclidean_dist
        # Skip warm-up training entirely: run() jumps straight to finish_run().
        self.run = lambda: self.finish_run()
        def _construct_matrix(index=None):
            # Raw-pixel fallback: loads the whole (sub)set in one batch.
            data_loader = torch.utils.data.DataLoader(
                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
                batch_size=self.n_train if index is None else len(index), num_workers=self.args.workers)
            inputs, _ = next(iter(data_loader))
            return inputs.flatten(1).requires_grad_(False).to(self.args.device)
        # NOTE(review): this instance attribute shadows the class-level
        # construct_matrix method defined below, so the embedding-based matrix
        # is never reached through self.construct_matrix (and self.args.workers
        # / self.args.device may not exist in the adapted args) — confirm.
        self.construct_matrix = _construct_matrix
        self.balance = balance
        self.select_bs = self.args.DATASET.SELECTION_BATCH_SIZE

    def num_classes_mismatch(self):
        # Guard used by EarlyTrain pretraining: class-count mismatch is fatal.
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        # No per-iteration logging (no warm-up training happens anyway).
        pass
    # Initial achievement, may not be optimal.
    def mixing_feature(self, img_fea, text_fea, lam=0.5):
        """Convex mix of image and text features; lam weights the image side."""
        # return img_fea
        return lam*img_fea + (1-lam)*text_fea

    def construct_matrix(self, index=None):
        """Build the (n, emb_dim) feature matrix from mixed image/text embeddings.

        NOTE(review): shadowed by the instance attribute set in __init__ (see
        note there); currently unreachable via self.construct_matrix.
        """
        self.model.eval()
        self.model.no_grad = True
        with torch.no_grad():
            # with self.model.embedding_recorder:
            sample_num = self.n_train if index is None else len(index)
            matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).cuda()
            data_loader = self.select_dm(self.dst_train, index, is_train=False)
            for i, batch in enumerate(data_loader):
                image, label = batch['img'].cuda(), batch['label'].cuda()
                img_f, text_f, _ = self.model(image, label, record=True)
                final_embed = self.mixing_feature(img_f, text_f)  # Using the mixed image_feature and text_feature
                matrix[i * self.select_bs:min((i + 1) * self.select_bs, sample_num)] = final_embed
        self.model.no_grad = False
        self.model.train()
        return matrix

    def before_run(self):
        # Embedding width of the model's image encoder.
        self.emb_dim = self.model.image_encoder.output_dim

    def herding(self, matrix, budget: int, index=None):
        """Greedy herding over the rows of `matrix`.

        At step i the residual (i+1)*mean - sum(selected) is compared against
        every unselected row via self.metric and the argmax row is added.
        Returns the selected entries of `index` (positional if index is None).
        """
        sample_num = matrix.shape[0]
        if budget < 0:
            raise ValueError("Illegal budget size.")
        elif budget > sample_num:
            budget = sample_num
        indices = np.arange(sample_num)
        with torch.no_grad():
            mu = torch.mean(matrix, dim=0)
            select_result = np.zeros(sample_num, dtype=bool)
            for i in range(budget):
                if i % self.args.TRAIN.PRINT_FREQ == 0:
                    print("| Selecting [%3d/%3d]" % (i + 1, budget))
                dist = self.metric(((i + 1) * mu - torch.sum(matrix[select_result], dim=0)).view(1, -1),
                                   matrix[~select_result])
                p = torch.argmax(dist).item()
                # Map back from the position among unselected rows to the
                # absolute row index.
                p = indices[~select_result][p]
                select_result[p] = True
        if index is None:
            index = indices
        return index[select_result]

    def finish_run(self):
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module
        if self.balance:
            # Herd within each class separately.
            selection_result = np.array([], dtype=np.int32)
            for c in range(self.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                selection_result = np.append(selection_result, self.herding(self.construct_matrix(class_index),
                                             budget=round(self.fraction * len(class_index)), index=class_index))
        else:
            selection_result = self.herding(self.construct_matrix(), budget=self.coreset_size)
        return {"indices": selection_result}

    def select(self, **kwargs):
        # run() is rebound in __init__ to call finish_run() directly.
        selection_result = self.run()
        return selection_result

View File

@@ -0,0 +1,182 @@
from .earlytrain import EarlyTrain
import torch
import numpy as np
from .methods_utils import euclidean_dist
from ..nets.nets_utils import MyDataParallel
def k_center_greedy(matrix, budget: int, metric, device, random_seed=None, index=None, already_selected=None,
                    print_freq: int = 20):
    """Greedy k-center selection.

    Repeatedly adds the pool point farthest from the current set of centers
    until ``budget`` points are selected.

    Args:
        matrix: (n, d) feature matrix (torch.Tensor or np.ndarray).
        budget: number of points to select (clamped to at most n; < 0 raises).
        metric: callable(a, b) -> pairwise distance matrix between row sets.
        device: torch device used for the distance bookkeeping.
        random_seed: seed for the random initial center (used only when
            ``already_selected`` is empty).
        index: optional array of external indices; the result is expressed in
            these indices (defaults to 0..n-1).
        already_selected: indices (w.r.t. ``index``) already chosen as centers.

    Returns:
        np.ndarray of selected indices drawn from ``index``.
    """
    if type(matrix) == torch.Tensor:
        assert matrix.dim() == 2
    elif type(matrix) == np.ndarray:
        assert matrix.ndim == 2
        matrix = torch.from_numpy(matrix).requires_grad_(False).to(device)

    sample_num = matrix.shape[0]
    assert sample_num >= 1

    if budget < 0:
        raise ValueError("Illegal budget size.")
    elif budget > sample_num:
        budget = sample_num

    if index is not None:
        assert matrix.shape[0] == len(index)
    else:
        index = np.arange(sample_num)

    assert callable(metric)

    # Fix: np.array(None) has no len(), so the former default crashed when
    # already_selected was omitted.
    already_selected = np.array([] if already_selected is None else already_selected)

    with torch.no_grad():
        np.random.seed(random_seed)
        if already_selected.__len__() == 0:
            select_result = np.zeros(sample_num, dtype=bool)
            if budget > 0:
                # Randomly select one initial point.
                already_selected = [np.random.randint(0, sample_num)]
                budget -= 1
                select_result[already_selected] = True
        else:
            select_result = np.in1d(index, already_selected)

        if budget <= 0:
            # Nothing (more) to add. This also fixes a crash in the original
            # code: with budget == 1 and no prior centers, the distance matrix
            # below would have zero rows and torch.min over it failed.
            return index[select_result]

        num_of_already_selected = np.sum(select_result)

        # dis_matrix caches, row by row, the distances from each chosen center
        # to all pool points; `mins` tracks each pool point's distance to its
        # nearest center so far.
        dis_matrix = -1 * torch.ones([num_of_already_selected + budget - 1, sample_num], requires_grad=False).to(device)
        dis_matrix[:num_of_already_selected, ~select_result] = metric(matrix[select_result], matrix[~select_result])
        mins = torch.min(dis_matrix[:num_of_already_selected, :], dim=0).values

        for i in range(budget):
            if i % print_freq == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, budget))
            # Farthest-point heuristic: take the pool point whose nearest
            # center is farthest away.
            p = torch.argmax(mins).item()
            select_result[p] = True
            if i == budget - 1:
                break
            mins[p] = -1
            dis_matrix[num_of_already_selected + i, ~select_result] = metric(matrix[[p]], matrix[~select_result])
            mins = torch.min(mins, dis_matrix[num_of_already_selected + i])
    return index[select_result]
class kCenterGreedy(EarlyTrain):
    """k-Center Greedy coreset selection on learned embeddings.

    NOTE(review): like Herding in this package, __init__ installs an
    instance-level construct_matrix (raw flattened inputs) that shadows the
    class-level embedding-based construct_matrix method — confirm which one is
    intended.
    """
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=0,
                 specific_model="ResNet18", balance: bool = False, already_selected=[], metric="euclidean",
                 torchvision_pretrain: bool = True, **kwargs):
        # NOTE(review): already_selected=[] is a mutable default argument;
        # harmless while only read, but fragile.
        super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model,
                         torchvision_pretrain=torchvision_pretrain, **kwargs)
        if already_selected.__len__() != 0:
            if min(already_selected) < 0 or max(already_selected) >= self.n_train:
                raise ValueError("List of already selected points out of the boundary.")
        self.already_selected = np.array(already_selected)
        self.min_distances = None
        if metric == "euclidean":
            self.metric = euclidean_dist
        elif callable(metric):
            self.metric = metric
        else:
            # Unknown metric names silently fall back to euclidean.
            self.metric = euclidean_dist
        # Skip warm-up training: run() jumps straight to finish_run().
        self.run = lambda : self.finish_run()
        def _construct_matrix(index=None):
            # Raw-pixel fallback: loads the whole (sub)set in one batch.
            data_loader = torch.utils.data.DataLoader(
                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
                batch_size=self.n_train if index is None else len(index),
                num_workers=self.args.workers)
            inputs, _ = next(iter(data_loader))
            return inputs.flatten(1).requires_grad_(False).to(self.args.device)
        self.construct_matrix = _construct_matrix
        self.balance = balance

    def num_classes_mismatch(self):
        # Guard used by EarlyTrain pretraining: class-count mismatch is fatal.
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        # Periodic loss logging during (optional) warm-up training.
        if batch_idx % self.args.print_freq == 0:
            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
            epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

    def old_construct_matrix(self, index=None):
        # Legacy variant kept for reference: preallocates the (n, emb_dim)
        # matrix instead of concatenating per-batch embeddings.
        self.model.eval()
        self.model.no_grad = True
        with torch.no_grad():
            with self.model.embedding_recorder:
                sample_num = self.n_train if index is None else len(index)
                matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device)
                data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else
                                                          torch.utils.data.Subset(self.dst_train, index),
                                                          batch_size=self.args.selection_batch,
                                                          num_workers=self.args.workers)
                for i, (inputs, _) in enumerate(data_loader):
                    self.model(inputs.to(self.args.device))
                    matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch,
                                                             sample_num)] = self.model.embedding_recorder.embedding
        self.model.no_grad = False
        return matrix

    def construct_matrix(self, index=None):
        # Build the embedding matrix via the model's embedding recorder.
        # NOTE(review): shadowed by the instance attribute set in __init__.
        self.model.eval()
        self.model.no_grad = True
        with torch.no_grad():
            with self.model.embedding_recorder:
                sample_num = self.n_train if index is None else len(index)
                matrix = []
                data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else
                                                          torch.utils.data.Subset(self.dst_train, index),
                                                          batch_size=self.args.selection_batch,
                                                          num_workers=self.args.workers)
                for i, (inputs, _) in enumerate(data_loader):
                    self.model(inputs.to(self.args.device))
                    matrix.append(self.model.embedding_recorder.embedding)
        self.model.no_grad = False
        return torch.cat(matrix, dim=0)

    def before_run(self):
        # Embedding width = input features of the model's final linear layer.
        self.emb_dim = self.model.get_last_layer().in_features

    def finish_run(self):
        # Unwrap DataParallel; the actual selection happens in select().
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module

    def select(self, **kwargs):
        """Build the feature matrix and run k-center greedy (per class when
        self.balance is set); returns {"indices": selected}."""
        self.run()
        if self.balance:
            selection_result = np.array([], dtype=np.int32)
            for c in range(self.args.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
                selection_result = np.append(selection_result, k_center_greedy(self.construct_matrix(class_index),
                                                                               budget=round(
                                                                                   self.fraction * len(class_index)),
                                                                               metric=self.metric,
                                                                               device=self.args.device,
                                                                               random_seed=self.random_seed,
                                                                               index=class_index,
                                                                               already_selected=self.already_selected[
                                                                                   np.in1d(self.already_selected,
                                                                                           class_index)],
                                                                               print_freq=self.args.print_freq))
        else:
            matrix = self.construct_matrix()
            # Free the model before the memory-heavy distance computation.
            del self.model_optimizer
            del self.model
            selection_result = k_center_greedy(matrix, budget=self.coreset_size,
                                               metric=self.metric, device=self.args.device,
                                               random_seed=self.random_seed,
                                               already_selected=self.already_selected, print_freq=self.args.print_freq)
        return {"indices": selection_result}

View File

@@ -0,0 +1,4 @@
from .euclidean import *
from .cossim import *
from .submodular_function import *
from .submodular_optimizer import *

View File

@@ -0,0 +1,35 @@
import numpy as np
import torch
def cossim_np(v1, v2):
    """Pairwise cosine similarity between rows of v1 and v2, rescaled to [0, 1].

    Returns a (len(v1), len(v2)) matrix: 0.5 + 0.5 * cos(v1_i, v2_j).
    """
    dots = v1 @ v2.T
    norms = np.linalg.norm(v1, axis=1)[:, None] * np.linalg.norm(v2, axis=1)
    sim = dots / (norms + 1e-6)  # epsilon guards zero-norm rows
    sim[np.isneginf(sim)] = 0.
    return 0.5 + 0.5 * sim
def cossim_pair_np(v1):
    """All-pairs cosine similarity among rows of v1, rescaled to [0, 1]."""
    dots = v1 @ v1.T
    row_norms = np.linalg.norm(v1, axis=1)
    sim = dots / (row_norms[:, None] * row_norms + 1e-6)  # epsilon guards zero norms
    sim[np.isneginf(sim)] = 0.
    return 0.5 + 0.5 * sim
def cossim(v1, v2):
    """Pairwise cosine similarity between rows of v1 and v2, rescaled to [0, 1]."""
    dots = torch.matmul(v1, v2.T)
    norms = torch.norm(v1, dim=1).unsqueeze(1) * torch.norm(v2, dim=1)
    sim = dots / (norms + 1e-6)  # epsilon guards zero-norm rows
    sim[torch.isneginf(sim)] = 0.
    return 0.5 + 0.5 * sim
def cossim_pair(v1):
    """All-pairs cosine similarity among rows of v1, rescaled to [0, 1]."""
    dots = torch.matmul(v1, v1.T)
    row_norms = torch.norm(v1, dim=1)
    sim = dots / (row_norms.unsqueeze(1) * row_norms + 1e-6)  # epsilon guards zero norms
    sim[torch.isneginf(sim)] = 0.
    return 0.5 + 0.5 * sim

View File

@@ -0,0 +1,36 @@
import torch
import numpy as np
def euclidean_dist(x, y):
    """Pairwise Euclidean distances between rows of x (m, d) and y (n, d).

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b; returns an
    (m, n) tensor.
    """
    m, n = x.size(0), y.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
    dist = xx + yy
    # dist -= 2 * x @ y.T. The keyword form replaces the deprecated positional
    # addmm_(beta, alpha, mat1, mat2) overload, which was removed from newer
    # PyTorch releases.
    dist.addmm_(x, y.t(), beta=1, alpha=-2)
    # Clamp guards tiny negative values from floating-point cancellation.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist
def euclidean_dist_pair(x):
    """All-pairs Euclidean distances among rows of x (m, d); returns (m, m)."""
    m = x.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, m)
    dist = xx + xx.t()
    # dist -= 2 * x @ x.T. Keyword form replaces the deprecated positional
    # addmm_(beta, alpha, mat1, mat2) overload removed from newer PyTorch.
    dist.addmm_(x, x.t(), beta=1, alpha=-2)
    # Clamp guards tiny negative values from floating-point cancellation.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist
def euclidean_dist_np(x, y):
    """Pairwise Euclidean distances between rows of x and rows of y (NumPy)."""
    cross = x @ y.T
    x_sq = np.sum(x * x, axis=1)[:, np.newaxis]   # (rowx, 1)
    y_sq = np.sum(y * y, axis=1)[np.newaxis, :]   # (1, rowy)
    # Broadcasting replaces explicit np.repeat tiling; the clip guards small
    # negative values produced by floating-point cancellation.
    return np.sqrt(np.clip(x_sq + y_sq - 2. * cross, 1e-12, None))
# Euclidean distance of every pair of rows of x: returns an N*N matrix whose
# diagonal is ~0 (clipped at sqrt(1e-12)).
def euclidean_dist_pair_np(x):
    """Pairwise Euclidean distance matrix of the rows of `x` (numpy)."""
    gram = x @ x.T
    sq_norms = np.sum(x * x, axis=1)
    sq_dists = sq_norms[:, None] + sq_norms[None, :] - 2. * gram
    return np.sqrt(np.clip(sq_dists, 1e-12, None))

View File

@@ -0,0 +1,144 @@
import numpy as np
class SubmodularFunction(object):
    """Base class for submodular set functions over a sample index set.

    Similarity can be supplied in one of two modes: a precomputed n-by-n
    `similarity_matrix`, or a callable `similarity_kernel` used to compute
    similarities incrementally later, if required.
    """

    def __init__(self, index, similarity_kernel=None, similarity_matrix=None, already_selected=None):
        self.index = index
        self.n = len(index)
        # Normalize `None` to a fresh list: a mutable `[]` default would be
        # shared across all instances (classic Python pitfall).
        self.already_selected = [] if already_selected is None else already_selected
        assert similarity_kernel is not None or similarity_matrix is not None
        # For the sample similarity matrix, the method supports two input modes: one is to input a pairwise
        # similarity matrix for the whole sample, and the other allows the input of a similarity kernel to be
        # used to calculate similarities incrementally at a later time if required.
        if similarity_kernel is not None:
            assert callable(similarity_kernel)
            self.similarity_kernel = self._similarity_kernel(similarity_kernel)
        else:
            assert similarity_matrix.shape[0] == self.n and similarity_matrix.shape[1] == self.n
            self.similarity_matrix = similarity_matrix
            self.similarity_kernel = lambda a, b: self.similarity_matrix[np.ix_(a, b)]

    def _similarity_kernel(self, similarity_kernel):
        # Subclasses may wrap the kernel (e.g. to add caching); the base class
        # passes it through unchanged.
        return similarity_kernel
class FacilityLocation(SubmodularFunction):
    """Facility-location function: f(S) = sum_i max_{j in S} sim(i, j).

    `cur_max` tracks, for every sample, its best similarity to the selected set,
    so marginal gains can be computed without revisiting earlier selections.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Set the all-True mask before any kernel call: the cached kernel's
        # closure reads `self.all_idx` (the original assigned it last, which
        # broke kernel mode when `already_selected` was non-empty).
        self.all_idx = np.ones(self.n, dtype=bool)
        if len(self.already_selected) == 0:
            # Nothing selected yet: best coverage of every sample starts at 0.
            self.cur_max = np.zeros(self.n, dtype=np.float32)
        else:
            # NOTE(review): in kernel mode `already_selected` must be an ndarray
            # (the cached kernel reads `.dtype`); plain lists only work in
            # similarity-matrix mode — confirm against callers.
            self.cur_max = np.max(self.similarity_kernel(np.arange(self.n), self.already_selected), axis=1)

    def _similarity_kernel(self, similarity_kernel):
        # Cache kernel evaluations column-by-column so each sample's similarity
        # column is computed at most once.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Convert an integer index array into a boolean mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        # Marginal gain of each candidate: total increase in per-sample best coverage.
        gains = np.maximum(0., self.similarity_kernel(self.all_idx, idx_gain) - self.cur_max.reshape(-1, 1)).sum(axis=0)
        return gains

    def calc_gain_batch(self, idx_gain, selected, **kwargs):
        # Same as calc_gain, but accumulates over row batches to bound memory.
        batch = kwargs["batch"]
        batch_idx = ~self.all_idx
        batch_idx[0:batch] = True
        gains = np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)
        for i in range(batch, self.n, batch):
            batch_idx = ~self.all_idx
            # BUG FIX: `i` is already the batch's start offset. The original
            # sliced `[i * batch : (i + 1) * batch]`, so every batch after the
            # first addressed an out-of-range (empty) slice and contributed
            # nothing to the gains.
            batch_idx[i:i + batch] = True
            gains += np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)
        return gains

    def update_state(self, new_selection, total_selected, **kwargs):
        # Fold the newly selected columns into each sample's best coverage.
        self.cur_max = np.maximum(self.cur_max, np.max(self.similarity_kernel(self.all_idx, new_selection), axis=1))
class GraphCut(SubmodularFunction):
    """Graph-cut submodular function.

    The gain trades off representativeness (`lam` times a candidate's total
    similarity to all samples) against redundancy (its similarity to the
    already-selected set).
    """

    def __init__(self, lam: float = 1., **kwargs):
        super().__init__(**kwargs)
        self.lam = lam  # weight of the representativeness term in calc_gain
        if 'similarity_matrix' in kwargs:
            # Full matrix supplied: column sums can be precomputed once.
            self.sim_matrix_cols_sum = np.sum(self.similarity_matrix, axis=0)
        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        # Initialize a matrix to store similarity values of sample points.
        # Columns are filled lazily; their sums are accumulated as they appear.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.sim_matrix_cols_sum = np.zeros(self.n, dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Convert an integer index array into a boolean mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.sim_matrix_cols_sum[not_calculated] = np.sum(self.sim_matrix[:, not_calculated], axis=0)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        # Conditional gain per candidate x given selected set S:
        #   f(x | S) = lam * sum_i sim(i, x) - 2 * sum_{s in S} sim(s, x)
        gain = -2. * np.sum(self.similarity_kernel(selected, idx_gain), axis=0) + self.lam * self.sim_matrix_cols_sum[idx_gain]
        return gain

    def update_state(self, new_selection, total_selected, **kwargs):
        # calc_gain depends only on the selected mask it receives; no cached
        # per-selection state needs updating.
        pass
class LogDeterminant(SubmodularFunction):
    """Log-determinant submodular function (diversity via determinantal volume)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        # Initialize a matrix to store similarity values of sample points.
        # Columns are computed lazily, each at most once.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Convert an integer index array into a boolean mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        # Gain for LogDeterminant can be written as $f(x | A) = \log\det(S_{a} - S_{a,A}S_{A}^{-1}S_{x,A}^T)$.
        # The value returned below is the quadratic form S_{x,A} S_A^{-1} S_{x,A}^T
        # per candidate; pinv handles a singular S_A.
        sim_idx_gain = self.similarity_kernel(selected, idx_gain).T
        sim_selected = self.similarity_kernel(selected, selected)
        return (np.dot(sim_idx_gain, np.linalg.pinv(sim_selected)) * sim_idx_gain).sum(-1)

    def update_state(self, new_selection, total_selected, **kwargs):
        # No incremental caches to maintain.
        pass

View File

@@ -0,0 +1,155 @@
import numpy as np
from tqdm import tqdm
# Names of the greedy strategies that callers may request by string
# (looked up via submodular_optimizer.__dict__ elsewhere).
optimizer_choices = ["NaiveGreedy", "LazyGreedy", "StochasticGreedy", "ApproximateLazyGreedy"]
class optimizer(object):
    """Base class for greedy maximizers of a submodular gain function.

    Args:
        args: experiment configuration (subclasses read args.TRAIN.PRINT_FREQ).
        index: array of dataset indices the optimizer selects from.
        budget: number of elements to select; must be in (0, len(index)].
        already_selected: positions (into `index`) that are pre-selected.

    Raises:
        ValueError: if `budget` is non-positive or exceeds len(index).
    """

    def __init__(self, args, index, budget: int, already_selected=None):
        self.args = args
        self.index = index
        # Normalize `None` here instead of using a shared mutable `[]` default.
        already_selected = [] if already_selected is None else already_selected
        if budget <= 0 or budget > len(index):
            raise ValueError("Illegal budget for optimizer.")
        self.n = len(index)
        self.budget = budget
        self.already_selected = already_selected
class NaiveGreedy(optimizer):
    """Plain greedy: every step re-evaluates the gain of all unselected points
    and takes the best one."""

    def __init__(self, args, index, budget: int, already_selected=None):
        # `None` default avoids the shared-mutable-default pitfall; normalize
        # locally so behavior does not depend on the base class's default.
        super(NaiveGreedy, self).__init__(args, index, budget,
                                          [] if already_selected is None else already_selected)

    def select(self, gain_function, update_state=None, **kwargs):
        """Greedily pick `budget` elements.

        `gain_function(candidates_mask, selected_mask, **kwargs)` returns the
        per-candidate marginal gains; `update_state`, if given, is notified of
        each new selection. Returns the selected subset of `self.index`.
        """
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True
        greedy_gain = np.zeros(len(self.index))
        for i in range(sum(selected), self.budget):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            # Recompute gains for every unselected candidate, then take the best.
            greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
            current_selection = greedy_gain.argmax()
            selected[current_selection] = True
            greedy_gain[current_selection] = -np.inf
            if update_state is not None:
                update_state(np.array([current_selection]), selected, **kwargs)
        return self.index[selected]
class LazyGreedy(optimizer):
    """Lazy greedy: exploits submodularity (marginal gains only shrink) to avoid
    re-evaluating all candidates — only the current argmax's gain is refreshed,
    and it is accepted once it remains on top after refresh."""

    def __init__(self, args, index, budget: int, already_selected=None):
        # `None` default avoids the shared-mutable-default pitfall.
        super(LazyGreedy, self).__init__(args, index, budget,
                                         [] if already_selected is None else already_selected)

    def select(self, gain_function, update_state=None, **kwargs):
        """Pick `budget` elements; `gain_function(candidates, selected, **kwargs)`
        returns per-candidate marginal gains. Returns the chosen subset of
        `self.index`."""
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True
        greedy_gain = np.zeros(len(self.index))
        # Seed all gains once; already-selected entries can never win argmax.
        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
        greedy_gain[selected] = -np.inf
        for i in tqdm(range(sum(selected), self.budget)):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            best_gain = -np.inf
            last_max_element = -1
            while True:
                cur_max_element = greedy_gain.argmax()
                if last_max_element == cur_max_element:
                    # Gain is fresh and still maximal: accept this element.
                    selected[cur_max_element] = True
                    greedy_gain[cur_max_element] = -np.inf
                    if update_state is not None:
                        update_state(np.array([cur_max_element]), selected, **kwargs)
                    break
                # Refresh the possibly-stale gain of the current top candidate.
                new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0]
                greedy_gain[cur_max_element] = new_gain
                if new_gain >= best_gain:
                    best_gain = new_gain
                    last_max_element = cur_max_element
        return self.index[selected]
class StochasticGreedy(optimizer):
    """Stochastic greedy: each step evaluates gains only on a random subsample of
    the unselected points, of size ~ (n / budget) * ln(1 / epsilon)."""

    def __init__(self, args, index, budget: int, already_selected=None, epsilon: float = 0.9):
        # `None` default avoids the shared-mutable-default pitfall.
        super(StochasticGreedy, self).__init__(args, index, budget,
                                               [] if already_selected is None else already_selected)
        self.epsilon = epsilon  # smaller epsilon -> larger subsample, closer to exact greedy

    def select(self, gain_function, update_state=None, **kwargs):
        """Pick `budget` elements; `gain_function(candidate_ids, selected, **kwargs)`
        returns per-candidate marginal gains. Returns the chosen subset of
        `self.index`."""
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True
        # Subsample size controlling the approximation/effort trade-off.
        sample_size = max(round(-np.log(self.epsilon) * self.n / self.budget), 1)
        greedy_gain = np.zeros(len(self.index))
        all_idx = np.arange(self.n)
        for i in range(sum(selected), self.budget):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            # Uniformly select a subset from unselected samples with size sample_size.
            subset = np.random.choice(all_idx[~selected], replace=False, size=min(sample_size, self.n - i))
            if len(subset) == 0:
                break
            greedy_gain[subset] = gain_function(subset, selected, **kwargs)
            current_selection = greedy_gain[subset].argmax()
            selected[subset[current_selection]] = True
            greedy_gain[subset[current_selection]] = -np.inf
            if update_state is not None:
                update_state(np.array([subset[current_selection]]), selected, **kwargs)
        return self.index[selected]
class ApproximateLazyGreedy(optimizer):
    """Approximate lazy greedy: accepts the current top candidate as soon as its
    refreshed gain is within a factor `beta` of its stale upper bound, trading a
    little quality for fewer gain evaluations."""

    def __init__(self, args, index, budget: int, already_selected=None, beta: float = 0.9):
        # `None` default avoids the shared-mutable-default pitfall.
        super(ApproximateLazyGreedy, self).__init__(args, index, budget,
                                                    [] if already_selected is None else already_selected)
        self.beta = beta  # acceptance threshold relative to the stale gain

    def select(self, gain_function, update_state=None, **kwargs):
        """Pick `budget` elements; `gain_function(candidate_ids, selected, **kwargs)`
        returns per-candidate marginal gains. Returns the chosen subset of
        `self.index`."""
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True
        greedy_gain = np.zeros(len(self.index))
        # Seed all gains once; already-selected entries can never win argmax.
        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
        greedy_gain[selected] = -np.inf
        for i in range(sum(selected), self.budget):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            while True:
                cur_max_element = greedy_gain.argmax()
                max_gain = greedy_gain[cur_max_element]
                # Refresh the possibly-stale gain of the current top candidate.
                new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0]
                if new_gain >= self.beta * max_gain:
                    # Close enough to the stale bound: accept this element.
                    selected[cur_max_element] = True
                    greedy_gain[cur_max_element] = -np.inf
                    if update_state is not None:
                        update_state(np.array([cur_max_element]), selected, **kwargs)
                    break
                else:
                    greedy_gain[cur_max_element] = new_gain
        return self.index[selected]

View File

@@ -0,0 +1,116 @@
from .earlytrain import EarlyTrain
import numpy as np
import torch
from .methods_utils import cossim_np, submodular_function, submodular_optimizer
from ..nets.nets_utils import MyDataParallel
class Submodular(EarlyTrain):
    """Coreset selection via submodular maximization over per-sample gradients.

    After the early-training phase, a last-layer gradient is computed for every
    training sample; a submodular function (e.g. GraphCut) defined on pairwise
    gradient cosine similarity is then greedily maximized to pick the subset.
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True,
                 function="GraphCut", greedy="LazyGreedy", metric="cossim", **kwargs):
        super(Submodular, self).__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        if greedy not in submodular_optimizer.optimizer_choices:
            raise ModuleNotFoundError("Greedy optimizer not found.")
        print(f"The Submodular Method is {function}")
        self._greedy = greedy      # name of the greedy optimizer class to instantiate
        self._metric = metric      # similarity metric name (cosine is what finish_run uses)
        self._function = function  # name of the submodular function class
        self.balance = balance     # if True, select round(fraction * |class|) per class

    def before_train(self):
        # EarlyTrain hook: nothing to do.
        pass

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        # EarlyTrain hook: nothing to do.
        pass

    def before_epoch(self):
        # EarlyTrain hook: nothing to do.
        pass

    def after_epoch(self):
        # EarlyTrain hook: nothing to do.
        pass

    def before_run(self):
        # EarlyTrain hook: nothing to do.
        pass

    def num_classes_mismatch(self):
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def calc_gradient(self, index=None):
        '''
        Calculate gradients matrix on current network for specified training dataset.

        Returns a float32 numpy array of shape
        (num_samples, num_classes * (1 + embed_dim)): per sample, the loss
        gradient w.r.t. the last linear layer's bias (dL/dlogit) concatenated
        with the flattened weight gradient (embedding outer logit-gradient).
        '''
        self.model.eval()
        data_loader = self.select_dm(self.dst_train, index, is_train=False)
        # Initialize a matrix to save gradients.
        # (on cpu)
        gradients = []
        for i, batch in enumerate(data_loader):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            # assumes the model returns (loss, visual_embedding, logit) when
            # called with cal_gradient=True — confirm against the model wrapper.
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            embed_dim = visual_embedding.shape[-1]
            with torch.no_grad():
                # dL/dlogit doubles as the bias gradient of the last linear layer.
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                # Per-sample weight gradient: embedding (outer product) logit-gradient.
                weight_parameters_grads = visual_embedding.view(bs_size, 1,
                                          -1).repeat(1, self.num_classes, 1) *\
                                          bias_parameters_grads.view(bs_size, self.num_classes,
                                          1).repeat(1, 1, embed_dim)
            gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                       dim=1).cpu().numpy())
        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print('Finish Gradient Calculation')
        return gradients

    def finish_run(self):
        # Unwrap DataParallel so attribute assignment below reaches the real model.
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module
        # Turn on the embedding recorder and the no_grad flag
        self.model.no_grad = True
        self.train_indx = np.arange(self.n_train)
        gradients = self.calc_gradient(index=None)
        if self.balance:
            # Per-class selection: budget is round(fraction * class size) each.
            selection_result = np.array([], dtype=np.int64)
            for c in range(self.num_classes):
                print(f'class {c}')
                c_indx = self.train_indx[self.dst_train_label == c]
                # Calculate gradients into a matrix
                c_gradients = gradients[c_indx]
                # Instantiate a submodular function
                submod_function = submodular_function.__dict__[self._function](index=c_indx,
                                  similarity_kernel=lambda a, b: cossim_np(c_gradients[a], c_gradients[b]))
                submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args,
                                   index=c_indx, budget=round(self.fraction * len(c_indx)), already_selected=[])
                c_selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
                                                             update_state=submod_function.update_state)
                selection_result = np.append(selection_result, c_selection_result)
        else:
            # Calculate gradients into a matrix
            # NOTE(review): gradients were already computed above; this second
            # call recomputes them over the full set — confirm if intentional.
            gradients = self.calc_gradient()
            # Instantiate a submodular function
            submod_function = submodular_function.__dict__[self._function](index=self.train_indx,
                              similarity_kernel=lambda a, b: cossim_np(gradients[a], gradients[b]))
            submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=self.train_indx,
                               budget=self.coreset_size)
            selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
                                                       update_state=submod_function.update_state)
        self.model.no_grad = False
        return {"indices": selection_result}

    def select(self, **kwargs):
        # Run early training, then select via finish_run (invoked by run()).
        selection_result = self.run()
        return selection_result

View File

@@ -0,0 +1,120 @@
from .earlytrain import EarlyTrain
import torch
import numpy as np
from datasets.data_manager import select_dm_loader
import time
class Uncertainty(EarlyTrain):
    """Coreset selection by prediction uncertainty after early training.

    Supported scores: "LeastConfidence" (max logit), "Entropy" (negative
    predictive entropy), "Margin" (top-1 minus top-2 softmax probability).
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, selection_method="Margin",
                 specific_model=None, balance=False, **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
        selection_choices = ["LeastConfidence",
                             "Entropy",
                             "Margin"]
        if selection_method not in selection_choices:
            raise NotImplementedError("Selection algorithm unavailable.")
        self.selection_method = selection_method
        self.epochs = epochs
        self.balance = balance  # if True, keep a fixed fraction of each class

    def before_train(self):
        # EarlyTrain hook: nothing to do.
        pass

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        # EarlyTrain hook: nothing to do.
        pass

    def after_epoch(self):
        # EarlyTrain hook: nothing to do.
        pass

    def before_run(self):
        # EarlyTrain hook: nothing to do.
        pass

    def num_classes_mismatch(self):
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        # EarlyTrain hook: nothing to do.
        pass

    def finish_run(self):
        if self.balance:
            # Per-class selection: keep the *lowest*-scoring fraction of each class.
            selection_result = np.array([], dtype=np.int64)
            scores = []
            for c in range(self.num_classes):
                print(f"Balance Processing on the train set class {c}")
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                scores.append(self.rank_uncertainty_clip(class_index))
                selection_result = np.append(selection_result, class_index[np.argsort(scores[-1])[
                                             :round(len(class_index) * self.fraction)]])
        else:
            print(f"Imbalance Processing on the train set class")
            scores = self.rank_uncertainty_clip()
            # NOTE(review): this branch keeps the *highest* scores ([::-1]) while
            # the balanced branch keeps the lowest — confirm which ordering is
            # intended for each uncertainty metric.
            selection_result = np.argsort(scores)[::-1][:self.coreset_size]
        return {"indices": selection_result, "scores": scores}

    def rank_uncertainty(self, index=None):
        # NOTE(review): puts self.specific_model in eval mode but runs inference
        # with self.model below; finish_run uses rank_uncertainty_clip instead,
        # so this variant appears unused here — confirm before relying on it.
        self.specific_model.eval()
        with torch.no_grad():
            train_loader = torch.utils.data.DataLoader(
                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
                batch_size=self.args.selection_batch,
                num_workers=self.args.workers)
            scores = np.array([])
            batch_num = len(train_loader)
            for i, (input, _) in enumerate(train_loader):
                if i % self.args.print_freq == 0:
                    print("| Selecting for batch [%3d/%3d]" % (i + 1, batch_num))
                if self.selection_method == "LeastConfidence":
                    # Score = maximum raw logit per sample.
                    scores = np.append(scores, self.model(input.to(self.args.device)).max(axis=1).values.cpu().numpy())
                elif self.selection_method == "Entropy":
                    # Score = sum p * log p, i.e. the *negative* predictive entropy.
                    preds = torch.nn.functional.softmax(self.model(input.to(self.args.device)), dim=1).cpu().numpy()
                    scores = np.append(scores, (np.log(preds + 1e-6) * preds).sum(axis=1))
                elif self.selection_method == 'Margin':
                    # Score = top-1 minus top-2 softmax probability (small = uncertain).
                    preds = torch.nn.functional.softmax(self.model(input.to(self.args.device)), dim=1)
                    preds_argmax = torch.argmax(preds, dim=1)
                    max_preds = preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax].clone()
                    # Mask out the top-1 entry so a second argmax finds the runner-up.
                    preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax] = -1.0
                    preds_sub_argmax = torch.argmax(preds, dim=1)
                    scores = np.append(scores, (max_preds - preds[
                                       torch.ones(preds.shape[0], dtype=bool), preds_sub_argmax]).cpu().numpy())
        return scores

    def rank_uncertainty_clip(self, index=None):
        # Scores samples (optionally restricted to `index`) with the configured
        # uncertainty metric, using the project data loader; returns a 1-D array.
        self.model.eval()
        with torch.no_grad():
            train_loader = select_dm_loader(self.args, self.dst_train, index)
            scores = np.array([])
            for i, batch in enumerate(train_loader):
                # if i % self.args.print_freq == 0:
                #     print("| Selecting for batch [%3d/%3d]" % (i + 1, batch_num))
                image, label = batch['img'].cuda(), batch['label'].cuda()
                logits = self.model(image, label)  ##Eval mode
                if self.selection_method == "LeastConfidence":
                    # Score = maximum raw logit per sample.
                    scores = np.append(scores, logits.max(axis=1).values.cpu().numpy())
                elif self.selection_method == "Entropy":
                    # Score = sum p * log p, i.e. the *negative* predictive entropy.
                    preds = torch.softmax(logits, dim=1).cpu().numpy()
                    scores = np.append(scores, (np.log(preds + 1e-6) * preds).sum(axis=1))
                elif self.selection_method == 'Margin':
                    # Score = top-1 minus top-2 softmax probability (small = uncertain).
                    preds = torch.softmax(logits, dim=1)
                    preds_argmax = torch.argmax(preds, dim=1)
                    max_preds = preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax].clone()
                    # Mask out the top-1 entry so a second argmax finds the runner-up.
                    preds[torch.ones(preds.shape[0], dtype=bool), preds_argmax] = -1.0
                    preds_sub_argmax = torch.argmax(preds, dim=1)
                    scores = np.append(scores, (max_preds - preds[torch.ones(preds.shape[0], dtype=bool), preds_sub_argmax]).cpu().numpy())
        # Restore training mode after scoring.
        self.model.train()
        return scores

    def select(self, **kwargs):
        # Run early training, then select via finish_run (invoked by run()).
        selection_result = self.run()
        return selection_result

    def select_without_train(self):
        # Score and select with the model as-is, skipping early training.
        selection_result = self.finish_run()
        return selection_result

View File

@@ -0,0 +1,34 @@
import numpy as np
from .coresetmethod import CoresetMethod
class Uniform(CoresetMethod):
    """Random (uniform) coreset selection, optionally stratified per class."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, balance=True, replace=False, **kwargs):
        super().__init__(dst_train, args, fraction, random_seed)
        self.balance = balance  # stratify by class when True
        self.replace = replace  # sample with replacement when True
        self.n_train = len(self.dst_train)

    def select_balance(self):
        """The same sampling proportions were used in each class separately."""
        np.random.seed(self.random_seed)
        chosen = np.array([], dtype=np.int64)
        candidates = np.arange(self.n_train)
        for c in range(self.num_classes):
            class_mask = (self.dst_train_label == c)
            picked = np.random.choice(candidates[class_mask],
                                      round(self.fraction * class_mask.sum().item()),
                                      replace=self.replace)
            chosen = np.append(chosen, picked)
        self.index = chosen
        return self.index

    def select_no_balance(self):
        """Sample uniformly from the whole training set, ignoring class labels."""
        np.random.seed(self.random_seed)
        self.index = np.random.choice(np.arange(self.n_train), round(self.n_train * self.fraction),
                                      replace=self.replace)
        return self.index

    def select(self, **kwargs):
        picked = self.select_balance() if self.balance else self.select_no_balance()
        return {"indices": picked}