Upload to Main
This commit is contained in:
@@ -0,0 +1,17 @@
|
||||
from .cal import *
|
||||
from .contextualdiversity import *
|
||||
from .coresetmethod import *
|
||||
from .craig import *
|
||||
from .deepfool import *
|
||||
from .earlytrain import *
|
||||
from .forgetting import *
|
||||
from .full import *
|
||||
from .glister import *
|
||||
from .grand import *
|
||||
from .gradmatch import *
|
||||
from .herding import *
|
||||
from .kcentergreedy import *
|
||||
from .submodular import *
|
||||
from .uncertainty import *
|
||||
from .uniform import *
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,146 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
from .methods_utils.euclidean import euclidean_dist_pair_np
|
||||
from .methods_utils.cossim import cossim_pair_np
|
||||
import numpy as np
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
from .. import nets
|
||||
from copy import deepcopy
|
||||
from torchvision import transforms
|
||||
|
||||
|
||||
class Cal(EarlyTrain):
    """Coreset selection that scores each sample against its nearest neighbours.

    Embeddings (a blend of the model's image and text features) are used to
    find every sample's ``neighbors`` nearest neighbours.  A sample's score is
    the mean symmetric KL-style divergence between its softmax output and the
    softmax outputs of those neighbours; ``finish_run`` keeps the samples with
    the lowest scores.
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance=False, metric="euclidean", neighbors: int = 10, pretrain_model: str = "ResNet18", **kwargs):
        """Configure the selector.

        :param balance: when true, neighbours are searched and samples are
            selected separately inside every class.
        :param metric: ``"euclidean"``, ``"cossim"`` or a callable that maps an
            embedding matrix to a pairwise distance matrix; any other value
            falls back to the euclidean distance.
        :param neighbors: number of nearest neighbours, must be in ``(0, 100)``.
        :param pretrain_model: stored on the instance only; not used by the
            methods of this class.
        """
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

        self.balance = balance

        assert neighbors > 0 and neighbors < 100
        self.neighbors = neighbors

        if metric == "cossim":
            # find_knn() calls the metric with a single embedding matrix, so
            # forward whatever positional arguments are supplied instead of
            # demanding exactly two.  Negated so that "smaller" means "closer".
            self.metric = lambda *mats: -1. * cossim_pair_np(*mats)
        elif callable(metric):
            self.metric = metric
        else:
            # "euclidean" and every unrecognised value.
            self.metric = euclidean_dist_pair_np

        self.pretrain_model = pretrain_model

    def num_classes_mismatch(self):
        """Reject a pretrain dataset whose class count differs from the training set."""
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    # Initial implementation; the mixing rule may not be optimal.
    def mixing_feature(self, img_fea, text_fea, lam=0.5):
        """Return the convex combination ``lam * img_fea + (1 - lam) * text_fea``."""
        return lam * img_fea + (1 - lam) * text_fea

    def _collect_features(self, data_loader):
        """Run the model over ``data_loader`` and gather embeddings and softmax outputs.

        Results are accumulated batch by batch, so no assumption is made about
        the loader's batch size.

        :return: ``(embeddings, probabilities)`` as numpy arrays whose rows
            follow the order in which the loader yields samples.
        """
        embeddings, probabilities = [], []
        for batch in data_loader:
            image, label = batch['img'].cuda(), batch['label'].cuda()
            img_f, text_f, logit = self.model(image, label, record=True)
            final_feature = self.mixing_feature(img_f, text_f)
            # detach() so numpy conversion also works when the forward pass
            # tracked gradients.
            embeddings.append(final_feature.detach().cpu().numpy())
            probabilities.append(torch.softmax(logit, dim=1).detach().cpu().numpy())
        return np.concatenate(embeddings, axis=0), np.concatenate(probabilities, axis=0)

    def find_knn(self):
        """
        Find k-nearest-neighbor data points with the current embedding model.

        Also stores the softmax outputs in ``self.probs`` as an
        ``(n_train, num_classes)`` array indexed by dataset position.

        :return: knn matrix for the whole dataset, or (when ``balance`` is set)
            a list with one knn matrix per class whose entries are positions
            inside that class.
        """
        self.model.eval()

        # Calculate the distance matrix and return knn results
        if self.balance:
            # Rows are written at their dataset positions so that calc_kl()
            # can index self.probs with class_index regardless of how the
            # dataset is ordered.
            probs = np.zeros([self.n_train, self.num_classes])
            knn = []
            for c in tqdm(range(self.num_classes)):
                print(f'Start processing class {c}/{self.num_classes}')
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                if len(class_index) == 0:
                    # Nothing to embed; keep the per-class list aligned.
                    knn.append(np.zeros([0, self.neighbors], dtype=np.int64))
                    continue

                # assumes the loader yields samples in class_index order
                # (is_train=False) — TODO confirm against select_dm_loader
                data_loader = self.select_dm(self.dst_train, class_index, is_train=False)
                embeddings, class_probs = self._collect_features(data_loader)
                probs[class_index] = class_probs
                # Column 0 of the sorted distances is the sample itself.
                knn.append(np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)])
            self.probs = probs
            return knn

        batch_loader = self.select_dm(self.dst_train, None, is_train=False)
        print(f'Start processing all class')
        embeddings, all_probs = self._collect_features(tqdm(batch_loader))
        self.probs = all_probs.astype(np.float64)
        return np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)]

    def calc_kl(self, knn, index=None):
        """Score samples by their mean divergence to their nearest neighbours.

        :param knn: neighbour positions, relative to ``index`` when given.
        :param index: dataset positions to score; ``None`` scores everything.
        :return: 1-D array with one score per scored sample.

        NOTE(review): probabilities that are exactly zero produce ``nan``/``inf``
        inside ``np.log``; the original formula is kept unchanged.
        """
        self.model.eval()
        self.model.no_grad = True
        sample_num = self.n_train if index is None else len(index)
        # Indexing with None would add an axis rather than select everything.
        probs = self.probs if index is None else self.probs[index]

        s = np.zeros(sample_num)
        step = self.args.DATASET.SELECTION_BATCH_SIZE
        for i in range(0, sample_num, step):

            print("| Caculating KL-divergence for batch [%3d/%3d] with batchsize [%3d]" % (i, sample_num, self.args.DATASET.SELECTION_BATCH_SIZE))
            neighbour_ids = knn[i:(i + step)]
            if neighbour_ids.shape[1] == 0:
                # A group with a single sample has no neighbours to compare to.
                continue
            # (b, 1, C) broadcasts against (b, k, C); k may be smaller than
            # self.neighbors when a class has few samples.
            aa = np.expand_dims(probs[i:(i + step)], 1)
            bb = probs[neighbour_ids, :]
            s[i:(i + step)] = np.mean(
                np.sum(0.5 * aa * np.log(aa / bb) + 0.5 * bb * np.log(bb / aa), axis=2), axis=1)
        self.model.no_grad = False
        return s

    def finish_run(self):
        """Pick the lowest-scoring samples.

        :return: ``{"indices": ..., "scores": ...}``; ``scores`` holds one
            array per class in balanced mode and is an empty list otherwise.
        """
        scores = []
        if self.balance:
            selection_result = np.array([], dtype=np.int32)
            for c, knn in zip(range(self.num_classes), self.knn):
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                scores.append(self.calc_kl(knn, class_index))
                keep = round(self.fraction * len(class_index))
                selection_result = np.append(selection_result, class_index[np.argsort(scores[-1])[:keep]])
        else:
            selection_result = np.argsort(self.calc_kl(self.knn))[:self.coreset_size]
        return {"indices": selection_result, "scores": scores}

    def select(self, **kwargs):
        """Compute the neighbour structure, then run the inherited pipeline."""
        self.knn = self.find_knn()
        selection_result = self.run()
        return selection_result
|
||||
@@ -0,0 +1,33 @@
|
||||
from .kcentergreedy import kCenterGreedy
|
||||
import torch
|
||||
|
||||
|
||||
# Acknowledgement to:
|
||||
# https://github.com/sharat29ag/CDAL
|
||||
|
||||
|
||||
class ContextualDiversity(kCenterGreedy):
    """k-Center-Greedy selection that uses a divergence between softmax outputs as distance."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
                 specific_model=None, balance=True, already_selected=None, torchvision_pretrain: bool = False, **kwargs):
        """Forward all arguments to ``kCenterGreedy`` and install ``_metric`` as the metric.

        :param already_selected: indices selected beforehand; ``None`` (the
            default) is passed on as a fresh empty list so that no list object
            is shared between instances.
        """
        if already_selected is None:
            already_selected = []
        super(ContextualDiversity, self).__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, balance=balance, already_selected=already_selected, torchvision_pretrain=torchvision_pretrain, **kwargs)
        self.metric = self._metric

    def _metric(self, a_output, b_output):
        """Return the pairwise symmetric divergence between rows of two 2-D tensors.

        :param a_output: tensor of shape ``(n_a, C)``.
        :param b_output: tensor of shape ``(n_b, C)``.
        :return: tensor of shape ``(n_a, n_b)``.
        """
        with torch.no_grad():
            # Overload self.metric function for kCenterGreedy Algorithm.
            # Broadcasting (n_a, 1, C) against (1, n_b, C) yields the same
            # values as repeating both operands, without materialising them.
            aa = a_output.unsqueeze(1)
            bb = b_output.unsqueeze(0)
            return torch.sum(0.5 * aa * torch.log(aa / bb) + 0.5 * bb * torch.log(bb / aa), dim=2)

    def construct_matrix(self, index=None):
        """Return the softmax outputs of the model for the requested samples.

        :param index: dataset positions to evaluate; ``None`` evaluates all.
        :return: tensor of shape ``(sample_num, args.num_classes)`` on
            ``args.device``.

        NOTE(review): this method reads ``args.num_classes``,
        ``args.selection_batch`` and ``args.workers`` and expects the loader to
        yield ``(inputs, target)`` tuples — confirm this matches the config and
        dataset objects used by the rest of the package.
        """
        self.model.eval()
        self.model.no_grad = True
        try:
            sample_num = self.n_train if index is None else len(index)
            matrix = torch.zeros([sample_num, self.args.num_classes], requires_grad=False).to(self.args.device)
            dataset = self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index)
            batch_size = self.args.selection_batch
            batch_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                       num_workers=self.args.workers)
            for i, (inputs, _) in enumerate(batch_loader):
                start = i * batch_size
                stop = min((i + 1) * batch_size, sample_num)
                matrix[start:stop] = torch.nn.functional.softmax(self.model(inputs.to(self.args.device)), dim=1)
        finally:
            # Restore the flag even if a forward pass fails.
            self.model.no_grad = False
        return matrix
|
||||
@@ -0,0 +1,49 @@
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
class CoresetMethod(object):
    """Base class holding the data and bookkeeping shared by all selection methods."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs):
        """Store the training data and derive the coreset size.

        :param dst_train: data manager; ``dst_train.dataset.train_x`` is used as
            the training set and ``dst_train.dataset.num_classes`` as the class
            count.
        :param args: configuration object; ``args.OPTIM_SELECTION.MAX_EPOCH`` is
            read here.
        :param fraction: share of the training set to keep, in ``(0, 1]``.
        :param random_seed: stored on the instance.
        :raises ValueError: if ``fraction`` is outside ``(0, 1]``.
        """
        if fraction <= 0.0 or fraction > 1.0:
            raise ValueError("Illegal Coreset Size.")

        self.dm = dst_train
        self.dst_train = dst_train.dataset.train_x
        self.num_classes = dst_train.dataset.num_classes
        self.fraction = fraction
        self.random_seed = random_seed
        self.index = []
        self.args = args
        self.dst_train_label = self.get_train_label(self.dst_train)
        self.n_train = len(self.dst_train)
        self.coreset_size = round(self.n_train * fraction)
        self.max_epoch = self.args.OPTIM_SELECTION.MAX_EPOCH

    def select(self, **kwargs):
        """Selection entry point; the base implementation returns ``None``."""
        return

    def get_train_label(self, dst_train):
        """Return the ``label`` attribute of every item of ``dst_train`` as a numpy array."""
        return np.asarray([item.label for item in dst_train])

    def pre_run(self):
        """Tune the model on the whole dataset, or reload a cached checkpoint.

        The checkpoint lives at ``checkpoints/<DATASET.NAME>_<SEED>.pth``.  If
        it exists it is loaded into ``self.model``; otherwise the model is
        trained for the configured number of epochs and saved there.

        NOTE(review): this relies on ``self.model``, ``self.epoch``,
        ``before_epoch``, ``train``, ``test`` and ``after_epoch``, none of which
        are defined in this class — they must be provided by a subclass.
        """
        self.train_indx = np.arange(self.n_train)
        print(f'Start pre-funing CLIP with all datasets by {self.max_epoch} epoch')
        file_save_name = self.args.DATASET.NAME + '_' + str(self.args.SEED) + '.pth'
        output_checkpoint_dir = os.path.join('checkpoints', file_save_name)
        if self.max_epoch <= 0:
            return

        # Imported here because this module's import section does not
        # provide torch.
        import torch

        if os.path.exists(output_checkpoint_dir):
            print(f'The checkpiont exists! Load that shit')
            ckpt = torch.load(output_checkpoint_dir)
            self.model.load_state_dict(ckpt)
        else:
            for epoch in range(self.epoch, self.max_epoch):
                self.before_epoch()  # PASS
                self.train(epoch)
                self.test(epoch)
                self.after_epoch()
            # torch.save does not create missing parent directories.
            os.makedirs(os.path.dirname(output_checkpoint_dir), exist_ok=True)
            torch.save(self.model.state_dict(), output_checkpoint_dir)
|
||||
@@ -0,0 +1,126 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch
|
||||
from .methods_utils import FacilityLocation, submodular_optimizer
|
||||
import numpy as np
|
||||
from .methods_utils.euclidean import euclidean_dist_pair_np
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
from tqdm import tqdm
|
||||
|
||||
class Craig(EarlyTrain):
    """Coreset selection by facility-location maximisation over pairwise gradient distances."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance=True, greedy="LazyGreedy", **kwargs):
        """Configure the selector.

        :param balance: when true, the selection is done class by class.
        :param greedy: name of an optimizer listed in
            ``submodular_optimizer.optimizer_choices``.
        :raises ModuleNotFoundError: if ``greedy`` is not a known optimizer.
        """
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

        if greedy not in submodular_optimizer.optimizer_choices:
            raise ModuleNotFoundError("Greedy optimizer not found.")
        self._greedy = greedy
        self.balance = balance

    # The training hooks below are deliberately no-ops for this method.
    def before_train(self):
        pass

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        pass

    def before_epoch(self):
        pass

    def after_epoch(self):
        pass

    def before_run(self):
        pass

    def num_classes_mismatch(self):
        """Reject a pretrain dataset whose class count differs from the training set."""
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    # Gradient computation is provided by the inherited calc_gradient().

    def calc_weights(self, matrix, result):
        """Weight each selected sample by how many samples it represents.

        Every column of ``matrix`` is assigned to the selected row with the
        largest similarity; a selected sample's weight is one plus the number
        of columns assigned to it.

        :param matrix: similarity matrix.
        :param result: boolean mask or index array of the selected rows.
        :return: 1-D float array with one weight per selected row.
        """
        min_sample = np.argmax(matrix[result], axis=0)
        weights = np.ones(np.sum(result) if result.dtype == bool else len(result))
        # Unbuffered add so repeated indices accumulate, like the explicit loop.
        np.add.at(weights, min_sample, 1)
        return weights

    def _selection_batch_size(self):
        """Return the batch size for batched gain evaluation.

        The flat ``args.selection_batch`` is honoured when present; otherwise
        the nested ``args.DATASET.SELECTION_BATCH_SIZE`` used elsewhere in this
        package is read.
        """
        batch = getattr(self.args, "selection_batch", None)
        if batch is None:
            batch = self.args.DATASET.SELECTION_BATCH_SIZE
        return batch

    def finish_run(self):
        """Select the coreset and compute per-sample weights.

        :return: ``{"indices": selected indices, "weights": their weights}``.
        """
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module

        self.model.no_grad = True
        try:
            grad = self.calc_gradient()
            grad_matrix = euclidean_dist_pair_np(grad)

            if self.balance:
                # Do selection by class
                selection_result = np.array([], dtype=np.int32)
                weights = np.array([])
                for c in tqdm(range(self.num_classes)):
                    class_index = np.arange(self.n_train)[self.dst_train_label == c]
                    if len(class_index) == 0:
                        # np.min() of an empty block would raise.
                        continue
                    # Negated distance acts as similarity.
                    matrix = -1. * grad_matrix[np.ix_(class_index, class_index)]
                    matrix -= np.min(matrix) - 1e-3  # shift so the smallest entry is 1e-3
                    submod_function = FacilityLocation(index=class_index, similarity_matrix=matrix)
                    submod_optimizer = submodular_optimizer.__dict__[self._greedy](
                        args=self.args, index=class_index, budget=round(self.fraction * len(class_index)))
                    class_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
                                                           update_state=submod_function.update_state)
                    selection_result = np.append(selection_result, class_result)
                    weights = np.append(weights, self.calc_weights(matrix, np.isin(class_index, class_result)))
            else:
                matrix = np.zeros([self.n_train, self.n_train])
                all_index = np.arange(self.n_train)
                for c in range(self.num_classes):  # Sparse Matrix
                    class_index = np.arange(self.n_train)[self.dst_train_label == c]
                    if len(class_index) == 0:
                        continue
                    block = np.ix_(class_index, class_index)
                    # Reuse the pairwise distances computed above: the raw
                    # calc_gradient() output is not an n_c x n_c matrix.
                    matrix[block] = -1. * grad_matrix[block]
                    matrix[block] -= np.min(matrix[block]) - 1e-3
                submod_function = FacilityLocation(index=all_index, similarity_matrix=matrix)
                submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=all_index,
                                                                              budget=self.coreset_size)
                selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain_batch,
                                                           update_state=submod_function.update_state,
                                                           batch=self._selection_batch_size())
                weights = self.calc_weights(matrix, selection_result)
        finally:
            # Restore the flag even when selection fails.
            self.model.no_grad = False
        return {"indices": selection_result, "weights": weights}

    def select(self, **kwargs):
        """Run the inherited pipeline and return its selection result."""
        selection_result = self.run()
        return selection_result
|
||||
@@ -0,0 +1,120 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
|
||||
class DeepFool(EarlyTrain):
    """Coreset selection that ranks samples by the size of their DeepFool perturbation."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
                 specific_model=None, balance: bool = False, max_iter: int = 50, **kwargs):
        """Configure the selector.

        :param balance: when true, samples are ranked and selected per class.
        :param max_iter: maximum number of DeepFool iterations per batch.
        """
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

        self.balance = balance
        self.max_iter = max_iter

    def num_classes_mismatch(self):
        """Reject a pretrain dataset whose class count differs from the training set."""
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        """Print the running loss every ``args.print_freq`` batches.

        NOTE(review): reads the flat ``args.print_freq`` while ``EarlyTrain.train``
        reads ``args.TRAIN.PRINT_FREQ`` — confirm the config provides both.
        """
        if batch_idx % self.args.print_freq == 0:
            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
                epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

    def finish_run(self):
        """Score every training sample and keep those with the smallest perturbation.

        :return: ``{"indices": selected indices, "scores": per-sample scores}``.

        NOTE(review): the loader below expects ``self.dst_train`` to yield
        ``(inputs, targets)`` tuples and reads ``args.selection_batch`` /
        ``args.workers`` / ``args.print_freq`` — confirm against the dataset and
        config objects used by the rest of the package.
        """
        self.model.no_grad = False

        # Create a data loader for self.dst_train with batch size self.args.selection_batch
        batch_size = self.args.selection_batch
        batch_loader = torch.utils.data.DataLoader(self.dst_train, batch_size=batch_size,
                                                   num_workers=self.args.workers)

        r = np.zeros(self.n_train, dtype=np.float32)
        batch_num = len(batch_loader)
        for i, (inputs, targets) in enumerate(batch_loader):
            if i % self.args.print_freq == 0:
                print('| Selecting Batch [%3d/%3d]' % (i + 1, batch_num))
            start = i * batch_size
            r[start:(start + targets.shape[0])] = self.deep_fool(inputs)

        if self.balance:
            selection_result = np.array([], dtype=np.int64)
            for c in range(self.args.num_classes):
                # The label array built by the base class is used because the
                # training set stored there is a plain item list without a
                # ``targets`` attribute.
                class_index = np.arange(self.n_train)[self.dst_train_label == c]
                selection_result = np.append(selection_result, class_index[
                    r[class_index].argsort()[:round(len(class_index) * self.fraction)]])
        else:
            selection_result = r.argsort()[:self.coreset_size]
        return {"indices": selection_result, "scores": r}

    def deep_fool(self, inputs):
        """Run the DeepFool iteration on one batch.

        Each sample is pushed towards its nearest decision boundary until the
        model's prediction changes or ``max_iter`` is reached.

        :param inputs: 4-D input batch.
        :return: 1-D numpy array with the squared L2 norm of each sample's
            accumulated perturbation.
        """
        # Here, start running DeepFool algorithm.
        self.model.eval()

        # Initialize a boolean mask indicating if selection has been stopped at corresponding positions.
        sample_size = inputs.shape[0]
        boolean_mask = np.ones(sample_size, dtype=bool)
        all_idx = np.arange(sample_size)

        # A matrix to store total pertubations.
        r_tot = np.zeros([sample_size, inputs.shape[1] * inputs.shape[2] * inputs.shape[3]])
        original_shape = inputs.shape[1:]

        # Work on a detached leaf so the caller's tensor is left untouched.
        cur_inputs = inputs.detach().to(self.args.device).requires_grad_(True)

        # Freeze all network parameters to accelerate autograd; the previous
        # flags are restored afterwards so later training still updates them.
        params = list(self.model.parameters())
        saved_flags = [p.requires_grad for p in params]
        for p in params:
            p.requires_grad_(False)

        try:
            self.model.no_grad = True
            first_preds = self.model(cur_inputs).argmax(dim=1)
            self.model.no_grad = False

            num_classes = self.args.num_classes
            for _ in range(self.max_iter):
                f_all = self.model(cur_inputs)

                # Gradient of every class score with respect to the inputs.
                w_k = []
                for c in range(num_classes):
                    w_k.append(torch.autograd.grad(f_all[:, c].sum(), cur_inputs,
                                                   retain_graph=(c + 1 != num_classes))[0].flatten(1))
                w_k = torch.stack(w_k, dim=0)

                # Position of every still-active sample inside the batch.
                rows = torch.arange(cur_inputs.shape[0], device=w_k.device)

                w_k = w_k - w_k[first_preds, rows].unsqueeze(0)
                w_k_norm = w_k.norm(dim=2)
                # Set w_k_norm for preds positions to 1. to avoid division by zero.
                w_k_norm[first_preds, rows] = 1.

                l_all = (f_all - f_all[rows, first_preds].unsqueeze(1)).detach().abs() / w_k_norm.t()
                # Set l_k for preds positions to inf, as the argmin for each
                # row will be calculated soon.
                l_all[rows, first_preds] = np.inf

                l_hat = l_all.argmin(dim=1)
                r_i = l_all[rows, l_hat].unsqueeze(1) / w_k_norm[l_hat, rows].unsqueeze(1) * w_k[l_hat, rows]

                # Update r_tot values.
                r_tot[boolean_mask] += r_i.cpu().numpy()

                # Out-of-place update: an in-place add on a tensor that
                # requires grad is rejected for leaves and otherwise keeps
                # extending the autograd graph.
                step = r_i.reshape([r_i.shape[0]] + list(original_shape))
                cur_inputs = (cur_inputs.detach() + step).requires_grad_(True)

                # Re-input the updated sample into the network and get new predictions.
                self.model.no_grad = True
                preds = self.model(cur_inputs).argmax(dim=1)
                self.model.no_grad = False

                # In DeepFool algorithm, the iteration stops when the updated sample produces a different prediction
                # in the model.
                index_unfinished = (preds == first_preds)
                if torch.all(~index_unfinished):
                    break

                cur_inputs = cur_inputs.detach()[index_unfinished].requires_grad_(True)
                first_preds = first_preds[index_unfinished]
                boolean_mask[all_idx[boolean_mask][~index_unfinished.cpu().numpy()]] = False
        finally:
            for p, flag in zip(params, saved_flags):
                p.requires_grad_(flag)
            self.model.no_grad = False

        return (r_tot * r_tot).sum(axis=1)

    def select(self, **kwargs):
        """Run the inherited pipeline and return its selection result."""
        selection_result = self.run()
        return selection_result
|
||||
@@ -0,0 +1,322 @@
|
||||
from .coresetmethod import CoresetMethod
|
||||
import torch, time
|
||||
from torch import nn
|
||||
import numpy as np
|
||||
from copy import deepcopy
|
||||
from .. import nets
|
||||
from torchvision import transforms
|
||||
from datasets.data_manager import select_dm_loader
|
||||
from dassl.utils import MetricMeter, AverageMeter
|
||||
from torch.cuda.amp import GradScaler, autocast
|
||||
import datetime
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
|
||||
class EarlyTrain(CoresetMethod):
|
||||
'''
|
||||
Core code for training related to coreset selection methods when pre-training is required.
|
||||
'''
|
||||
|
||||
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
             torchvision_pretrain: bool = False, dst_pretrain_dict: dict = None, fraction_pretrain=1., dst_test=None,
             **kwargs):
    """Set up the model, loaders and optional pretrain dataset.

    :param specific_model: model that is trained and later used for selection.
    :param torchvision_pretrain: when true and ``args.im_size`` is not
        224x224, the datasets get an extra ``Resize(224)`` transform.
    :param dst_pretrain_dict: optional description of a separate pretrain
        dataset; must contain ``im_size``, ``channel``, ``dst_train`` and
        ``num_classes``.  ``None`` (the default) means "no pretrain dataset".
    :param fraction_pretrain: share of the data used for pretraining,
        in ``(0, 1]``.
    :param kwargs: when non-empty, must contain ``optim``, ``schedule`` and
        ``scar`` (the gradient scaler).
    :raises ValueError: on an illegal ``fraction_pretrain`` or a pretrain
        dataset whose image size or channel count differs.
    :raises AttributeError: if ``dst_pretrain_dict`` lacks a required key.
    """
    super().__init__(dst_train, args, fraction, random_seed)
    # A fresh dict per instance: the attribute is mutated further below.
    if dst_pretrain_dict is None:
        dst_pretrain_dict = {}

    self.epochs = epochs
    self.n_train = len(self.dst_train)
    self.coreset_size = round(self.n_train * fraction)
    self.model = specific_model
    self.train_loader = self.dm.train_loader_x
    self.test_loader = self.dm.test_loader

    if kwargs:
        self.optim = kwargs['optim']
        self.sche = kwargs['schedule']
        self.scar = kwargs['scar']

    self.start_epoch = self.epoch = 0
    self.max_epoch = self.args.OPTIM_SELECTION.MAX_EPOCH

    if fraction_pretrain <= 0. or fraction_pretrain > 1.:
        raise ValueError("Illegal pretrain fraction value.")
    self.fraction_pretrain = fraction_pretrain

    if len(dst_pretrain_dict) != 0:
        required_keys = ('im_size', 'channel', 'dst_train', 'num_classes')
        if any(key not in dst_pretrain_dict for key in required_keys):
            raise AttributeError(
                'Argument dst_pretrain_dict must contain im_size, channel, dst_train and num_classes.')
        # Both dimensions must match, not just the first one.
        if dst_pretrain_dict['im_size'][0] != args.im_size[0] or dst_pretrain_dict['im_size'][1] != args.im_size[1]:
            raise ValueError("im_size of pretrain dataset does not match that of the training dataset.")
        if dst_pretrain_dict['channel'] != args.channel:
            raise ValueError("channel of pretrain dataset does not match that of the training dataset.")
        if dst_pretrain_dict['num_classes'] != args.num_classes:
            self.num_classes_mismatch()

    self.dst_pretrain_dict = dst_pretrain_dict
    self.torchvision_pretrain = torchvision_pretrain
    self.if_dst_pretrain = (len(self.dst_pretrain_dict) != 0)

    if torchvision_pretrain:
        # Pretrained models in torchvision only accept 224*224 inputs, therefore we resize current
        # datasets to 224*224.
        # NOTE(review): this replaces self.dst_train with a copy of the
        # constructor argument rather than of its train_x list — confirm
        # that is intended.
        if args.im_size[0] != 224 or args.im_size[1] != 224:
            self.dst_train = deepcopy(dst_train)
            self.dst_train.transform = transforms.Compose([self.dst_train.transform, transforms.Resize(224)])
            if self.if_dst_pretrain:
                self.dst_pretrain_dict['dst_train'] = deepcopy(dst_pretrain_dict['dst_train'])
                self.dst_pretrain_dict['dst_train'].transform = transforms.Compose(
                    [self.dst_pretrain_dict['dst_train'].transform, transforms.Resize(224)])

    if self.if_dst_pretrain:
        self.n_pretrain = len(self.dst_pretrain_dict['dst_train'])
    self.n_pretrain_size = round(
        self.fraction_pretrain * (self.n_pretrain if self.if_dst_pretrain else self.n_train))
    self.dst_test = dst_test
|
||||
|
||||
|
||||
def train(self, epoch, list_of_train_idx=None, **kwargs):
    """Train the model for one epoch.

    Calls ``before_train`` first, ``after_loss`` and ``while_update`` after
    every optimisation step, steps the scheduler after the last batch and
    returns the result of ``finish_train``.

    :param epoch: zero-based index of the epoch being trained; used for the
        progress and ETA output.
    :param list_of_train_idx: unused.
    """
    self.before_train()
    self.model.train()

    losses = MetricMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    end = time.time()

    print('\n=> Training Pre-tuning Epoch #%d' % epoch)
    train_loader = select_dm_loader(self.args, self.dst_train, is_train=True)
    self.num_batches = len(train_loader)

    # Loop-invariant lookups.
    model = self.model
    optim = self.optim
    scaler = self.scar
    use_amp = self.args.TRAINER.MAPLE.PREC == "amp"
    print_freq = self.args.TRAIN.PRINT_FREQ
    only_few_batches = self.num_batches < print_freq

    for i, batch in enumerate(train_loader):
        data_time.update(time.time() - end)
        image, label, real_ind = batch['img'].cuda(), batch['label'].cuda(), batch['index'].cuda()

        if use_amp:
            # Mixed precision: forward under autocast, scaled backward.
            with autocast():
                loss, outputs = model(image, label)
            optim.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optim)
            scaler.update()
        else:
            loss, outputs = model(image, label)
            optim.zero_grad()
            loss.backward()
            optim.step()

        self.after_loss(outputs, loss, label, real_ind, epoch)
        self.while_update(outputs, loss, label, epoch, i, self.args.DATALOADER.TRAIN_X.BATCH_SIZE)

        loss_summary = {"loss": loss.item()}

        # The scheduler advances once per epoch, after the final batch.
        if (i + 1) == self.num_batches:
            self.sche.step()
        batch_time.update(time.time() - end)
        losses.update(loss_summary)

        meet_freq = (i + 1) % print_freq == 0
        if meet_freq or only_few_batches:
            # Use the epoch argument: self.epoch is never advanced by run(),
            # so it would always report the first epoch.
            nb_remain = self.num_batches - i - 1
            nb_remain += (self.max_epoch - epoch - 1) * self.num_batches
            eta_seconds = batch_time.avg * nb_remain
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))

            info = [
                f"epoch [{epoch + 1}/{self.max_epoch}]",
                f"batch [{i + 1}/{self.num_batches}]",
                f"time {batch_time.val:.3f} ({batch_time.avg:.3f})",
                f"data {data_time.val:.3f} ({data_time.avg:.3f})",
                f"{losses}",
                f"lr {optim.param_groups[0]['lr']:.4e}",
                f"eta {eta}",
            ]
            print(" ".join(info))

        end = time.time()

    return self.finish_train()
|
||||
|
||||
def run(self):
    """Tune the model (or reload a cached checkpoint) and finish the selection.

    The checkpoint lives at ``checkpoints/<DATASET.NAME>_<SEED>.pth``.  If it
    exists it is loaded into ``self.model``; otherwise the model is trained
    and tested for every epoch up to ``self.max_epoch`` and then saved there.
    Nothing is trained or loaded when ``self.max_epoch`` is not positive.

    :return: whatever ``finish_run`` returns.
    """
    self.train_indx = np.arange(self.n_train)
    self.before_run()
    print(f'Start pre-funing CLIP with all datasets by {self.max_epoch} epoch')
    file_save_name = self.args.DATASET.NAME + '_' + str(self.args.SEED) + '.pth'
    output_checkpoint_dir = os.path.join('checkpoints', file_save_name)
    if self.max_epoch > 0:
        if os.path.exists(output_checkpoint_dir):
            print(f'The checkpiont exists! Load that shit')
            ckpt = torch.load(output_checkpoint_dir)
            self.model.load_state_dict(ckpt)
        else:
            for epoch in range(self.epoch, self.max_epoch):
                self.before_epoch()  # PASS
                self.train(epoch)
                self.test(epoch)
                self.after_epoch()
            # torch.save does not create missing parent directories.
            os.makedirs(os.path.dirname(output_checkpoint_dir), exist_ok=True)
            torch.save(self.model.state_dict(), output_checkpoint_dir)

    return self.finish_run()
|
||||
|
||||
def test(self, epoch):
|
||||
self.model.no_grad = True
|
||||
self.model.eval()
|
||||
|
||||
|
||||
correct = 0.
|
||||
total = 0.
|
||||
|
||||
print('\n=> Testing Tuning Epoch #%d' % epoch)
|
||||
|
||||
for batch_idx, batch in enumerate(self.test_loader):
|
||||
image, target = batch['img'].cuda(), batch['label']
|
||||
output = self.model(image, target.cuda())
|
||||
|
||||
|
||||
predicted = torch.max(output.data, 1).indices.cpu()
|
||||
correct += predicted.eq(target).sum().item()
|
||||
total += target.size(0)
|
||||
|
||||
# if batch_idx % self.args.print_freq == 0:
|
||||
# print('| Test Epoch [%3d/%3d] Iter[%3d/%3d]\t\t Test Acc: %.3f%%' % (
|
||||
# epoch, self.epochs, batch_idx + 1, (round(len(self.dst_test) * self.args.selection_test_fraction) //
|
||||
# self.args.selection_batch) + 1, loss.item(),
|
||||
# 100. * correct / total))
|
||||
print(f'| Test Epoch {epoch} Test Acc: {100. * correct / total:.3f}%')
|
||||
self.model.no_grad = False
|
||||
|
||||
def num_classes_mismatch(self):
    """Hook called by ``__init__`` when the pretrain dataset's class count differs; no-op by default."""
    return None
|
||||
|
||||
def before_train(self):
    """Hook called at the start of ``train``; no-op by default."""
    return None
|
||||
|
||||
def after_loss(self, outputs, loss, targets, batch_inds, epoch):
    """Hook called by ``train`` after every optimisation step; no-op by default."""
    return None
|
||||
|
||||
def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    """Hook called by ``train`` once per batch, after ``after_loss``; no-op by default."""
    return None
|
||||
|
||||
def finish_train(self):
    """Hook whose result ``train`` returns at the end of an epoch; returns ``None`` by default."""
    return None
|
||||
|
||||
def before_epoch(self):
    """Hook called by ``run`` before each training epoch; no-op by default."""
    return None
|
||||
|
||||
def after_epoch(self):
    '''No-op hook; ``run`` calls it at the end of every epoch, after ``test``.'''
    return None
|
||||
|
||||
def before_run(self):
    '''No-op hook; subclasses override it to prepare per-run state.'''
    return None
|
||||
|
||||
def finish_run(self):
    '''
    No-op hook returning None.

    Its return value is what ``run`` and ``select_without_train`` hand back,
    so subclasses override it to produce the selection result.
    '''
    return None
|
||||
|
||||
def select(self, **kwargs):
    '''Execute ``run`` and return its result unchanged; ``kwargs`` are ignored.'''
    return self.run()
|
||||
|
||||
def select_without_train(self, **kwargs):
    '''Skip ``run`` and return the result of ``finish_run`` directly; ``kwargs`` are ignored.'''
    outcome = self.finish_run()
    return outcome
|
||||
|
||||
@torch.no_grad()
def calcluate_clip_probability(self,batch):
    '''
    Return scaled image-to-text similarity logits for one batch.

    The images in ``batch["img"]`` are encoded with ``self.specific_model``,
    L2-normalised along the last dimension, multiplied by the exponentiated
    ``logit_scale`` and matrix-multiplied with ``self.text_feature`` transposed.
    '''
    images = batch["img"].cuda()

    # The model is moved to the GPU (and stored back) on every call.
    self.specific_model = self.specific_model.cuda()
    encoder = self.specific_model

    features = encoder.encode_image(images)
    features = features / features.norm(dim=-1, keepdim=True)
    scale = encoder.logit_scale.exp()
    return scale * features @ self.text_feature.t()
|
||||
|
||||
# using the defined select_dm
|
||||
def select_dm(self,data,ind=None,is_train=None):
    '''Build a loader by forwarding ``self.args`` and the arguments to ``select_dm_loader``.'''
    loader = select_dm_loader(self.args, data, ind, is_train)
    return loader
|
||||
|
||||
|
||||
def parse_batch_test(self, batch):
    '''Return ``(img, label)`` from ``batch`` after calling ``.cuda()`` on each.'''
    images = batch["img"]
    labels = batch["label"]
    return images.cuda(), labels.cuda()
|
||||
|
||||
def parse_batch_train(self, batch):
    '''
    Return ``(img, label, index)`` from ``batch`` after calling ``.cuda()`` on each.

    The third element is read from the ``"index"`` key.
    '''
    images = batch["img"].cuda()
    labels = batch["label"].cuda()
    sample_ids = batch["index"].cuda()
    return images, labels, sample_ids
|
||||
|
||||
|
||||
|
||||
def calc_gradient(self, index=None):
    '''
    Calculate gradients matrix on current network for specified training dataset.

    For every batch the model is called with ``cal_gradient=True`` and must
    return ``(loss, visual_embedding, logit)``. Each output row is the
    gradient of the loss w.r.t. the logits, followed by the flattened product
    of that gradient with the visual embedding.

    Args:
        index: forwarded to ``self.select_dm`` to restrict the samples; None
            forwards no restriction.

    Returns:
        float32 numpy array with one row per sample.
    '''
    self.model.eval()
    data_loader = self.select_dm(self.dst_train, index, is_train=False)
    # Per-batch results are collected on the CPU as numpy arrays.
    gradients = []
    try:
        for batch in tqdm(data_loader):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            with torch.no_grad():
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                # Broadcasting (bs, 1, D) * (bs, C, 1) gives the same (bs, C, D)
                # product as the former pair of repeat() calls, without
                # materialising two extra copies.
                weight_parameters_grads = visual_embedding.view(bs_size, 1, -1) * \
                                          bias_parameters_grads.view(bs_size, self.num_classes, 1)
                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                           dim=1).cpu().numpy())

        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print('Finish Gradient Calculation')
    finally:
        # Put the model back in training mode even if a batch failed.
        self.model.train()
    return gradients
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch, time
|
||||
from torch import nn
|
||||
import numpy as np
|
||||
from datasets.data_manager import select_dm_loader
|
||||
|
||||
# Acknowledgement to
|
||||
# https://github.com/mtoneva/example_forgetting
|
||||
|
||||
class Forgetting(EarlyTrain):
|
||||
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True, #default True
             dst_test=None, **kwargs):
    '''
    Set up the forgetting-events selector.

    All arguments except ``balance`` are forwarded to the parent initialiser.
    ``balance`` decides whether ``select`` picks samples class by class.
    '''
    super().__init__(
        dst_train, args, fraction, random_seed, epochs,
        specific_model=specific_model, dst_test=dst_test, **kwargs,
    )
    self.balance = balance
|
||||
|
||||
def get_hms(self, seconds):
    '''Split a duration in seconds into an ``(hours, minutes, seconds)`` tuple for printing.'''
    minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return hours, minutes, secs
|
||||
|
||||
def before_train(self):
    '''Reset the running loss, correct-count and sample-count accumulators to 0.'''
    self.train_loss = self.correct = self.total = 0.
|
||||
|
||||
def after_loss(self, outputs, loss, targets, batch_inds, epoch):
    '''
    Count forgetting events for the current batch.

    A sample is counted when its stored correctness (``self.last_acc``)
    exceeds its current correctness by more than 0.01, i.e. it was correct
    before and is wrong now. ``self.last_acc`` is then overwritten with the
    current correctness. ``loss`` and ``epoch`` are unused.
    '''
    with torch.no_grad():
        predicted = torch.max(outputs.data, 1).indices
        now_correct = (predicted == targets).to(torch.float32)

        forgotten = (self.last_acc[batch_inds] - now_correct) > 0.01
        self.forgetting_events[batch_inds[forgotten]] += 1.
        self.last_acc[batch_inds] = now_correct
|
||||
|
||||
def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    '''No-op: per-iteration loss/accuracy logging is currently disabled.'''
    return None
|
||||
|
||||
|
||||
|
||||
def after_epoch(self):
    '''No-op: elapsed-time reporting is currently disabled.'''
    return None
|
||||
|
||||
def before_run(self):
    '''
    Prepare per-run state.

    Resets the elapsed-time counter, allocates one zeroed forgetting counter
    per training sample on the GPU, and lets ``test_initial_acc`` fill
    ``self.last_acc`` from the current model.
    '''
    self.elapsed_time = 0
    counters = torch.zeros(self.n_train, requires_grad=False)
    self.forgetting_events = counters.cuda()
    self.test_initial_acc()
|
||||
|
||||
def test_initial_acc(self):
    '''
    Record, for every training sample, whether the current model classifies it correctly.

    The result (1.0 for correct, 0.0 otherwise) is stored in ``self.last_acc``
    at the positions given by each batch's ``"index"`` entry.
    '''
    self.model.no_grad = True
    self.model.eval()
    self.last_acc = torch.zeros(self.n_train, requires_grad=False).cuda()

    print('\n=> Testing Initial acc for Forgetting')
    for batch in select_dm_loader(self.args, self.dst_train):
        image = batch['img'].cuda()
        target = batch['label'].cuda()
        batch_inds = batch['index'].cuda()

        output = self.model(image, target)
        predicted = torch.max(output.data, 1).indices
        self.last_acc[batch_inds] = (predicted == target).to(torch.float32)

    # NOTE(review): the model is left in eval mode here; presumably the
    # training step switches it back - confirm against ``train``.
    self.model.no_grad = False
|
||||
|
||||
def finish_run(self):
    '''No-op: the selection itself is computed in ``select`` after ``run``.'''
    return None
|
||||
|
||||
def select(self, **kwargs):
    '''
    Run training and pick the samples with the most forgetting events.

    With ``self.balance`` set, each class contributes
    ``round(self.fraction * class_size)`` samples; otherwise the top
    ``self.coreset_size`` samples overall are taken.

    Returns:
        dict with ``"indices"`` (numpy array of selected indices) and
        ``"scores"`` (the ``self.forgetting_events`` tensor).
    '''
    self.run()

    if self.balance:
        picked = [np.array([], dtype=np.int64)]
        for c in range(self.num_classes):
            c_indx = self.train_indx[self.dst_train_label == c]
            budget = round(self.fraction * len(c_indx))
            # Ascending argsort reversed -> most-forgotten samples first.
            ranking = np.argsort(self.forgetting_events[c_indx].cpu().numpy())[::-1]
            picked.append(c_indx[ranking[:budget]])
        top_examples = np.concatenate(picked)
    else:
        ranking = np.argsort(self.forgetting_events.cpu().numpy())
        top_examples = self.train_indx[ranking][::-1][:self.coreset_size]

    return {"indices": top_examples, "scores": self.forgetting_events}
|
||||
@@ -0,0 +1,10 @@
|
||||
import numpy as np
|
||||
from .coresetmethod import CoresetMethod
|
||||
|
||||
|
||||
class Full(CoresetMethod):
    '''Baseline "selection" that keeps every training sample.'''

    def __init__(self, dst_train, args, fraction, random_seed, **kwargs):
        # Only the dataset length is needed; the remaining arguments are
        # accepted and ignored, and the parent initialiser is not called.
        self.n_train = len(dst_train)

    def select(self, **kwargs):
        '''Return ``{"indices": array([0, ..., n_train - 1])}``.'''
        every_index = np.arange(self.n_train)
        return {"indices": every_index}
|
||||
@@ -0,0 +1,210 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
from .methods_utils import submodular_optimizer
|
||||
import torch
|
||||
import numpy as np
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
from tqdm import tqdm
|
||||
|
||||
class Glister(EarlyTrain):
|
||||
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
             balance: bool = True, greedy="StochasticGreedy", eta=None, dst_val=None, **kwargs):
    '''
    Set up the Glister selector.

    Args:
        balance: select class by class when True.
        greedy: name of an optimizer listed in
            ``submodular_optimizer.optimizer_choices``.
        eta: step size; falls back to ``args.OPTIM_SELECTION.LR`` when None.
        dst_val: accepted but not used (see note below).
        Remaining arguments are forwarded to the parent initialiser.

    Raises:
        ModuleNotFoundError: if ``greedy`` is not a known optimizer name.
    '''
    super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

    self.balance = balance
    self.eta = eta if eta is not None else args.OPTIM_SELECTION.LR

    # NOTE(review): the ``dst_val`` argument is ignored; the validation split
    # is always taken from ``dst_train.dataset.val`` - confirm this is intended.
    validation_set = dst_train.dataset.val
    self.dst_val = validation_set
    self.dst_val_label = self.get_train_label(validation_set)
    self.n_val = len(validation_set)

    if greedy not in submodular_optimizer.optimizer_choices:
        raise ModuleNotFoundError("Greedy optimizer not found.")
    self._greedy = greedy
|
||||
|
||||
def calc_gradient(self, index=None,val=False):
    '''
    Calculate gradients matrix on current network for the training or validation dataset.

    For every batch the model is called with ``cal_gradient=True`` and must
    return ``(loss, visual_embedding, logit)``. Each output row is the
    gradient of the loss w.r.t. the logits, followed by the flattened product
    of that gradient with the visual embedding.

    Args:
        index: forwarded to ``self.select_dm`` to restrict the samples.
        val: use ``self.dst_val`` instead of ``self.dst_train`` when True.

    Returns:
        float32 numpy array with one row per sample.
    '''
    self.model.eval()
    if val:
        val_str = 'Val'
        data_loader = self.select_dm(self.dst_val, index, is_train=False)
    else:
        val_str = 'Train'
        data_loader = self.select_dm(self.dst_train, index, is_train=False)

    # Per-batch results are collected on the CPU as numpy arrays. Logits,
    # embeddings and labels of the validation set are NOT recorded here.
    gradients = []
    for batch in tqdm(data_loader):
        self.optim.zero_grad()
        image, label = batch['img'].cuda(), batch['label'].cuda()
        bs_size = image.shape[0]
        loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
        with torch.no_grad():
            bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
            # Broadcasting (bs, 1, D) * (bs, C, 1) gives the same (bs, C, D)
            # product as the former pair of repeat() calls, without
            # materialising two extra copies.
            weight_parameters_grads = visual_embedding.view(bs_size, 1, -1) * \
                                      bias_parameters_grads.view(bs_size, self.num_classes, 1)
            gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                       dim=1).cpu().numpy())

    gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
    print(f'Finish Gradient Calculation on {val_str} dataset')
    return gradients
|
||||
|
||||
# def calc_gradient(self, index=None, val=False, record_val_detail=False):
|
||||
# '''
|
||||
# Calculate gradients matrix on current network for training or validation dataset.
|
||||
# '''
|
||||
#
|
||||
# self.model.eval()
|
||||
#
|
||||
# if val:
|
||||
# batch_loader = torch.utils.data.DataLoader(
|
||||
# self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index),
|
||||
# batch_size=self.args.selection_batch, num_workers=self.args.workers)
|
||||
# else:
|
||||
# batch_loader = torch.utils.data.DataLoader(
|
||||
# self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
|
||||
# batch_size=self.args.selection_batch, num_workers=self.args.workers)
|
||||
#
|
||||
# self.embedding_dim = self.model.get_last_layer().in_features
|
||||
# gradients = []
|
||||
# if val and record_val_detail:
|
||||
# self.init_out = []
|
||||
# self.init_emb = []
|
||||
# self.init_y = []
|
||||
#
|
||||
# for i, (input, targets) in enumerate(batch_loader):
|
||||
# self.model_optimizer.zero_grad()
|
||||
# outputs = self.model(input.to(self.args.device))
|
||||
# loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum()
|
||||
# batch_num = targets.shape[0]
|
||||
# with torch.no_grad():
|
||||
# bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
|
||||
# weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1,
|
||||
# self.embedding_dim).repeat(1, self.args.num_classes, 1) *\
|
||||
# bias_parameters_grads.view(
|
||||
# batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim)
|
||||
# gradients.append(torch.cat(
|
||||
# [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu())
|
||||
#
|
||||
# if val and record_val_detail:
|
||||
# self.init_out.append(outputs.cpu())
|
||||
# self.init_emb.append(self.model.embedding_recorder.embedding.cpu())
|
||||
# self.init_y.append(targets)
|
||||
#
|
||||
# gradients = torch.cat(gradients, dim=0)
|
||||
# if val:
|
||||
# self.val_grads = torch.mean(gradients, dim=0)
|
||||
# if self.dst_val == self.dst_train:
|
||||
# # No validation set was provided while instantiating Glister, so self.dst_val == self.dst_train
|
||||
# self.train_grads = gradients
|
||||
# else:
|
||||
# self.train_grads = gradients
|
||||
# if val and record_val_detail:
|
||||
# with torch.no_grad():
|
||||
# self.init_out = torch.cat(self.init_out, dim=0)
|
||||
# self.init_emb = torch.cat(self.init_emb, dim=0)
|
||||
# self.init_y = torch.cat(self.init_y)
|
||||
#
|
||||
# self.model.train()
|
||||
|
||||
# PASS, worth discussion
|
||||
def update_val_gradients(self, new_selection, selected_for_train):
    '''
    Re-estimate the mean validation gradient after a selection step.

    The stored validation logits (``self.init_out``) are shifted by ``eta``
    times the mean gradient of the selected training samples, and the
    last-layer gradients are recomputed from the shifted logits. The mean of
    those gradients is stored in ``self.val_grads``. ``new_selection`` is unused.

    NOTE(review): this relies on ``self.init_out``, ``self.init_emb``,
    ``self.init_y``, ``self.embedding_dim`` and ``self.criterion``, none of
    which are set by the code visible in this class, and it calls ``.view`` on
    the result of ``np.mean`` - it looks non-functional as written; confirm
    before enabling it as an ``update_state`` callback.
    '''
    mean_selected = np.mean(self.train_gradients[selected_for_train], axis=0)

    n_val = self.init_out.shape[0]
    bias_shift = self.eta * mean_selected[:self.num_classes].reshape(1, -1).repeat(n_val, 1)
    weight_shift = self.eta * torch.matmul(
        self.init_emb, mean_selected[self.num_classes:].view(self.num_classes, -1).T)
    new_outputs = self.init_out - bias_shift - weight_shift

    sample_num = new_outputs.shape[0]
    gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)], requires_grad=False)

    step = self.args.selection_batch
    start = 0
    while start < sample_num:
        batch_indx = np.arange(sample_num)[start:min(start + step, sample_num)]
        batch_num = len(batch_indx)

        batch_logits = new_outputs[batch_indx].clone().detach().requires_grad_(True)
        loss = self.criterion(batch_logits, self.init_y[batch_indx])
        bias_parameters_grads = torch.autograd.grad(loss.sum(), batch_logits, retain_graph=True)[0]

        emb_part = self.init_emb[batch_indx].view(batch_num, 1, self.embedding_dim).repeat(
            1, self.args.num_classes, 1)
        grad_part = bias_parameters_grads.view(batch_num, self.args.num_classes, 1).repeat(
            1, 1, self.embedding_dim)
        weight_parameters_grads = emb_part * grad_part

        gradients[batch_indx] = torch.cat(
            [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu()
        start += step

    self.val_grads = torch.mean(gradients, dim=0)
|
||||
|
||||
def finish_run(self):
    '''
    Select a subset by greedily maximising the gradient-alignment gain.

    The gain of a candidate is the dot product of its training gradient with
    the mean validation gradient (a first-order Taylor approximation).

    With ``self.balance`` set, the optimisation is run per class with a budget
    of ``round(self.fraction * class_size)``; otherwise it runs once over all
    samples with a budget of ``self.coreset_size``.

    Returns:
        dict with key ``"indices"`` holding the selected indices.
    '''
    if isinstance(self.model, MyDataParallel):
        self.model = self.model.module

    self.model.no_grad = True

    self.train_indx = np.arange(self.n_train)
    self.val_indx = np.arange(self.n_val)

    train_gradients = self.calc_gradient(index=None)
    val_gradients = self.calc_gradient(index=None,val=True)

    def gain(idx_gain, selected, **kwargs):
        # Both gradient matrices are numpy arrays (see calc_gradient), so the
        # gain is computed with numpy in both branches. The attributes are
        # read at call time, so the per-class assignments below are honoured.
        return np.dot(self.train_gradients[idx_gain], self.val_gradients.reshape(-1, 1)).flatten()

    optimizer_cls = submodular_optimizer.__dict__[self._greedy]

    if self.balance:
        selection_result = np.array([], dtype=np.int64)
        for c in range(self.num_classes):
            c_indx = self.train_indx[self.dst_train_label == c]
            c_val_inx = self.val_indx[self.dst_val_label == c]
            self.train_gradients = train_gradients[c_indx]
            self.val_gradients = val_gradients[c_val_inx].mean(axis=0)

            submod_optimizer = optimizer_cls(args=self.args, index=c_indx,
                                             budget=round(self.fraction * len(c_indx)))
            c_selection_result = submod_optimizer.select(gain_function=gain, update_state=None)
            selection_result = np.append(selection_result, c_selection_result)
    else:
        self.train_gradients = train_gradients
        self.val_gradients = val_gradients.mean(axis=0)
        submod_optimizer = optimizer_cls(args=self.args, index=np.arange(self.n_train),
                                         budget=self.coreset_size)
        # This branch used torch.matmul / .view on numpy arrays and passed the
        # misspelled keyword ``upadate_state``. It now mirrors the balanced
        # branch: numpy gain and no state update.
        selection_result = submod_optimizer.select(gain_function=gain, update_state=None)

    self.model.no_grad = False
    return {"indices": selection_result}
|
||||
|
||||
def num_classes_mismatch(self):
    '''Always raise ValueError: a class-count mismatch is not supported by this method.'''
    message = "num_classes of pretrain dataset does not match that of the training dataset."
    raise ValueError(message)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
from scipy.linalg import lstsq
|
||||
from scipy.optimize import nnls
|
||||
from .earlytrain import EarlyTrain
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
|
||||
|
||||
# https://github.com/krishnatejakk/GradMatch
|
||||
|
||||
class GradMatch(EarlyTrain):
|
||||
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
             balance=True, dst_val=None, lam: float = 1., **kwargs):
    '''
    Set up the GradMatch selector.

    Args:
        balance: run the matching class by class when True.
        dst_val: optional validation set whose mean gradient is matched;
            when None the mean training gradient is used instead.
        lam: accepted but currently unused (see note below).
        Remaining arguments are forwarded to the parent initialiser.
    '''
    super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
    # NOTE(review): ``lam`` is neither stored nor passed to the OMP solvers,
    # which therefore always run with their own default of 1.
    self.balance, self.dst_val = balance, dst_val
|
||||
|
||||
def num_classes_mismatch(self):
    '''Always raise ValueError: a class-count mismatch is not supported by this method.'''
    message = "num_classes of pretrain dataset does not match that of the training dataset."
    raise ValueError(message)
|
||||
|
||||
def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
    '''Print the current loss every ``self.args.print_freq`` iterations.'''
    if batch_idx % self.args.print_freq != 0:
        return
    total_iters = (self.n_pretrain_size // batch_size) + 1
    print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
        epoch, self.epochs, batch_idx + 1, total_iters, loss.item()))
|
||||
|
||||
def orthogonal_matching_pursuit(self, A, b, budget: int, lam: float = 1.):
    '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
    Acknowledgement to:
    https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
    Args:
      A: design matrix of size (d, n), a torch tensor
      b: measurement vector of length d, a torch tensor on the same device as A
      budget: selection budget; clamped to the range [0, n]
      lam: regularization coef. for the final output vector
    Returns:
       numpy float32 vector of length n
    '''
    with torch.no_grad():
        d, n = A.shape
        budget = min(max(budget, 0), n)

        # Work on whatever device the inputs live on instead of assuming CUDA.
        device = A.device

        x = np.zeros(n, dtype=np.float32)
        resid = b.clone()
        indices = []
        boolean_mask = torch.ones(n, dtype=torch.bool, device=device)
        all_idx = torch.arange(n, device=device)

        for i in range(budget):
            if i % self.args.print_freq == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, budget))
            projections = torch.matmul(A.T, resid)
            # Best remaining column, mapped back from masked to global position.
            index = torch.argmax(projections[boolean_mask])
            index = all_idx[boolean_mask][index]

            indices.append(index.item())
            boolean_mask[index] = False

            if len(indices) == 1:
                A_i = A[:, index]
                x_i = projections[index] / torch.dot(A_i, A_i).view(-1)
                # reshape (not view) so strided columns are accepted as well.
                A_i = A[:, index].reshape(1, -1)
            else:
                A_i = torch.cat((A_i, A[:, index].reshape(1, -1)), dim=0)
                temp = torch.matmul(A_i, torch.transpose(A_i, 0, 1)) + lam * torch.eye(A_i.shape[0], device=device)
                rhs = torch.matmul(A_i, b).view(-1, 1)
                if hasattr(torch, "linalg") and hasattr(torch.linalg, "lstsq"):
                    # torch.lstsq was removed from recent PyTorch releases;
                    # note the swapped argument order of the replacement.
                    x_i = torch.linalg.lstsq(temp, rhs).solution
                else:
                    x_i, _ = torch.lstsq(rhs, temp)
            resid = b - torch.matmul(torch.transpose(A_i, 0, 1), x_i).view(-1)

        if budget > 1:
            # Final weights: non-negative least squares on the last ridge system.
            x_i = nnls(temp.cpu().numpy(), torch.matmul(A_i, b).view(-1).cpu().numpy())[0]
            x[indices] = x_i
        elif budget == 1:
            x[indices[0]] = 1.
    return x
|
||||
|
||||
def orthogonal_matching_pursuit_np(self, A, b, budget: int, lam: float = 1.):
    '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit (numpy version)
    Acknowledgement to:
    https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
    Args:
      A: design matrix of size (d, n)
      b: measurement vector of length d
      budget: selection budget; clamped to the range [0, n]
      lam: regularization coef. for the final output vector
    Returns:
       float32 vector of length n
    '''
    d, n = A.shape
    budget = min(max(budget, 0), n)

    x = np.zeros(n, dtype=np.float32)
    resid = np.copy(b)
    indices = []
    available = np.ones(n, dtype=bool)
    all_idx = np.arange(n)

    def ridge_system(rows):
        # Regularised Gram matrix of the chosen columns and its right-hand side.
        return rows.dot(rows.T) + lam * np.identity(rows.shape[0]), rows.dot(b)

    for step in range(budget):
        if step % self.args.print_freq == 0:
            print("| Selecting [%3d/%3d]" % (step + 1, budget))
        projections = A.T.dot(resid)
        # Best remaining column, mapped back from masked to global position.
        best = all_idx[available][np.argmax(projections[available])]

        indices.append(best.item())
        available[best] = False

        if len(indices) == 1:
            A_i = A[:, best]
            x_i = projections[best] / A_i.T.dot(A_i)
        else:
            A_i = np.vstack([A_i, A[:, best]])
            x_i = lstsq(*ridge_system(A_i))[0]
        resid = b - A_i.T.dot(x_i)

    if budget > 1:
        # Final weights: non-negative least squares on the last ridge system.
        x[indices] = nnls(*ridge_system(A_i))[0]
    elif budget == 1:
        x[indices[0]] = 1.
    return x
|
||||
|
||||
def calc_gradient(self, index=None, val=False):
    '''
    Calculate the last-layer gradient matrix for the training or validation set.

    Each row is the gradient of the summed loss w.r.t. the model outputs,
    followed by the flattened product of that gradient with the recorded
    embedding (``self.model.embedding_recorder.embedding``).

    Args:
        index: optional indices restricting the dataset via ``Subset``.
        val: use ``self.dst_val`` instead of ``self.dst_train`` when True.

    Returns:
        torch tensor of shape
        ``(sample_num, num_classes * (embedding_dim + 1))`` allocated on
        ``self.args.device``.
    '''
    self.model.eval()

    dataset = self.dst_val if val else self.dst_train
    batch_size = self.args.selection_batch
    batch_loader = torch.utils.data.DataLoader(
        dataset if index is None else torch.utils.data.Subset(dataset, index),
        batch_size=batch_size, num_workers=self.args.workers)

    if index is not None:
        sample_num = len(index)
    elif val:
        sample_num = len(self.dst_val.targets)
    else:
        sample_num = self.n_train

    self.embedding_dim = self.model.get_last_layer().in_features
    num_classes = self.args.num_classes
    gradients = torch.zeros([sample_num, num_classes * (self.embedding_dim + 1)],
                            requires_grad=False, device=self.args.device)

    for i, (input, targets) in enumerate(batch_loader):
        self.model_optimizer.zero_grad()
        outputs = self.model(input.to(self.args.device)).requires_grad_(True)
        loss = self.criterion(outputs, targets.to(self.args.device)).sum()
        batch_num = targets.shape[0]
        with torch.no_grad():
            bias_parameters_grads = torch.autograd.grad(loss, outputs, retain_graph=True)[0].cpu()
            embedding = self.model.embedding_recorder.embedding.cpu()
            weight_parameters_grads = \
                embedding.view(batch_num, 1, self.embedding_dim).repeat(1, num_classes, 1) * \
                bias_parameters_grads.view(batch_num, num_classes, 1).repeat(1, 1, self.embedding_dim)
            # Rows of batch i; the last batch may be shorter.
            first, last = i * batch_size, min((i + 1) * batch_size, sample_num)
            gradients[first:last] = torch.cat(
                [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1)

    return gradients
|
||||
|
||||
def finish_run(self):
    '''
    Select samples whose weighted gradients match the target mean gradient.

    The target is the mean validation gradient when ``self.dst_val`` is set,
    otherwise the mean of the training gradients under consideration. The
    numpy OMP solver is used when ``self.args.device == "cpu"``, the torch
    solver otherwise.

    Returns:
        dict with ``"indices"`` (selected sample indices) and ``"weights"``
        (their non-zero OMP weights).
    '''
    if isinstance(self.model, MyDataParallel):
        self.model = self.model.module

    def solve(train_grads, target_grad, budget):
        # Dispatch to the solver matching the configured device.
        if self.args.device == "cpu":
            return self.orthogonal_matching_pursuit_np(train_grads.numpy().T, target_grad.numpy(),
                                                       budget=budget)
        return self.orthogonal_matching_pursuit(train_grads.to(self.args.device).T,
                                                target_grad.to(self.args.device), budget=budget)

    self.model.no_grad = True
    with self.model.embedding_recorder:
        has_val = self.dst_val is not None
        if has_val:
            val_num = len(self.dst_val.targets)

        if self.balance:
            selection_result = np.array([], dtype=np.int64)
            weights = np.array([], dtype=np.float32)
            for c in range(self.args.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
                cur_gradients = self.calc_gradient(class_index)
                if has_val:
                    # Also calculate gradients of the validation set.
                    val_class_index = np.arange(val_num)[self.dst_val.targets == c]
                    cur_val_gradients = torch.mean(self.calc_gradient(val_class_index, val=True), dim=0)
                else:
                    cur_val_gradients = torch.mean(cur_gradients, dim=0)

                cur_weights = solve(cur_gradients, cur_val_gradients,
                                    round(len(class_index) * self.fraction))
                chosen = np.nonzero(cur_weights)[0]
                selection_result = np.append(selection_result, class_index[chosen])
                weights = np.append(weights, cur_weights[chosen])
        else:
            cur_gradients = self.calc_gradient()
            if has_val:
                # Also calculate gradients of the validation set.
                cur_val_gradients = torch.mean(self.calc_gradient(val=True), dim=0)
            else:
                cur_val_gradients = torch.mean(cur_gradients, dim=0)

            cur_weights = solve(cur_gradients, cur_val_gradients, self.coreset_size)
            selection_result = np.nonzero(cur_weights)[0]
            weights = cur_weights[selection_result]
    self.model.no_grad = False
    return {"indices": selection_result, "weights": weights}
|
||||
|
||||
def select(self, **kwargs):
    '''Execute ``run`` and return its result unchanged; ``kwargs`` are ignored.'''
    return self.run()
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch, time
|
||||
import numpy as np
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
from tqdm import tqdm
|
||||
|
||||
class GraNd(EarlyTrain):
|
||||
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, repeat=1,
             specific_model=None, balance=False, **kwargs):
    '''
    Set up the GraNd (gradient-norm) selector.

    Args:
        repeat: number of columns of the norm matrix built in ``select``.
        balance: select class by class when True.
        Remaining arguments are forwarded to the parent initialiser.
    '''
    super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model,**kwargs)
    self.epochs = epochs
    self.specific_model = specific_model
    self.repeat = repeat
    self.balance = balance

    self.n_train = len(self.dst_train)
    self.coreset_size = round(self.n_train * fraction)
|
||||
|
||||
# def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
|
||||
# if batch_idx % self.args.print_freq == 0:
|
||||
# print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
|
||||
# epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item()))
|
||||
|
||||
def before_run(self):
    '''Unwrap the model when it is wrapped in ``MyDataParallel``.'''
    wrapped = isinstance(self.model, MyDataParallel)
    if wrapped:
        self.model = self.model.module
|
||||
|
||||
def calc_gradient(self, index=None):
    '''
    Calculate gradients matrix on current network for specified training dataset.

    For every batch the model is called with ``cal_gradient=True`` and must
    return ``(loss, visual_embedding, logit)``. Each output row is the
    gradient of the loss w.r.t. the logits, followed by the flattened product
    of that gradient with the visual embedding.

    Args:
        index: forwarded to ``self.select_dm`` to restrict the samples; None
            forwards no restriction.

    Returns:
        float32 numpy array with one row per sample.
    '''
    self.model.eval()
    data_loader = self.select_dm(self.dst_train, index, is_train=False)
    # Per-batch results are collected on the CPU as numpy arrays.
    gradients = []
    try:
        for batch in tqdm(data_loader):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            with torch.no_grad():
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                # Broadcasting (bs, 1, D) * (bs, C, 1) gives the same (bs, C, D)
                # product as the former pair of repeat() calls, without
                # materialising two extra copies.
                weight_parameters_grads = visual_embedding.view(bs_size, 1, -1) * \
                                          bias_parameters_grads.view(bs_size, self.num_classes, 1)
                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                           dim=1).cpu().numpy())

        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print('Finish Gradient Calculation')
    finally:
        # Put the model back in training mode even if a batch failed.
        self.model.train()
    return gradients
|
||||
|
||||
def finish_run(self):
    '''
    Store the L2 norm of every sample's gradient in column 0 of ``self.norm_matrix``.

    NOTE(review): only column 0 is ever written, so with ``repeat > 1`` the
    remaining columns stay zero and the mean taken in ``select`` is scaled
    down accordingly.
    '''
    per_sample_gradients = self.calc_gradient()
    self.norm_matrix[:, 0] = np.linalg.norm(per_sample_gradients, axis=1)
|
||||
|
||||
|
||||
def select(self, **kwargs):
    '''
    Run training and pick the samples with the largest mean gradient norm.

    With ``self.balance`` set, each class contributes
    ``round(self.fraction * class_size)`` samples; otherwise the top
    ``self.coreset_size`` samples overall are taken.

    Returns:
        dict with ``"indices"`` (selected indices) and ``"scores"`` (the
        per-sample mean over the columns of ``self.norm_matrix``).
    '''
    # One row per sample, one column per (independent) run.
    self.norm_matrix = np.zeros([self.n_train, self.repeat])

    self.run()

    self.norm_mean = np.mean(self.norm_matrix, axis=1)
    if self.balance:
        picked = [np.array([], dtype=np.int64)]
        for c in tqdm(range(self.num_classes)):
            c_indx = self.train_indx[self.dst_train_label == c]
            budget = round(self.fraction * len(c_indx))
            # Ascending argsort reversed -> largest norms first.
            ranking = np.argsort(self.norm_mean[c_indx])[::-1]
            picked.append(c_indx[ranking[:budget]])
        top_examples = np.concatenate(picked)
    else:
        ranking = np.argsort(self.norm_mean)
        top_examples = self.train_indx[ranking][::-1][:self.coreset_size]

    return {"indices": top_examples, "scores": self.norm_mean}
|
||||
@@ -0,0 +1,109 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch
|
||||
import numpy as np
|
||||
from .methods_utils import euclidean_dist
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
|
||||
|
||||
class Herding(EarlyTrain):
|
||||
def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
             specific_model="ResNet18", balance: bool = False, metric="euclidean", **kwargs):
    '''
    Set up the Herding selector.

    Args:
        balance: stored on the instance for use during selection.
        metric: ``"euclidean"`` or a callable distance function. Any other
            value falls back to the euclidean distance on raw inputs.
        Remaining arguments are forwarded to the parent initialiser.
    '''
    super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, **kwargs)

    is_default = metric == "euclidean"
    if is_default or not callable(metric):
        self.metric = euclidean_dist
    else:
        self.metric = metric

    if not is_default and not callable(metric):
        # Unrecognised metric: skip training (``run`` goes straight to
        # ``finish_run``) and build the matrix from the flattened inputs.
        self.run = lambda: self.finish_run()

        def _construct_matrix(index=None):
            data_loader = torch.utils.data.DataLoader(
                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
                batch_size=self.n_train if index is None else len(index), num_workers=self.args.workers)
            # The whole (sub)set is loaded as one batch.
            inputs, _ = next(iter(data_loader))
            return inputs.flatten(1).requires_grad_(False).to(self.args.device)

        self.construct_matrix = _construct_matrix

    self.balance = balance
    self.select_bs = self.args.DATASET.SELECTION_BATCH_SIZE
|
||||
|
||||
def num_classes_mismatch(self):
|
||||
raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
|
||||
|
||||
def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
|
||||
pass
|
||||
|
||||
#Initial achievement, may not optimal
|
||||
def mixing_feature(self,img_fea,text_fea,lam=0.5):
|
||||
# return img_fea
|
||||
return lam*img_fea + (1-lam)*text_fea
|
||||
|
||||
def construct_matrix(self, index=None):
|
||||
self.model.eval()
|
||||
self.model.no_grad = True
|
||||
with torch.no_grad():
|
||||
# with self.model.embedding_recorder:
|
||||
sample_num = self.n_train if index is None else len(index)
|
||||
matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).cuda()
|
||||
data_loader = self.select_dm(self.dst_train,index,is_train=False)
|
||||
for i, batch in enumerate(data_loader):
|
||||
image,label = batch['img'].cuda(),batch['label'].cuda()
|
||||
img_f,text_f,_ = self.model(image, label, record=True)
|
||||
final_embed = self.mixing_feature(img_f,text_f) #Using the mixed image_feature and text_feature
|
||||
matrix[i * self.select_bs:min((i + 1) * self.select_bs, sample_num)] = final_embed
|
||||
|
||||
self.model.no_grad = False
|
||||
self.model.train()
|
||||
return matrix
|
||||
|
||||
def before_run(self):
|
||||
self.emb_dim = self.model.image_encoder.output_dim
|
||||
|
||||
def herding(self, matrix, budget: int, index=None):
|
||||
|
||||
sample_num = matrix.shape[0]
|
||||
|
||||
if budget < 0:
|
||||
raise ValueError("Illegal budget size.")
|
||||
elif budget > sample_num:
|
||||
budget = sample_num
|
||||
|
||||
indices = np.arange(sample_num)
|
||||
with torch.no_grad():
|
||||
mu = torch.mean(matrix, dim=0)
|
||||
select_result = np.zeros(sample_num, dtype=bool)
|
||||
|
||||
for i in range(budget):
|
||||
if i % self.args.TRAIN.PRINT_FREQ == 0:
|
||||
print("| Selecting [%3d/%3d]" % (i + 1, budget))
|
||||
dist = self.metric(((i + 1) * mu - torch.sum(matrix[select_result], dim=0)).view(1, -1),
|
||||
matrix[~select_result])
|
||||
p = torch.argmax(dist).item()
|
||||
p = indices[~select_result][p]
|
||||
select_result[p] = True
|
||||
if index is None:
|
||||
index = indices
|
||||
return index[select_result]
|
||||
|
||||
def finish_run(self):
|
||||
if isinstance(self.model, MyDataParallel):
|
||||
self.model = self.model.module
|
||||
|
||||
if self.balance:
|
||||
selection_result = np.array([], dtype=np.int32)
|
||||
for c in range(self.num_classes):
|
||||
class_index = np.arange(self.n_train)[self.dst_train_label == c]
|
||||
selection_result = np.append(selection_result, self.herding(self.construct_matrix(class_index),
|
||||
budget=round(self.fraction * len(class_index)), index=class_index))
|
||||
else:
|
||||
selection_result = self.herding(self.construct_matrix(), budget=self.coreset_size)
|
||||
return {"indices": selection_result}
|
||||
|
||||
def select(self, **kwargs):
|
||||
selection_result = self.run()
|
||||
return selection_result
|
||||
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch
|
||||
import numpy as np
|
||||
from .methods_utils import euclidean_dist
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
|
||||
|
||||
def k_center_greedy(matrix, budget: int, metric, device, random_seed=None, index=None, already_selected=None,
                    print_freq: int = 20):
    """Greedy k-center selection over the rows of ``matrix``.

    Repeatedly adds the row whose distance to its nearest already chosen row
    is largest.

    Args:
        matrix: 2-D ``torch.Tensor`` or ``numpy.ndarray`` of row features.
            Arrays are converted to a tensor on ``device``.
        budget: Number of rows to pick. When no starting points are supplied,
            one random starting row counts towards the budget; otherwise
            ``budget`` rows are added on top of ``already_selected``.
            Values above what is available are clipped.
        metric: Callable ``metric(a, b)`` returning a ``[len(a), len(b)]``
            tensor of distances.
        device: Device used for the running-minimum buffer.
        random_seed: Passed to ``np.random.seed`` before the random start.
        index: Optional labels for the rows; defaults to ``arange(n)``.
        already_selected: Optional values of ``index`` that are already chosen.
            ``None`` and empty sequences both mean "nothing chosen yet".
        print_freq: Progress is printed every ``print_freq`` picks.

    Returns:
        The entries of ``index`` that are selected (including the starting
        points), in ``index`` order.

    Raises:
        ValueError: If ``budget`` is negative.
    """
    if isinstance(matrix, torch.Tensor):
        assert matrix.dim() == 2
    elif isinstance(matrix, np.ndarray):
        assert matrix.ndim == 2
        matrix = torch.from_numpy(matrix).requires_grad_(False).to(device)

    sample_num = matrix.shape[0]
    assert sample_num >= 1

    if budget < 0:
        raise ValueError("Illegal budget size.")
    budget = min(budget, sample_num)

    if index is not None:
        assert matrix.shape[0] == len(index)
        index = np.asarray(index)
    else:
        index = np.arange(sample_num)

    assert callable(metric)

    # The documented default (None) used to crash on len(); treat it as empty.
    already_selected = np.atleast_1d(np.asarray([] if already_selected is None else already_selected))

    with torch.no_grad():
        np.random.seed(random_seed)
        if already_selected.size:
            select_result = np.isin(index, already_selected)
        else:
            select_result = np.zeros(sample_num, dtype=bool)

        if not select_result.any():
            if budget == 0:
                return index[select_result]
            # Randomly select one initial point; it counts towards the budget.
            select_result[np.random.randint(0, sample_num)] = True
            budget -= 1

        # Never ask for more picks than there are unselected rows.
        budget = min(budget, sample_num - int(select_result.sum()))
        if budget == 0:
            return index[select_result]

        # mins[j] = distance from row j to its nearest chosen row; chosen rows
        # are pinned to -1 so argmax never returns them. Only this running
        # minimum is needed, so no per-center distance matrix is stored.
        mins = torch.full((sample_num,), -1.0, device=device)
        chosen = torch.from_numpy(select_result.copy())
        pending = torch.from_numpy(~select_result)
        first = metric(matrix[chosen], matrix[pending])
        mins[pending] = torch.min(first, dim=0).values.to(device=mins.device, dtype=mins.dtype)

        for i in range(budget):
            if i % print_freq == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, budget))
            p = torch.argmax(mins).item()
            select_result[p] = True
            mins[p] = -1

            if i == budget - 1:
                break
            pending = torch.from_numpy(~select_result)
            to_new_center = metric(matrix[[p]], matrix[pending]).reshape(-1)
            to_new_center = to_new_center.to(device=mins.device, dtype=mins.dtype)
            mins[pending] = torch.min(mins[pending], to_new_center)
    return index[select_result]
|
||||
|
||||
|
||||
class kCenterGreedy(EarlyTrain):
    """Coreset selection that runs greedy k-center on model embeddings."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=0,
                 specific_model="ResNet18", balance: bool = False, already_selected=[], metric="euclidean",
                 torchvision_pretrain: bool = True, **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model,
                         torchvision_pretrain=torchvision_pretrain, **kwargs)

        if len(already_selected) != 0:
            lowest, highest = min(already_selected), max(already_selected)
            if lowest < 0 or highest >= self.n_train:
                raise ValueError("List of already selected points out of the boundary.")
        self.already_selected = np.array(already_selected)

        self.min_distances = None

        is_named_default = metric == "euclidean"
        if not is_named_default and callable(metric):
            self.metric = metric
        else:
            self.metric = euclidean_dist

        if not is_named_default and not callable(metric):
            # NOTE(review): indentation was lost in this copy of the file; the
            # overrides below are assumed to belong to the "unknown metric"
            # fallback branch only - confirm against the repository.
            self.run = lambda : self.finish_run()

            def _construct_matrix(index=None):
                dataset = self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index)
                whole_batch = self.n_train if index is None else len(index)
                data_loader = torch.utils.data.DataLoader(dataset, batch_size=whole_batch,
                                                          num_workers=self.args.workers)
                inputs, _ = next(iter(data_loader))
                return inputs.flatten(1).requires_grad_(False).to(self.args.device)

            self.construct_matrix = _construct_matrix

        self.balance = balance

    def num_classes_mismatch(self):
        """Abort: the pretrain and training label spaces differ."""
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        """Print the loss every ``self.args.print_freq`` batches."""
        if batch_idx % self.args.print_freq != 0:
            return
        total_iters = (self.n_pretrain_size // batch_size) + 1
        print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
            epoch, self.epochs, batch_idx + 1, total_iters, loss.item()))

    def _selection_loader(self, index):
        # Loader over the requested subset (or everything) in selection batches.
        dataset = self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index)
        return torch.utils.data.DataLoader(dataset, batch_size=self.args.selection_batch,
                                           num_workers=self.args.workers)

    def old_construct_matrix(self, index=None):
        """Embed samples into a preallocated ``[sample_num, self.emb_dim]`` tensor.

        Kept alongside :meth:`construct_matrix`; reads the embeddings from
        ``self.model.embedding_recorder`` after each forward pass.
        """
        self.model.eval()
        self.model.no_grad = True
        with torch.no_grad():
            with self.model.embedding_recorder:
                sample_num = len(index) if index is not None else self.n_train
                matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device)
                step = self.args.selection_batch

                for batch_no, (inputs, _) in enumerate(self._selection_loader(index)):
                    self.model(inputs.to(self.args.device))
                    first_row = batch_no * step
                    last_row = min((batch_no + 1) * step, sample_num)
                    matrix[first_row:last_row] = self.model.embedding_recorder.embedding

        self.model.no_grad = False
        return matrix

    def construct_matrix(self, index=None):
        """Embed samples and return the per-batch embeddings concatenated row-wise."""
        self.model.eval()
        self.model.no_grad = True
        with torch.no_grad():
            with self.model.embedding_recorder:
                sample_num = len(index) if index is not None else self.n_train
                chunks = []

                for _, (inputs, _) in enumerate(self._selection_loader(index)):
                    self.model(inputs.to(self.args.device))
                    chunks.append(self.model.embedding_recorder.embedding)

        self.model.no_grad = False
        return torch.cat(chunks, dim=0)

    def before_run(self):
        """Cache the embedding width (input size of the model's last layer)."""
        self.emb_dim = self.model.get_last_layer().in_features

    def finish_run(self):
        """Unwrap the model from ``MyDataParallel`` once training is over."""
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module

    def select(self, **kwargs):
        """Train, embed and run k-center; returns ``{"indices": ...}``."""
        self.run()

        if not self.balance:
            matrix = self.construct_matrix()
            # The model is no longer needed once embeddings exist.
            del self.model_optimizer
            del self.model
            selection_result = k_center_greedy(matrix, budget=self.coreset_size,
                                               metric=self.metric, device=self.args.device,
                                               random_seed=self.random_seed,
                                               already_selected=self.already_selected,
                                               print_freq=self.args.print_freq)
            return {"indices": selection_result}

        selection_result = np.array([], dtype=np.int32)
        all_positions = np.arange(self.n_train)
        for c in range(self.args.num_classes):
            class_index = all_positions[self.dst_train.targets == c]
            class_seeds = self.already_selected[np.in1d(self.already_selected, class_index)]
            picked = k_center_greedy(self.construct_matrix(class_index),
                                     budget=round(self.fraction * len(class_index)),
                                     metric=self.metric,
                                     device=self.args.device,
                                     random_seed=self.random_seed,
                                     index=class_index,
                                     already_selected=class_seeds,
                                     print_freq=self.args.print_freq)
            selection_result = np.append(selection_result, picked)
        return {"indices": selection_result}
|
||||
@@ -0,0 +1,4 @@
|
||||
from .euclidean import *
|
||||
from .cossim import *
|
||||
from .submodular_function import *
|
||||
from .submodular_optimizer import *
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,35 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
def cossim_np(v1, v2):
    """Cosine similarity between the rows of two NumPy matrices, rescaled to [0, 1].

    Returns a ``[len(v1), len(v2)]`` array of ``0.5 + 0.5 * cos``; a small
    constant (1e-6) is added to the denominator, so zero rows map to 0.5.
    """
    left_norms = np.linalg.norm(v1, axis=1).reshape(-1, 1)
    right_norms = np.linalg.norm(v2, axis=1)
    cosine = np.dot(v1, v2.T) / (left_norms * right_norms + 1e-6)
    cosine[np.isneginf(cosine)] = 0.
    return 0.5 + 0.5 * cosine
|
||||
|
||||
def cossim_pair_np(v1):
    """Pairwise cosine similarity among the rows of one NumPy matrix, rescaled to [0, 1].

    Returns an ``[n, n]`` array of ``0.5 + 0.5 * cos`` with 1e-6 added to the
    denominator.
    """
    row_norms = np.linalg.norm(v1, axis=1)
    norm_products = row_norms.reshape(-1, 1) * row_norms
    cosine = np.dot(v1, v1.T) / (norm_products + 1e-6)
    cosine[np.isneginf(cosine)] = 0.
    return 0.5 + 0.5 * cosine
|
||||
|
||||
def cossim(v1, v2):
    """Cosine similarity between the rows of two tensors, rescaled to [0, 1].

    Returns a ``[len(v1), len(v2)]`` tensor of ``0.5 + 0.5 * cos``; 1e-6 is
    added to the denominator, so zero rows map to 0.5.
    """
    left_norms = torch.norm(v1, dim=1).view(-1, 1)
    right_norms = torch.norm(v2, dim=1)
    cosine = torch.matmul(v1, v2.T) / (left_norms * right_norms + 1e-6)
    cosine[torch.isneginf(cosine)] = 0.
    return 0.5 + 0.5 * cosine
|
||||
|
||||
def cossim_pair(v1):
    """Pairwise cosine similarity among the rows of one tensor, rescaled to [0, 1].

    Returns an ``[n, n]`` tensor of ``0.5 + 0.5 * cos`` with 1e-6 added to the
    denominator.
    """
    row_norms = torch.norm(v1, dim=1)
    norm_products = row_norms.view(-1, 1) * row_norms
    cosine = torch.matmul(v1, v1.T) / (norm_products + 1e-6)
    cosine[torch.isneginf(cosine)] = 0.
    return 0.5 + 0.5 * cosine
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
|
||||
def euclidean_dist(x, y):
    """Euclidean distance between every row of ``x`` and every row of ``y``.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 a.b``.

    Args:
        x: ``[m, d]`` tensor.
        y: ``[n, d]`` tensor.

    Returns:
        ``[m, n]`` tensor of distances. Squared distances are clamped to at
        least 1e-12 before the square root, so the smallest value returned is
        1e-6 rather than exactly zero.
    """
    m, n = x.size(0), y.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
    dist = xx + yy
    # Keyword form of addmm_: the positional (beta, alpha, mat1, mat2)
    # overload used previously is deprecated.
    dist.addmm_(x, y.t(), beta=1, alpha=-2)
    return dist.clamp(min=1e-12).sqrt()
|
||||
|
||||
|
||||
def euclidean_dist_pair(x):
    """Pairwise Euclidean distance among the rows of ``x``.

    Args:
        x: ``[m, d]`` tensor.

    Returns:
        ``[m, m]`` tensor of distances. Squared distances are clamped to at
        least 1e-12 before the square root, so the diagonal is 1e-6, not zero.
    """
    m = x.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, m)
    dist = xx + xx.t()
    # Keyword form of addmm_: the positional (beta, alpha, mat1, mat2)
    # overload used previously is deprecated.
    dist.addmm_(x, x.t(), beta=1, alpha=-2)
    return dist.clamp(min=1e-12).sqrt()
|
||||
|
||||
def euclidean_dist_np(x, y):
    """Euclidean distance between every row of ``x`` and every row of ``y`` (NumPy).

    Args:
        x: ``[m, d]`` array.
        y: ``[n, d]`` array.

    Returns:
        ``[m, n]`` array of distances. Squared distances are clipped to at
        least 1e-12 before the square root, so the smallest value is 1e-6.
    """
    # Column / row vectors of squared norms; broadcasting builds the [m, n]
    # sum without materialising two repeated copies.
    x_sq = np.sum(np.multiply(x, x), axis=1).reshape(-1, 1)
    y_sq = np.sum(np.multiply(y, y), axis=1).reshape(1, -1)
    xy = np.dot(x, y.T)
    return np.sqrt(np.clip(x_sq + y_sq - 2. * xy, 1e-12, None))
|
||||
|
||||
def euclidean_dist_pair_np(x):
    """Pairwise Euclidean distance among the rows of ``x`` (NumPy).

    Args:
        x: ``[n, d]`` array.

    Returns:
        ``[n, n]`` array of distances. Squared distances are clipped to at
        least 1e-12 before the square root, so the diagonal is 1e-6 rather
        than exactly zero.
    """
    # Broadcasting a column of squared norms against its transpose avoids
    # materialising a repeated [n, n] copy.
    x_sq = np.sum(np.multiply(x, x), axis=1).reshape(-1, 1)
    xy = np.dot(x, x.T)
    return np.sqrt(np.clip(x_sq + x_sq.T - 2. * xy, 1e-12, None))
|
||||
@@ -0,0 +1,144 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class SubmodularFunction(object):
    """Base class holding the similarity source shared by submodular objectives.

    Exactly one similarity source is used: a callable ``similarity_kernel(a, b)``
    evaluated on demand (preferred when both are given), or a precomputed
    ``[n, n]`` ``similarity_matrix``.

    Args:
        index: Identifiers of the candidate samples; ``len(index)`` defines ``n``.
        similarity_kernel: Optional callable returning similarities for the
            index selections ``a`` (rows) and ``b`` (columns).
        similarity_matrix: Optional ``[n, n]`` array of pairwise similarities.
        already_selected: Optional collection of pre-selected positions;
            ``None`` means none.
    """

    def __init__(self, index, similarity_kernel=None, similarity_matrix=None, already_selected=None):
        self.index = index
        self.n = len(index)

        # A fresh list per instance: a shared mutable default would leak
        # state between unrelated objects.
        self.already_selected = [] if already_selected is None else already_selected

        assert similarity_kernel is not None or similarity_matrix is not None

        # Two input modes are supported: a full pairwise similarity matrix, or
        # a kernel used to compute similarities incrementally when required.
        if similarity_kernel is not None:
            assert callable(similarity_kernel)
            # Subclasses may wrap the kernel (e.g. with caching).
            self.similarity_kernel = self._similarity_kernel(similarity_kernel)
        else:
            assert similarity_matrix.shape[0] == self.n and similarity_matrix.shape[1] == self.n
            self.similarity_matrix = similarity_matrix
            self.similarity_kernel = lambda a, b: self.similarity_matrix[np.ix_(a, b)]

    def _similarity_kernel(self, similarity_kernel):
        """Hook for subclasses to wrap the kernel; the base returns it unchanged."""
        return similarity_kernel
|
||||
|
||||
|
||||
class FacilityLocation(SubmodularFunction):
    """Facility-location objective: each sample is credited with its best similarity to the selection.

    ``self.cur_max[j]`` tracks the largest similarity between sample ``j`` and
    anything selected so far.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # Must exist before the kernel is first called: the cached kernel
        # below reads self.all_idx.
        self.all_idx = np.ones(self.n, dtype=bool)

        if len(self.already_selected) == 0:
            self.cur_max = np.zeros(self.n, dtype=np.float32)
        else:
            # asarray: the cached kernel inspects ``.dtype`` of its column selector.
            self.cur_max = np.max(
                self.similarity_kernel(np.arange(self.n), np.asarray(self.already_selected)), axis=1)

    def _similarity_kernel(self, similarity_kernel):
        """Wrap ``similarity_kernel`` with a column-wise cache.

        Similarity columns are computed against all samples the first time
        they are requested and stored in ``self.sim_matrix``.
        """
        # Initialize a matrix to store similarity values of sample points.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Turn positional indices into a boolean column mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        """Marginal gain of each candidate in ``idx_gain``.

        For every candidate, sums over all samples the positive part of
        ``similarity - cur_max``.
        """
        gains = np.maximum(0., self.similarity_kernel(self.all_idx, idx_gain) - self.cur_max.reshape(-1, 1)).sum(axis=0)
        return gains

    def calc_gain_batch(self, idx_gain, selected, **kwargs):
        """Same as :meth:`calc_gain`, accumulated over row blocks of ``kwargs["batch"]`` samples."""
        batch = kwargs["batch"]

        def _block_gain(start):
            # Gain contribution of rows [start, start + batch).
            batch_idx = ~self.all_idx
            batch_idx[start:start + batch] = True
            return np.maximum(0., self.similarity_kernel(batch_idx, idx_gain)
                              - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)

        gains = _block_gain(0)
        # ``start`` already advances in steps of ``batch``; it must not be
        # multiplied by ``batch`` again or most row blocks are skipped.
        for start in range(batch, self.n, batch):
            gains += _block_gain(start)
        return gains

    def update_state(self, new_selection, total_selected, **kwargs):
        """Fold the similarities to ``new_selection`` into ``self.cur_max``."""
        self.cur_max = np.maximum(self.cur_max, np.max(self.similarity_kernel(self.all_idx, new_selection), axis=1))
|
||||
|
||||
|
||||
class GraphCut(SubmodularFunction):
    """Graph-cut objective.

    The gain of a candidate is ``lam`` times its total similarity to all
    samples minus twice its similarity to the current selection.

    Args:
        lam: Weight of the total-similarity term.
        **kwargs: Forwarded to :class:`SubmodularFunction`.
    """

    def __init__(self, lam: float = 1., **kwargs):
        super().__init__(**kwargs)
        self.lam = lam

        # The base class only stores ``similarity_matrix`` when no kernel was
        # given; checking the attribute (not the kwargs key) avoids an
        # AttributeError when both, or an explicit None, are passed.
        if getattr(self, "similarity_matrix", None) is not None:
            self.sim_matrix_cols_sum = np.sum(self.similarity_matrix, axis=0)
        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        """Wrap ``similarity_kernel`` with a column-wise cache that also keeps column sums."""
        # Initialize a matrix to store similarity values of sample points.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.sim_matrix_cols_sum = np.zeros(self.n, dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Turn positional indices into a boolean column mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.sim_matrix_cols_sum[not_calculated] = np.sum(self.sim_matrix[:, not_calculated], axis=0)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        """Conditional gain of each candidate in ``idx_gain`` given ``selected``.

        Computed as ``-2 * sum(sim(selected, candidate)) + lam * colsum(candidate)``.
        """
        to_selected = np.sum(self.similarity_kernel(selected, idx_gain), axis=0)
        gain = -2. * to_selected + self.lam * self.sim_matrix_cols_sum[idx_gain]
        return gain

    def update_state(self, new_selection, total_selected, **kwargs):
        """No incremental state is kept for this objective."""
        pass
|
||||
|
||||
|
||||
class LogDeterminant(SubmodularFunction):
    """Log-determinant style objective built on a cached similarity kernel."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        """Wrap ``similarity_kernel`` so each similarity column is computed once and cached."""
        # Storage for similarity values and a flag per cached column.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            all_cached = np.all(self.if_columns_calculated[b])
            if not all_cached:
                if b.dtype != bool:
                    # Positional indices -> boolean column mask.
                    mask = ~self.all_idx
                    mask[b] = True
                    b = mask
                missing = b & ~self.if_columns_calculated
                self.sim_matrix[:, missing] = similarity_kernel(self.all_idx, missing)
                self.if_columns_calculated[missing] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        """Score each candidate in ``idx_gain`` against the current selection.

        Evaluates the quadratic form ``s_x^T pinv(S_A) s_x`` per candidate,
        where ``s_x`` holds the candidate's similarities to the selected set
        ``A`` and ``S_A`` is the similarity matrix of the selected set.
        """
        # Gain for LogDeterminant can be written as $f(x | A ) = \log\det(S_{a} - S_{a,A}S_{A}^{-1}S_{x,A}^T)$.
        cand_to_sel = self.similarity_kernel(selected, idx_gain).T
        sel_to_sel = self.similarity_kernel(selected, selected)
        projected = np.dot(cand_to_sel, np.linalg.pinv(sel_to_sel))
        return (projected * cand_to_sel).sum(-1)

    def update_state(self, new_selection, total_selected, **kwargs):
        """No incremental state is kept for this objective."""
        pass
|
||||
@@ -0,0 +1,155 @@
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
optimizer_choices = ["NaiveGreedy", "LazyGreedy", "StochasticGreedy", "ApproximateLazyGreedy"]
|
||||
|
||||
class optimizer(object):
    """Common state for the greedy submodular optimizers.

    Args:
        args: Configuration object; stored as ``self.args``.
        index: Identifiers of the candidate samples.
        budget: Number of samples to select; must satisfy
            ``0 < budget <= len(index)``.
        already_selected: Optional collection of pre-selected positions;
            ``None`` means none.

    Raises:
        ValueError: If ``budget`` is outside ``(0, len(index)]``.
    """

    def __init__(self, args, index, budget:int, already_selected=None):
        self.args = args
        self.index = index
        self.n = len(index)

        if budget <= 0 or budget > self.n:
            raise ValueError("Illegal budget for optimizer.")

        self.budget = budget
        # A fresh list per instance: a shared mutable default would leak
        # state between unrelated optimizers.
        self.already_selected = [] if already_selected is None else already_selected
|
||||
|
||||
|
||||
class NaiveGreedy(optimizer):
    """Plain greedy: re-evaluate every unselected candidate at each step and take the best."""

    def __init__(self, args, index, budget:int, already_selected=[]):
        super().__init__(args, index, budget, already_selected)

    def select(self, gain_function, update_state=None, **kwargs):
        """Grow the selection up to ``self.budget`` and return the chosen entries of ``self.index``.

        ``gain_function(candidates, selected, **kwargs)`` yields one gain per
        candidate; ``update_state(new, selected, **kwargs)`` is called after
        each pick when provided.
        """
        assert callable(gain_function)
        assert update_state is None or callable(update_state)

        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        greedy_gain = np.zeros(len(self.index))
        start = sum(selected)
        for step in range(start, self.budget):
            if step % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (step + 1, self.budget))
            unselected = ~selected
            greedy_gain[unselected] = gain_function(unselected, selected, **kwargs)
            winner = greedy_gain.argmax()
            selected[winner] = True
            # Keep already chosen entries out of later argmax calls.
            greedy_gain[winner] = -np.inf
            if update_state is not None:
                update_state(np.array([winner]), selected, **kwargs)
        return self.index[selected]
|
||||
|
||||
|
||||
class LazyGreedy(optimizer):
    """Lazy greedy: keep stale gains and only refresh the current top candidate."""

    def __init__(self, args, index, budget:int, already_selected=[]):
        super().__init__(args, index, budget, already_selected)

    def select(self, gain_function, update_state=None, **kwargs):
        """Grow the selection up to ``self.budget`` and return the chosen entries of ``self.index``.

        All gains are computed once up front. At each step the top-ranked
        candidate is re-evaluated; it is accepted once it is still on top
        after having been recorded as the last refreshed maximum.
        """
        assert callable(gain_function)
        assert update_state is None or callable(update_state)

        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        greedy_gain = np.zeros(len(self.index))
        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
        greedy_gain[selected] = -np.inf

        for step in tqdm(range(sum(selected), self.budget)):
            if step % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (step + 1, self.budget))
            best_gain = -np.inf
            confirmed = -1
            picked = False
            while not picked:
                candidate = greedy_gain.argmax()
                if confirmed != candidate:
                    # Stale entry on top: refresh its gain and look again.
                    refreshed = gain_function(np.array([candidate]), selected, **kwargs)[0]
                    greedy_gain[candidate] = refreshed
                    if refreshed >= best_gain:
                        best_gain = refreshed
                        confirmed = candidate
                    continue

                # The refreshed candidate is still the maximum: select it.
                selected[candidate] = True
                greedy_gain[candidate] = -np.inf
                if update_state is not None:
                    update_state(np.array([candidate]), selected, **kwargs)
                picked = True
        return self.index[selected]
|
||||
|
||||
|
||||
class StochasticGreedy(optimizer):
    """Stochastic greedy: at each step evaluate only a random subset of the unselected candidates."""

    def __init__(self, args, index, budget:int, already_selected=[], epsilon: float=0.9):
        super().__init__(args, index, budget, already_selected)
        self.epsilon = epsilon

    def select(self, gain_function, update_state=None, **kwargs):
        """Grow the selection up to ``self.budget`` and return the chosen entries of ``self.index``.

        The per-step subset size is ``max(round(-log(epsilon) * n / budget), 1)``,
        capped by ``n - step``; subsets are drawn with ``np.random.choice``
        without replacement.
        """
        assert callable(gain_function)
        assert update_state is None or callable(update_state)

        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        sample_size = max(round(-np.log(self.epsilon) * self.n / self.budget), 1)

        greedy_gain = np.zeros(len(self.index))
        positions = np.arange(self.n)
        for step in range(sum(selected), self.budget):
            if step % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (step + 1, self.budget))

            # Uniformly draw the candidates to evaluate from the unselected pool.
            draw = min(sample_size, self.n - step)
            subset = np.random.choice(positions[~selected], replace=False, size=draw)

            if len(subset) == 0:
                break

            greedy_gain[subset] = gain_function(subset, selected, **kwargs)
            winner = subset[greedy_gain[subset].argmax()]
            selected[winner] = True
            greedy_gain[winner] = -np.inf
            if update_state is not None:
                update_state(np.array([winner]), selected, **kwargs)
        return self.index[selected]
|
||||
|
||||
|
||||
class ApproximateLazyGreedy(optimizer):
    """Approximate lazy greedy: accept the top candidate when its refreshed gain is within ``beta`` of its stale gain."""

    def __init__(self, args, index, budget:int, already_selected=[], beta: float=0.9):
        super().__init__(args, index, budget, already_selected)
        self.beta = beta

    def select(self, gain_function, update_state=None, **kwargs):
        """Grow the selection up to ``self.budget`` and return the chosen entries of ``self.index``.

        All gains are computed once up front. At each step the top-ranked
        candidate is re-evaluated and accepted if
        ``new_gain >= beta * stale_gain``; otherwise its gain is refreshed and
        the search repeats.
        """
        assert callable(gain_function)
        assert update_state is None or callable(update_state)

        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        greedy_gain = np.zeros(len(self.index))
        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
        greedy_gain[selected] = -np.inf

        for step in range(sum(selected), self.budget):
            if step % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (step + 1, self.budget))
            accepted = False
            while not accepted:
                candidate = greedy_gain.argmax()
                stale_gain = greedy_gain[candidate]

                fresh_gain = gain_function(np.array([candidate]), selected, **kwargs)[0]

                if fresh_gain < self.beta * stale_gain:
                    # Too optimistic: store the refreshed value and retry.
                    greedy_gain[candidate] = fresh_gain
                    continue

                # Close enough to its stale estimate: select it.
                selected[candidate] = True
                greedy_gain[candidate] = -np.inf
                if update_state is not None:
                    update_state(np.array([candidate]), selected, **kwargs)
                accepted = True
        return self.index[selected]
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import numpy as np
|
||||
import torch
|
||||
from .methods_utils import cossim_np, submodular_function, submodular_optimizer
|
||||
from ..nets.nets_utils import MyDataParallel
|
||||
|
||||
|
||||
class Submodular(EarlyTrain):
    """Coreset selection by greedy maximisation of a submodular function over gradient similarities.

    Per-sample gradients (with respect to the logits and the weights implied
    by the visual embedding) are compared with ``cossim_np``; the chosen
    submodular function and greedy optimizer are looked up by name in
    ``submodular_function`` and ``submodular_optimizer``.

    Args:
        function: Name of a class in ``submodular_function``.
        greedy: Name of a class in ``submodular_optimizer``; must be listed in
            ``submodular_optimizer.optimizer_choices``.
        metric: Stored as ``self._metric``.
        balance: When true, select ``fraction`` of every class separately.

    Raises:
        ModuleNotFoundError: If ``greedy`` is not a known optimizer name.
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True,
                 function="GraphCut", greedy="LazyGreedy", metric="cossim", **kwargs):
        super(Submodular, self).__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

        if greedy not in submodular_optimizer.optimizer_choices:
            raise ModuleNotFoundError("Greedy optimizer not found.")
        print(f"The Submodular Method is {function}")
        self._greedy = greedy
        self._metric = metric
        self._function = function

        self.balance = balance

    def before_train(self):
        """Training hook; intentionally does nothing."""
        pass

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        """Training hook; intentionally does nothing."""
        pass

    def before_epoch(self):
        """Training hook; intentionally does nothing."""
        pass

    def after_epoch(self):
        """Training hook; intentionally does nothing."""
        pass

    def before_run(self):
        """Training hook; intentionally does nothing."""
        pass

    def num_classes_mismatch(self):
        """Abort: the pretrain and training label spaces differ."""
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def calc_gradient(self, index=None):
        """Compute the per-sample gradient matrix on the current network.

        For every sample, the gradient of the loss with respect to the logits
        is concatenated with its outer product with the visual embedding.

        Args:
            index: Optional subset of the training set; ``None`` means all.

        Returns:
            ``float32`` NumPy array with one row per sample, in loader order.
        """
        self.model.eval()
        data_loader = self.select_dm(self.dst_train, index, is_train=False)
        # Per-batch gradient blocks, kept on the CPU.
        gradients = []

        for i, batch in enumerate(data_loader):
            self.optim.zero_grad()
            image, label = batch['img'].cuda(), batch['label'].cuda()
            bs_size = image.shape[0]
            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
            with torch.no_grad():
                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
                # Outer product per sample via broadcasting:
                # [bs, 1, dim] * [bs, classes, 1] -> [bs, classes, dim].
                weight_parameters_grads = visual_embedding.view(bs_size, 1, -1) * \
                    bias_parameters_grads.view(bs_size, self.num_classes, 1)
                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
                                           dim=1).cpu().numpy())

        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
        print('Finish Gradient Calculation')
        return gradients

    def _run_greedy(self, index, gradients, budget, **optimizer_kwargs):
        # Build the submodular function and optimizer for one candidate pool
        # and return the selected entries of ``index``.
        submod_function = submodular_function.__dict__[self._function](
            index=index,
            similarity_kernel=lambda a, b: cossim_np(gradients[a], gradients[b]))
        submod_optimizer = submodular_optimizer.__dict__[self._greedy](
            args=self.args, index=index, budget=budget, **optimizer_kwargs)
        return submod_optimizer.select(gain_function=submod_function.calc_gain,
                                       update_state=submod_function.update_state)

    def finish_run(self):
        """Select the coreset and return ``{"indices": ...}``.

        Gradients for the whole training set are computed once and reused by
        both the balanced (per-class) and the global selection.
        """
        if isinstance(self.model, MyDataParallel):
            self.model = self.model.module

        # Turn on the no_grad flag for the duration of the selection.
        self.model.no_grad = True
        try:
            self.train_indx = np.arange(self.n_train)

            gradients = self.calc_gradient(index=None)

            if self.balance:
                selection_result = np.array([], dtype=np.int64)
                for c in range(self.num_classes):
                    print(f'class {c}')
                    c_indx = self.train_indx[self.dst_train_label == c]
                    # Rows of the gradient matrix belonging to this class.
                    c_gradients = gradients[c_indx]
                    c_selection_result = self._run_greedy(
                        c_indx, c_gradients, round(self.fraction * len(c_indx)), already_selected=[])
                    selection_result = np.append(selection_result, c_selection_result)
            else:
                # Reuse the gradients computed above instead of recomputing
                # them for the whole dataset a second time.
                selection_result = self._run_greedy(self.train_indx, gradients, self.coreset_size)
        finally:
            self.model.no_grad = False
        return {"indices": selection_result}

    def select(self, **kwargs):
        """Public entry point: execute ``self.run()`` and hand back its result."""
        selection_result = self.run()
        return selection_result
|
||||
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
from .earlytrain import EarlyTrain
|
||||
import torch
|
||||
import numpy as np
|
||||
from datasets.data_manager import select_dm_loader
|
||||
import time
|
||||
|
||||
class Uncertainty(EarlyTrain):
    """Coreset selection based on per-sample prediction-uncertainty scores.

    Every sample is scored from the model output with one of three criteria,
    chosen through ``selection_method``:

    * ``"LeastConfidence"``: the largest raw model output for the sample.
    * ``"Entropy"``: ``sum(p * log(p + 1e-6))`` over the softmax
      probabilities, i.e. the negated entropy.
    * ``"Margin"``: the largest softmax probability minus the second largest.
    """

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, selection_method="Margin",
                 specific_model=None, balance=False, **kwargs):
        """Validate the scoring criterion and store the selection options.

        Args:
            dst_train, args, fraction, random_seed, epochs, specific_model,
                **kwargs: forwarded unchanged to ``EarlyTrain.__init__``.
            selection_method: one of ``"LeastConfidence"``, ``"Entropy"`` or
                ``"Margin"``.
            balance: when true, ``finish_run`` selects within each class
                separately instead of over the whole training set.

        Raises:
            NotImplementedError: if ``selection_method`` is not one of the
                supported names.
        """
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

        selection_choices = ["LeastConfidence",
                             "Entropy",
                             "Margin"]
        if selection_method not in selection_choices:
            raise NotImplementedError("Selection algorithm unavailable.")
        self.selection_method = selection_method

        self.epochs = epochs
        self.balance = balance

    def before_train(self):
        """Intentionally does nothing."""

    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
        """Intentionally does nothing."""

    def after_epoch(self):
        """Intentionally does nothing."""

    def before_run(self):
        """Intentionally does nothing."""

    def num_classes_mismatch(self):
        """Reject a class-count mismatch by raising ``ValueError``."""
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        """Intentionally does nothing."""

    def finish_run(self):
        """Score the training samples and choose the coreset indices.

        Returns:
            dict: ``"indices"`` holds the selected sample indices.
            ``"scores"`` holds a list with one score array per class when
            ``self.balance`` is true, otherwise a single score array for the
            whole training set.
        """
        if self.balance:
            selection_result = np.array([], dtype=np.int64)
            scores = []
            all_index = np.arange(self.n_train)
            for c in range(self.num_classes):
                print(f"Balance Processing on the train set class {c}")
                class_index = all_index[self.dst_train_label == c]
                class_scores = self.rank_uncertainty_clip(class_index)
                scores.append(class_scores)
                # Keep the `fraction` of this class with the lowest scores.
                keep = round(len(class_index) * self.fraction)
                selection_result = np.append(selection_result, class_index[np.argsort(class_scores)[:keep]])
        else:
            print("Imbalance Processing on the train set class")
            scores = self.rank_uncertainty_clip()
            # NOTE(review): this branch keeps the HIGHEST-scoring samples,
            # whereas the balanced branch above keeps the LOWEST-scoring ones.
            # Left unchanged to preserve results; confirm which is intended.
            selection_result = np.argsort(scores)[::-1][:self.coreset_size]
        return {"indices": selection_result, "scores": scores}

    def _score_logits(self, logits):
        """Turn one batch of model outputs into a NumPy array of scores.

        Shared by ``rank_uncertainty`` and ``rank_uncertainty_clip`` so both
        apply exactly the same criterion for ``self.selection_method``.
        """
        if self.selection_method == "LeastConfidence":
            return logits.max(axis=1).values.cpu().numpy()
        if self.selection_method == "Entropy":
            preds = torch.softmax(logits, dim=1).cpu().numpy()
            return (np.log(preds + 1e-6) * preds).sum(axis=1)
        if self.selection_method == "Margin":
            preds = torch.softmax(logits, dim=1)
            rows = torch.arange(preds.shape[0], device=preds.device)
            top_idx = torch.argmax(preds, dim=1)
            top = preds[rows, top_idx].clone()
            # Mask out the winner so the next max is the runner-up; softmax
            # outputs are non-negative, so -1.0 can never be picked again.
            preds[rows, top_idx] = -1.0
            runner_up = preds[rows, torch.argmax(preds, dim=1)]
            return (top - runner_up).cpu().numpy()
        # Unknown method: contribute no scores, as the original if/elif chain did.
        return np.array([])

    def rank_uncertainty(self, index=None):
        """Score samples using a plain ``DataLoader`` over ``self.dst_train``.

        Args:
            index: optional indices restricting scoring to a subset of
                ``self.dst_train``; ``None`` scores the full dataset.

        Returns:
            A 1-D float64 NumPy array of scores in loader order.
        """
        # Put the model that is actually used for inference below into eval
        # mode (the previous code called eval() on `self.specific_model`).
        self.model.eval()
        with torch.no_grad():
            train_loader = torch.utils.data.DataLoader(
                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
                batch_size=self.args.selection_batch,
                num_workers=self.args.workers)

            batch_num = len(train_loader)
            # Seed with an empty float64 array so the result keeps the dtype
            # (and the empty-loader result) of the former np.append chain.
            chunks = [np.array([])]

            for i, (inputs, _) in enumerate(train_loader):
                if i % self.args.print_freq == 0:
                    print("| Selecting for batch [%3d/%3d]" % (i + 1, batch_num))
                logits = self.model(inputs.to(self.args.device))
                chunks.append(np.ravel(self._score_logits(logits)))
        return np.concatenate(chunks)

    def rank_uncertainty_clip(self, index=None):
        """Score samples using the loader built by ``select_dm_loader``.

        Each batch is read through its ``'img'`` and ``'label'`` entries, and
        the model is called with both the images and the labels. The model is
        switched to eval mode for scoring and to train mode afterwards.

        Args:
            index: passed to ``select_dm_loader`` together with ``self.args``
                and ``self.dst_train``; presumably restricts the loader to
                those samples (confirm against ``select_dm_loader``).

        Returns:
            A 1-D float64 NumPy array of scores in loader order.
        """
        self.model.eval()
        with torch.no_grad():
            train_loader = select_dm_loader(self.args, self.dst_train, index)
            # Seed with an empty float64 array so the result keeps the dtype
            # (and the empty-loader result) of the former np.append chain.
            chunks = [np.array([])]

            for batch in train_loader:
                image, label = batch['img'].cuda(), batch['label'].cuda()
                logits = self.model(image, label)  # model is in eval mode here
                chunks.append(np.ravel(self._score_logits(logits)))
        self.model.train()
        return np.concatenate(chunks)

    def select(self, **kwargs):
        """Return the result of ``self.run()``; ``**kwargs`` is unused."""
        selection_result = self.run()
        return selection_result

    def select_without_train(self):
        """Return the result of ``finish_run()`` without calling ``run()``."""
        selection_result = self.finish_run()
        return selection_result
|
||||
@@ -0,0 +1,34 @@
|
||||
import numpy as np
|
||||
from .coresetmethod import CoresetMethod
|
||||
|
||||
|
||||
class Uniform(CoresetMethod):
    """Random subset selection, either stratified by class or over the whole set."""

    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, balance=True, replace=False, **kwargs):
        """Store the sampling options.

        Args:
            dst_train, args, fraction, random_seed: forwarded to
                ``CoresetMethod.__init__``.
            balance: sample each class separately when true.
            replace: passed to ``np.random.choice`` as its ``replace`` flag.
            **kwargs: accepted but not used.
        """
        super().__init__(dst_train, args, fraction, random_seed)
        self.n_train = len(self.dst_train)
        self.replace = replace
        self.balance = balance

    def select_balance(self):
        """Draw the same fraction of samples from every class independently."""
        np.random.seed(self.random_seed)
        candidates = np.arange(self.n_train)
        self.index = np.array([], dtype=np.int64)
        for cls in range(self.num_classes):
            in_class = (self.dst_train_label == cls)
            n_pick = round(self.fraction * in_class.sum().item())
            drawn = np.random.choice(candidates[in_class], n_pick, replace=self.replace)
            self.index = np.append(self.index, drawn)
        return self.index

    def select_no_balance(self):
        """Draw a fraction of the whole training set, ignoring class labels."""
        np.random.seed(self.random_seed)
        n_pick = round(self.n_train * self.fraction)
        self.index = np.random.choice(np.arange(self.n_train), n_pick, replace=self.replace)
        return self.index

    def select(self, **kwargs):
        """Return ``{"indices": ...}`` using the sampler chosen by ``self.balance``."""
        if self.balance:
            chosen = self.select_balance()
        else:
            chosen = self.select_no_balance()
        return {"indices": chosen}
|
||||
Reference in New Issue
Block a user