Upload to Main

2025-10-07 22:42:55 +08:00
commit d3ddab7c5d
218 changed files with 125815 additions and 0 deletions
--- a/deepcore/methods/init.py
+++ b/deepcore/methods/init.py
@@ -0,0 +1,17 @@
+from .cal import *
+from .contextualdiversity import *
+from .coresetmethod import *
+from .craig import *
+from .deepfool import *
+from .earlytrain import *
+from .forgetting import *
+from .full import *
+from .glister import *
+from .grand import *
+from .gradmatch import *
+from .herding import *
+from .kcentergreedy import *
+from .submodular import *
+from .uncertainty import *
+from .uniform import *
+
--- a/deepcore/methods/pycache/init.cpython-39.pyc
+++ b/deepcore/methods/pycache/init.cpython-39.pyc
--- a/deepcore/methods/pycache/cal.cpython-39.pyc
+++ b/deepcore/methods/pycache/cal.cpython-39.pyc
--- a/deepcore/methods/pycache/contextualdiversity.cpython-39.pyc
+++ b/deepcore/methods/pycache/contextualdiversity.cpython-39.pyc
--- a/deepcore/methods/pycache/coresetmethod.cpython-39.pyc
+++ b/deepcore/methods/pycache/coresetmethod.cpython-39.pyc
--- a/deepcore/methods/pycache/craig.cpython-39.pyc
+++ b/deepcore/methods/pycache/craig.cpython-39.pyc
--- a/deepcore/methods/pycache/deepfool.cpython-39.pyc
+++ b/deepcore/methods/pycache/deepfool.cpython-39.pyc
--- a/deepcore/methods/pycache/earlytrain.cpython-39.pyc
+++ b/deepcore/methods/pycache/earlytrain.cpython-39.pyc
--- a/deepcore/methods/pycache/forgetting.cpython-39.pyc
+++ b/deepcore/methods/pycache/forgetting.cpython-39.pyc
--- a/deepcore/methods/pycache/full.cpython-39.pyc
+++ b/deepcore/methods/pycache/full.cpython-39.pyc
--- a/deepcore/methods/pycache/glister.cpython-39.pyc
+++ b/deepcore/methods/pycache/glister.cpython-39.pyc
--- a/deepcore/methods/pycache/gradmatch.cpython-39.pyc
+++ b/deepcore/methods/pycache/gradmatch.cpython-39.pyc
--- a/deepcore/methods/pycache/grand.cpython-39.pyc
+++ b/deepcore/methods/pycache/grand.cpython-39.pyc
--- a/deepcore/methods/pycache/herding.cpython-39.pyc
+++ b/deepcore/methods/pycache/herding.cpython-39.pyc
--- a/deepcore/methods/pycache/kcentergreedy.cpython-39.pyc
+++ b/deepcore/methods/pycache/kcentergreedy.cpython-39.pyc
--- a/deepcore/methods/pycache/submodular.cpython-39.pyc
+++ b/deepcore/methods/pycache/submodular.cpython-39.pyc
--- a/deepcore/methods/pycache/uncertainty.cpython-39.pyc
+++ b/deepcore/methods/pycache/uncertainty.cpython-39.pyc
--- a/deepcore/methods/pycache/uniform.cpython-39.pyc
+++ b/deepcore/methods/pycache/uniform.cpython-39.pyc
--- a/deepcore/methods/cal.py
+++ b/deepcore/methods/cal.py
@@ -0,0 +1,146 @@
+from .earlytrain import EarlyTrain
+from .methods_utils.euclidean import euclidean_dist_pair_np
+from .methods_utils.cossim import cossim_pair_np
+import numpy as np
+import torch
+from tqdm import tqdm
+from .. import nets
+from copy import deepcopy
+from torchvision import transforms
+
+
+class Cal(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
+                 balance=False, metric="euclidean", neighbors: int = 10, pretrain_model: str = "ResNet18", **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+
+        self.balance = balance
+
+        assert neighbors > 0 and neighbors < 100
+        self.neighbors = neighbors
+
+        if metric == "euclidean":
+            self.metric = euclidean_dist_pair_np
+        elif metric == "cossim":
+            self.metric = lambda a, b: -1. * cossim_pair_np(a, b)
+        elif callable(metric):
+            self.metric = metric
+        else:
+            self.metric = euclidean_dist_pair_np
+
+        self.pretrain_model = pretrain_model
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    #Initial achievement, may not optimal
+    def mixing_feature(self,img_fea,text_fea,lam=0.5):
+        # return img_fea
+        return lam*img_fea + (1-lam)*text_fea
+
+    def find_knn(self):
+        """
+        Find k-nearest-neighbor data points with the pretrained embedding model
+        :return: knn matrix
+        """
+
+        # Initialize pretrained model
+        # model = nets.__dict__[self.pretrain_model](channel=self.args.channel, num_classes=self.args.num_classes,
+        #                                            im_size=(224, 224), record_embedding=True, no_grad=True,
+        #                                            pretrained=True).to(self.args.device)
+        self.model.eval()
+        probs = []
+        # # Resize dst_train to 224*224
+        # if self.args.im_size[0] != 224 or self.args.im_size[1] != 224:
+        #     dst_train = deepcopy(self.dst_train)
+        #     dst_train.transform = transforms.Compose([dst_train.transform, transforms.Resize(224)])
+        # else:
+        #     dst_train = self.dst_train
+
+        # Calculate the distance matrix and return knn results
+        if self.balance:
+            knn = []
+            for c in tqdm(range(self.num_classes)):
+                print(f'Start processing class {c}/{self.num_classes}')
+                class_index = np.arange(self.n_train)[self.dst_train_label == c]
+
+                # Start recording embedding vectors
+                #                batch_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(dst_train, class_index),
+                #                                            batch_size=self.args.selection_batch,
+                #                                            num_workers=self.args.workers)
+                embdeddings = []
+                c_probs = np.zeros([len(class_index), self.num_classes])
+                data_loader = self.select_dm(self.dst_train, class_index, is_train=False)
+                for i, batch in enumerate(data_loader):
+                    image, label = batch['img'].cuda(), batch['label'].cuda()
+                    img_f, text_f,logit = self.model(image, label, record=True)
+                    final_feature = self.mixing_feature(img_f,text_f)
+                    embdeddings.append(final_feature.cpu().numpy())
+                    c_probs[i * self.args.DATASET.SELECTION_BATCH_SIZE:(i + 1) * self.args.DATASET.SELECTION_BATCH_SIZE] = \
+                    torch.softmax(logit, dim=1).detach().cpu()
+
+                embdeddings = np.concatenate(embdeddings, axis=0)
+                probs.append(c_probs)
+                knn.append(np.argsort(self.metric(embdeddings), axis=1)[:, 1:(self.neighbors + 1)])
+            self.probs = np.concatenate(probs,axis=0)
+            return knn
+        else:
+            # Start recording embedding vectors
+            embdeddings = []
+            batch_loader = self.select_dm(self.dst_train, None, is_train=False)
+            print(f'Start processing all class')
+            for i, batch in enumerate(tqdm(batch_loader)):
+                image, label = batch['img'].cuda(), batch['label'].cuda()
+                img_f, text_f,logit = self.model(image, label, record=True)
+                final_feature = self.mixing_feature(img_f, text_f)
+                embdeddings.append(final_feature.cpu().numpy())
+                probs[i * self.args.DATASET.SELECTION_BATCH_SIZE:(i + 1) * self.args.DATASET.SELECTION_BATCH_SIZE] = \
+                    torch.softmax(logit, dim=1).detach().cpu()
+            embdeddings = np.concatenate(embdeddings, axis=0)
+            self.probs = np.concatenate(probs, axis=0)
+            return np.argsort(self.metric(embdeddings), axis=1)[:, 1:(self.neighbors + 1)]
+
+    def calc_kl(self, knn, index=None):
+        self.model.eval()
+        self.model.no_grad = True
+        sample_num = self.n_train if index is None else len(index)
+        # probs = np.zeros([sample_num, self.num_classes])
+        #
+        # batch_loader = torch.utils.data.DataLoader(
+        #     self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+        #     batch_size=self.args.selection_batch, num_workers=self.args.workers)
+        # batch_num = len(batch_loader)
+        #
+        # for i, (inputs, _) in enumerate(batch_loader):
+        #     probs[i * self.args.selection_batch:(i + 1) * self.args.selection_batch] = torch.nn.functional.softmax(
+        #         self.model(inputs.to(self.args.device)), dim=1).detach().cpu()
+        probs = self.probs[index]
+        s = np.zeros(sample_num)
+        for i in range(0, sample_num, self.args.DATASET.SELECTION_BATCH_SIZE):
+
+            print("| Caculating KL-divergence for batch [%3d/%3d] with batchsize [%3d]" % (i, sample_num, self.args.DATASET.SELECTION_BATCH_SIZE))
+            aa = np.expand_dims(probs[i:(i + self.args.DATASET.SELECTION_BATCH_SIZE)], 1).repeat(self.neighbors, 1)
+            bb = probs[knn[i:(i + self.args.DATASET.SELECTION_BATCH_SIZE)], :]
+            s[i:(i + self.args.DATASET.SELECTION_BATCH_SIZE)] = np.mean(
+                np.sum(0.5 * aa * np.log(aa / bb) + 0.5 * bb * np.log(bb / aa), axis=2), axis=1)
+        self.model.no_grad = False
+        return s
+
+    def finish_run(self):
+        scores=[]
+        if self.balance:
+            selection_result = np.array([], dtype=np.int32)
+            for c, knn in zip(range(self.num_classes), self.knn):
+                class_index = np.arange(self.n_train)[self.dst_train_label == c]
+                scores.append(self.calc_kl(knn, class_index))
+                selection_result = np.append(selection_result, class_index[np.argsort(
+                    #self.calc_kl(knn, class_index))[::1][:round(self.fraction * len(class_index))]])
+                    scores[-1])[::1][:round(self.fraction * len(class_index))]])
+        else:
+            selection_result = np.argsort(self.calc_kl(self.knn))[::1][:self.coreset_size]
+        return {"indices": selection_result, "scores":scores}
+
+    def select(self, **kwargs):
+        self.knn = self.find_knn()
+        selection_result = self.run()
+        return selection_result
--- a/deepcore/methods/contextualdiversity.py
+++ b/deepcore/methods/contextualdiversity.py
@@ -0,0 +1,33 @@
+from .kcentergreedy import kCenterGreedy
+import torch
+
+
+# Acknowledgement to:
+# https://github.com/sharat29ag/CDAL
+
+
+class ContextualDiversity(kCenterGreedy):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
+                 specific_model=None, balance=True, already_selected=[], torchvision_pretrain: bool = False, **kwargs):
+        super(ContextualDiversity, self).__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, balance=balance, already_selected=already_selected, torchvision_pretrain=torchvision_pretrain, **kwargs)
+        self.metric = self._metric
+
+    def _metric(self, a_output, b_output):
+        with torch.no_grad():
+            # Overload self.metric function for kCenterGreedy Algorithm
+            aa = a_output.view(a_output.shape[0], 1, a_output.shape[1]).repeat(1, b_output.shape[0], 1)
+            bb = b_output.view(1, b_output.shape[0], b_output.shape[1]).repeat(a_output.shape[0], 1, 1)
+            return torch.sum(0.5 * aa * torch.log(aa / bb) + 0.5 * bb * torch.log(bb / aa), dim=2)
+
+    def construct_matrix(self, index=None):
+        self.model.eval()
+        self.model.no_grad = True
+        sample_num = self.n_train if index is None else len(index)
+        matrix = torch.zeros([sample_num, self.args.num_classes], requires_grad=False).to(self.args.device)
+        batch_loader = torch.utils.data.DataLoader(self.dst_train if index is None else
+                            torch.utils.data.Subset(self.dst_train, index), batch_size=self.args.selection_batch
+                                                   ,num_workers=self.args.workers)
+        for i, (inputs, _) in enumerate(batch_loader):
+            matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = torch.nn.functional.softmax(self.model(inputs.to(self.args.device)), dim=1)
+        self.model.no_grad = False
+        return matrix
--- a/deepcore/methods/coresetmethod.py
+++ b/deepcore/methods/coresetmethod.py
@@ -0,0 +1,49 @@
+import numpy as np
+import os
+
+class CoresetMethod(object):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None,**kwargs):
+        if fraction <= 0.0 or fraction > 1.0:
+            raise ValueError("Illegal Coreset Size.")
+
+        self.dm = dst_train
+        self.dst_train = dst_train.dataset.train_x
+        self.num_classes = dst_train.dataset.num_classes
+        self.fraction = fraction
+        self.random_seed = random_seed
+        self.index = []
+        self.args = args
+        self.dst_train_label = self.get_train_label(self.dst_train)
+        self.n_train = len(self.dst_train)
+        self.coreset_size = round(self.n_train * fraction)
+        self.max_epoch = self.args.OPTIM_SELECTION.MAX_EPOCH
+
+    def select(self, **kwargs):
+        return
+
+    def get_train_label(self,dst_train):
+        ####Readable
+        ind = []
+        for i,item in enumerate(dst_train):
+            ind.append(item.label)
+        return np.asarray(ind)
+    def pre_run(self):
+        self.train_indx = np.arange(self.n_train)
+        print(f'Start pre-funing CLIP with all datasets by {self.max_epoch} epoch')
+        file_save_name = self.args.DATASET.NAME + '_' + str(self.args.SEED) + '.pth'
+        output_checkpoint_dir = os.path.join('checkpoints', file_save_name)
+        if self.max_epoch > 0:
+
+            if os.path.exists(output_checkpoint_dir):
+                print(f'The checkpiont exists! Load that shit')
+                ckpt = torch.load(output_checkpoint_dir)
+                self.model.load_state_dict(ckpt)
+            else:
+                for epoch in range(self.epoch, self.max_epoch):
+                    # list_of_train_idx = np.random.choice(np.arange(self.n_pretrain if self.if_dst_pretrain else self.n_train),
+                    #                                      self.n_pretrain_size, replace=False)
+                    self.before_epoch()  # PASS
+                    self.train(epoch)
+                    self.test(epoch)
+                    self.after_epoch()
+        torch.save(self.model.state_dict(), output_checkpoint_dir)
--- a/deepcore/methods/craig.py
+++ b/deepcore/methods/craig.py
@@ -0,0 +1,126 @@
+from .earlytrain import EarlyTrain
+import torch
+from .methods_utils import FacilityLocation, submodular_optimizer
+import numpy as np
+from .methods_utils.euclidean import euclidean_dist_pair_np
+from ..nets.nets_utils import MyDataParallel
+from tqdm import tqdm
+
+class Craig(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
+                 balance=True, greedy="LazyGreedy", **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+
+        if greedy not in submodular_optimizer.optimizer_choices:
+            raise ModuleNotFoundError("Greedy optimizer not found.")
+        self._greedy = greedy
+        self.balance = balance
+
+    def before_train(self):
+        pass
+
+    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
+        pass
+
+    def before_epoch(self):
+        pass
+
+    def after_epoch(self):
+        pass
+
+    def before_run(self):
+        pass
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    # def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+    #     if batch_idx % self.args.print_freq == 0:
+    #         print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
+    #             epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))
+
+    # def calc_gradient(self, index=None):
+    #     self.model.eval()
+    #
+    #     batch_loader = torch.utils.data.DataLoader(
+    #         self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+    #         batch_size=self.args.selection_batch, num_workers=self.args.workers)
+    #     sample_num = len(self.dst_val.targets) if index is None else len(index)
+    #     self.embedding_dim = self.model.get_last_layer().in_features
+    #
+    #     gradients = []
+    #
+    #     for i, (input, targets) in enumerate(batch_loader):
+    #         self.model_optimizer.zero_grad()
+    #         outputs = self.model(input.to(self.args.device))
+    #         loss = self.criterion(outputs.requires_grad_(True),
+    #                               targets.to(self.args.device)).sum()
+    #         batch_num = targets.shape[0]
+    #         with torch.no_grad():
+    #             bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
+    #             weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1,
+    #                                                                                    self.embedding_dim).repeat(1,
+    #                                                                                                               self.args.num_classes,
+    #                                                                                                               1) * bias_parameters_grads.view(
+    #                 batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim)
+    #             gradients.append(
+    #                 torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu().numpy())
+    #
+    #     gradients = np.concatenate(gradients, axis=0)
+    #
+    #     self.model.train()
+    #     return euclidean_dist_pair_np(gradients)
+
+    def calc_weights(self, matrix, result):
+        min_sample = np.argmax(matrix[result], axis=0)
+        weights = np.ones(np.sum(result) if result.dtype == bool else len(result))
+        for i in min_sample:
+            weights[i] = weights[i] + 1
+        return weights
+
+    def finish_run(self):
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+        self.model.no_grad = True
+        grad = self.calc_gradient()
+        grad_matrix = euclidean_dist_pair_np(grad)
+        # with self.model.embedding_recorder:
+        if self.balance:
+
+            # Do selection by class
+            selection_result = np.array([], dtype=np.int32)
+            weights = np.array([])
+            for c in tqdm(range(self.num_classes)):
+                class_index = np.arange(self.n_train)[self.dst_train_label == c]
+                matrix = -1. * grad_matrix[class_index[:,None],class_index]  # Change to column index
+                # matrix = -1. * self.calc_gradient(class_index)
+                matrix -= np.min(matrix) - 1e-3  #The least is zero
+                submod_function = FacilityLocation(index=class_index, similarity_matrix=matrix)
+                submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=class_index,
+                                                                               budget=round(self.fraction * len(
+                                                                                   class_index)))
+                class_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
+                                                       update_state=submod_function.update_state)
+                selection_result = np.append(selection_result, class_result)
+                weights = np.append(weights, self.calc_weights(matrix, np.isin(class_index, class_result)))
+        else:
+            matrix = np.zeros([self.n_train, self.n_train])
+            all_index = np.arange(self.n_train)
+            for c in range(self.num_classes):  # Sparse Matrix
+                class_index = np.arange(self.n_train)[self.dst_train_label== c]
+                matrix[np.ix_(class_index, class_index)] = -1. * self.calc_gradient(class_index)
+                matrix[np.ix_(class_index, class_index)] -= np.min(matrix[np.ix_(class_index, class_index)]) - 1e-3
+            submod_function = FacilityLocation(index=all_index, similarity_matrix=matrix)
+            submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=all_index,
+                                                                           budget=self.coreset_size)
+            selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain_batch,
+                                                       update_state=submod_function.update_state,
+                                                       batch=self.args.selection_batch)
+            weights = self.calc_weights(matrix, selection_result)
+        self.model.no_grad = False
+        return {"indices": selection_result, "weights": weights}
+
+    def select(self, **kwargs):
+        selection_result = self.run()
+        return selection_result
--- a/deepcore/methods/deepfool.py
+++ b/deepcore/methods/deepfool.py
@@ -0,0 +1,120 @@
+from .earlytrain import EarlyTrain
+import torch
+import numpy as np
+
+
+class DeepFool(EarlyTrain):
+    """Coreset selection that ranks samples by the size of their DeepFool perturbation.
+
+    For every sample the squared L2 norm of the accumulated perturbation is computed;
+    the samples with the smallest values are selected.
+    """
+
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
+                 specific_model=None, balance: bool = False, max_iter: int = 50, **kwargs):
+        """
+        :param balance: if True, samples are selected separately for every class.
+        :param max_iter: maximum number of DeepFool iterations per batch.
+        All remaining arguments are forwarded to ``EarlyTrain``.
+        """
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+
+        self.balance = balance
+        self.max_iter = max_iter
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        # Print the training loss every self.args.print_freq batches.
+        # NOTE(review): self.n_pretrain_size is not set in the code visible here -- confirm
+        # it is defined by the base class.
+        if batch_idx % self.args.print_freq == 0:
+            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
+                epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))
+
+    def finish_run(self):
+        """Score every training sample with ``deep_fool`` and select the lowest-scoring ones.
+
+        :return: dict with "indices" (selected dataset indices) and "scores" (one value
+            per training sample).
+        """
+        self.model.no_grad = False
+
+        # Create a data loader for self.dst_train with batch size self.args.selection_batch
+        # NOTE(review): the loop below unpacks each batch as (inputs, targets) and reads
+        # self.args.selection_batch / workers / print_freq -- confirm that the dataset and
+        # args object in use provide this.
+        batch_loader = torch.utils.data.DataLoader(self.dst_train, batch_size=self.args.selection_batch
+                                                   , num_workers=self.args.workers)
+
+        r = np.zeros(self.n_train, dtype=np.float32)
+        batch_num = len(batch_loader)
+        for i, (inputs, targets) in enumerate(batch_loader):
+            if i % self.args.print_freq == 0:
+                print('| Selecting Batch [%3d/%3d]' % (i + 1, batch_num))
+            r[(i * self.args.selection_batch):(i * self.args.selection_batch + targets.shape[0])] = self.deep_fool(
+                inputs)
+
+        if self.balance:
+            selection_result = np.array([], dtype=np.int64)
+            for c in range(self.args.num_classes):
+                # NOTE(review): sibling methods take labels from self.dst_train_label and the
+                # class count from self.num_classes; confirm self.dst_train.targets and
+                # self.args.num_classes are available here.
+                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
+                # Keep the fraction of this class with the smallest scores.
+                selection_result = np.append(selection_result, class_index[
+                    r[class_index].argsort()[:round(len(class_index) * self.fraction)]])
+        else:
+            selection_result = r.argsort()[:self.coreset_size]
+        return {"indices": selection_result, "scores": r}
+
+    def deep_fool(self, inputs):
+        """Run the DeepFool iteration on one batch.
+
+        :param inputs: batch tensor; its shape is indexed up to ``inputs.shape[3]``.
+        :return: numpy array holding, per sample, the squared L2 norm of the accumulated
+            perturbation.
+        """
+        # Here, start running DeepFool algorithm.
+        self.model.eval()
+
+        # Initialize a boolean mask indicating if selection has been stopped at corresponding positions.
+        sample_size = inputs.shape[0]
+        boolean_mask = np.ones(sample_size, dtype=bool)
+        all_idx = np.arange(sample_size)
+
+        # A matrix to store total pertubations.
+        r_tot = np.zeros([sample_size, inputs.shape[1] * inputs.shape[2] * inputs.shape[3]])
+
+        # Set requires_grad for inputs.
+        cur_inputs = inputs.requires_grad_(True).to(self.args.device)
+
+        original_shape = inputs.shape[1:]
+
+        # set requires_grad for all parametres in network as False to accelerate autograd
+        # (the flags are not switched back on inside this method)
+        for p in self.model.parameters():
+            p.requires_grad_(False)
+
+        # Predictions on the unperturbed inputs; iteration for a sample stops once its
+        # prediction differs from this one.
+        self.model.no_grad = True
+        first_preds = self.model(cur_inputs).argmax(dim=1)
+        self.model.no_grad = False
+
+        for i in range(self.max_iter):
+            f_all = self.model(cur_inputs)
+
+            # Gradient of every class output with respect to the inputs, flattened per sample.
+            w_k = []
+            for c in range(self.args.num_classes):
+                w_k.append(torch.autograd.grad(f_all[:, c].sum(), cur_inputs,
+                                               retain_graph=False if c + 1 == self.args.num_classes else True)[
+                               0].flatten(1))
+            w_k = torch.stack(w_k, dim=0)
+            # NOTE(review): boolean_mask[boolean_mask] is an all-True array with one entry per
+            # still-active sample; it is used as the second index next to first_preds
+            # throughout this loop -- verify the resulting selection is the intended one.
+            w_k = w_k - w_k[first_preds, boolean_mask[boolean_mask]].unsqueeze(0)
+            w_k_norm = w_k.norm(dim=2)
+
+            w_k_norm[first_preds, boolean_mask[
+                boolean_mask]] = 1.  # Set w_k_norm for preds positions to 1. to avoid division by zero.
+
+            l_all = (f_all - f_all[boolean_mask[boolean_mask], first_preds].unsqueeze(1)).detach().abs() / w_k_norm.T
+            l_all[boolean_mask[
+                      boolean_mask], first_preds] = np.inf  # Set l_k for preds positions to inf, as the argmin for each
+                                                            # row will be calculated soon.
+
+            # Class with the smallest ratio for every sample, and the matching perturbation step.
+            l_hat = l_all.argmin(dim=1)
+            r_i = l_all[boolean_mask[boolean_mask], l_hat].unsqueeze(1) / w_k_norm[
+                l_hat, boolean_mask[boolean_mask]].T.unsqueeze(1) * w_k[l_hat, boolean_mask[boolean_mask]]
+
+            # Update r_tot values.
+            r_tot[boolean_mask] += r_i.cpu().numpy()
+
+            cur_inputs += r_i.reshape([r_i.shape[0]] + list(original_shape))
+
+            # Re-input the updated sample into the network and get new predictions.
+            self.model.no_grad = True
+            preds = self.model(cur_inputs).argmax(dim=1)
+            self.model.no_grad = False
+
+            # In DeepFool algorithm, the iteration stops when the updated sample produces a different prediction
+            # in the model.
+            index_unfinished = (preds == first_preds)
+            if torch.all(~index_unfinished):
+                break
+
+            # Keep only the samples whose prediction has not changed yet and mark the
+            # others as finished in the full-batch mask.
+            cur_inputs = cur_inputs[index_unfinished]
+            first_preds = first_preds[index_unfinished]
+            boolean_mask[all_idx[boolean_mask][~index_unfinished.cpu().numpy()]] = False
+
+        return (r_tot * r_tot).sum(axis=1)
+
+    def select(self, **kwargs):
+        """Run the selection pipeline and return its result."""
+        selection_result = self.run()
+        return selection_result
--- a/deepcore/methods/earlytrain.py
+++ b/deepcore/methods/earlytrain.py
@@ -0,0 +1,322 @@
+from .coresetmethod import CoresetMethod
+import torch, time
+from torch import nn
+import numpy as np
+from copy import deepcopy
+from .. import nets
+from torchvision import transforms
+from datasets.data_manager import select_dm_loader
+from dassl.utils import MetricMeter, AverageMeter
+from torch.cuda.amp import GradScaler, autocast
+import datetime
+from tqdm import tqdm
+import os
+
+class EarlyTrain(CoresetMethod):
+    '''
+    Base class for coreset selection methods that tune a model before selecting.
+
+    It provides the per-epoch training loop (``train``), the outer loop with checkpoint
+    caching (``run``), an accuracy evaluation (``test``) and a set of no-op hooks
+    (``before_run``, ``before_epoch``, ``after_loss``, ``finish_run``, ...) that subclasses
+    override to collect their own statistics.
+    '''
+
+    def __init__(self, dst_train, args,fraction=0.5, random_seed=None, epochs=200, specific_model=None,
+                 torchvision_pretrain: bool = False, dst_pretrain_dict: dict = {}, fraction_pretrain=1., dst_test=None,
+                 **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed)
+        self.epochs = epochs
+        self.n_train = len(self.dst_train)
+        self.coreset_size = round(self.n_train * fraction)
+        self.model = specific_model
+        self.train_loader = self.dm.train_loader_x
+        self.test_loader = self.dm.test_loader
+
+
+        if kwargs:
+            # self.text_feature = kwargs['text_feature']
+            self.optim = kwargs['optim']
+            self.sche = kwargs['schedule']
+            self.scar = kwargs['scar']
+
+
+
+        self.start_epoch = self.epoch = 0
+        self.max_epoch = self.args.OPTIM_SELECTION.MAX_EPOCH
+
+        if fraction_pretrain <= 0. or fraction_pretrain > 1.:
+            raise ValueError("Illegal pretrain fraction value.")
+        self.fraction_pretrain = fraction_pretrain
+
+        if dst_pretrain_dict.__len__() != 0:
+            dict_keys = dst_pretrain_dict.keys()
+            if 'im_size' not in dict_keys or 'channel' not in dict_keys or 'dst_train' not in dict_keys or \
+                    'num_classes' not in dict_keys:
+                raise AttributeError(
+                    'Argument dst_pretrain_dict must contain imszie, channel, dst_train and num_classes.')
+            if dst_pretrain_dict['im_size'][0] != args.im_size[0] or dst_pretrain_dict['im_size'][0] != args.im_size[0]:
+                raise ValueError("im_size of pretrain dataset does not match that of the training dataset.")
+            if dst_pretrain_dict['channel'] != args.channel:
+                raise ValueError("channel of pretrain dataset does not match that of the training dataset.")
+            if dst_pretrain_dict['num_classes'] != args.num_classes:
+                self.num_classes_mismatch()
+
+        self.dst_pretrain_dict = dst_pretrain_dict
+        self.torchvision_pretrain = torchvision_pretrain
+        self.if_dst_pretrain = (len(self.dst_pretrain_dict) != 0)
+
+        if torchvision_pretrain:
+            # Pretrained models in torchvision only accept 224*224 inputs, therefore we resize current
+            # datasets to 224*224.
+            if args.im_size[0] != 224 or args.im_size[1] != 224:
+                self.dst_train = deepcopy(dst_train)
+                self.dst_train.transform = transforms.Compose([self.dst_train.transform, transforms.Resize(224)])
+                if self.if_dst_pretrain:
+                    self.dst_pretrain_dict['dst_train'] = deepcopy(dst_pretrain_dict['dst_train'])
+                    self.dst_pretrain_dict['dst_train'].transform = transforms.Compose(
+                        [self.dst_pretrain_dict['dst_train'].transform, transforms.Resize(224)])
+        if self.if_dst_pretrain:
+            self.n_pretrain = len(self.dst_pretrain_dict['dst_train'])
+        self.n_pretrain_size = round(
+            self.fraction_pretrain * (self.n_pretrain if self.if_dst_pretrain else self.n_train))
+        self.dst_test = dst_test
+
+
+    def train(self, epoch, list_of_train_idx=None, **kwargs):
+        """ Train model for one epoch """
+
+        self.before_train()
+        self.model.train()
+
+        losses = MetricMeter()
+        batch_time = AverageMeter()
+        data_time = AverageMeter()
+
+
+        end = time.time()
+
+        print('\n=> Training Pre-tuning Epoch #%d' % epoch)
+        train_loader = select_dm_loader(self.args,self.dst_train,is_train=True)
+        self.num_batches = len(train_loader)
+
+        # trainset_permutation_inds = np.random.permutation(list_of_train_idx)
+        # batch_sampler = torch.utils.data.BatchSampler(trainset_permutation_inds, batch_size=self.args.selection_batch,
+        #                                               drop_last=False)
+        # trainset_permutation_inds = list(batch_sampler)
+        #
+        # train_loader = torch.utils.data.DataLoader(self.dst_pretrain_dict['dst_train'] if self.if_dst_pretrain
+        #                                            else self.dst_train, shuffle=False, batch_sampler=batch_sampler,
+        #
+        #
+        #                                            num_workers=self.args.workers, pin_memory=True)
+
+        for i, batch in enumerate(train_loader):
+            data_time.update(time.time() - end)
+            image, label,real_ind = batch['img'].cuda(),batch['label'].cuda(),batch['index'].cuda()
+
+            model = self.model
+            optim = self.optim
+            scaler = self.scar
+
+            prec = self.args.TRAINER.MAPLE.PREC
+            if prec == "amp":
+                with autocast():
+                    loss,outputs = model(image, label)
+                optim.zero_grad()
+                scaler.scale(loss).backward()
+                scaler.step(optim)
+                scaler.update()
+            else:
+                loss,outputs = model(image, label)
+                optim.zero_grad()
+                loss.backward()
+                optim.step()
+
+            self.after_loss(outputs, loss, label, real_ind, epoch)
+            self.while_update(outputs, loss, label, epoch, i, self.args.DATALOADER.TRAIN_X.BATCH_SIZE)
+
+            loss_summary = {"loss": loss.item()}
+
+            if (i + 1) == self.num_batches:
+                self.sche.step()
+            batch_time.update(time.time() - end)
+            losses.update(loss_summary)
+
+            meet_freq = (i + 1) % self.args.TRAIN.PRINT_FREQ == 0
+            only_few_batches = self.num_batches < self.args.TRAIN.PRINT_FREQ
+
+            if meet_freq or only_few_batches:
+                nb_remain = 0
+                nb_remain += self.num_batches - i - 1
+                nb_remain += (self.max_epoch - self.epoch - 1) * self.num_batches
+                eta_seconds = batch_time.avg * nb_remain
+                eta = str(datetime.timedelta(seconds=int(eta_seconds)))
+
+                info = []
+                info += [f"epoch [{self.epoch + 1}/{self.max_epoch}]"]
+                info += [f"batch [{i + 1}/{self.num_batches}]"]
+                info += [f"time {batch_time.val:.3f} ({batch_time.avg:.3f})"]
+                info += [f"data {data_time.val:.3f} ({data_time.avg:.3f})"]
+                info += [f"{losses}"]
+                info += [f"lr {optim.param_groups[0]['lr']:.4e}"]
+                info += [f"eta {eta}"]
+                print(" ".join(info))
+
+            # n_iter = self.epoch * self.num_batches + i
+            # for name, meter in losses.meters.items():
+            #     self.write_scalar("train/" + name, meter.avg, n_iter)
+            # self.write_scalar("train/lr", self.get_current_lr(), n_iter)
+
+            end = time.time()
+
+        return self.finish_train()
+
+    def run(self):
+        self.train_indx = np.arange(self.n_train)
+        self.before_run()
+        print(f'Start pre-funing CLIP with all datasets by {self.max_epoch} epoch')
+        file_save_name = self.args.DATASET.NAME + '_' + str(self.args.SEED) + '.pth'
+        output_checkpoint_dir = os.path.join('checkpoints', file_save_name)
+        if self.max_epoch > 0:
+
+            if os.path.exists(output_checkpoint_dir):
+                print(f'The checkpiont exists! Load that shit')
+                ckpt = torch.load(output_checkpoint_dir)
+                self.model.load_state_dict(ckpt)
+            else:
+                for epoch in range(self.epoch,self.max_epoch):
+                    # list_of_train_idx = np.random.choice(np.arange(self.n_pretrain if self.if_dst_pretrain else self.n_train),
+                    #                                      self.n_pretrain_size, replace=False)
+                    self.before_epoch()  #PASS
+                    self.train(epoch)
+                    self.test(epoch)
+                    self.after_epoch()
+        torch.save(self.model.state_dict(),output_checkpoint_dir)
+
+        return self.finish_run()
+
+    def test(self, epoch):
+        self.model.no_grad = True
+        self.model.eval()
+
+
+        correct = 0.
+        total = 0.
+
+        print('\n=> Testing Tuning Epoch #%d' % epoch)
+
+        for batch_idx, batch in enumerate(self.test_loader):
+            image, target = batch['img'].cuda(), batch['label']
+            output = self.model(image, target.cuda())
+
+
+            predicted = torch.max(output.data, 1).indices.cpu()
+            correct += predicted.eq(target).sum().item()
+            total += target.size(0)
+
+            # if batch_idx % self.args.print_freq == 0:
+            #     print('| Test Epoch [%3d/%3d] Iter[%3d/%3d]\t\t Test Acc: %.3f%%' % (
+            #         epoch, self.epochs, batch_idx + 1, (round(len(self.dst_test) * self.args.selection_test_fraction) //
+            #                                             self.args.selection_batch) + 1, loss.item(),
+            #         100. * correct / total))
+        print(f'| Test Epoch {epoch} Test Acc: {100. * correct / total:.3f}%')
+        self.model.no_grad = False
+
+    def num_classes_mismatch(self):
+        pass
+
+    def before_train(self):
+        pass
+
+    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
+        pass
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        pass
+
+    def finish_train(self):
+        pass
+
+    def before_epoch(self):
+        pass
+
+    def after_epoch(self):
+        pass
+
+    def before_run(self):
+        pass
+
+    def finish_run(self):
+        pass
+
+    def select(self, **kwargs):
+        selection_result = self.run()
+        return selection_result
+
+    def select_without_train(self, **kwargs):
+        return self.finish_run()
+
+    @torch.no_grad()
+    def calcluate_clip_probability(self,batch):
+        input = batch["img"].cuda()
+
+        self.specific_model = self.specific_model.cuda()
+        image_features = self.specific_model.encode_image(input)
+        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+        logit_scale = self.specific_model.logit_scale.exp()
+        return logit_scale * image_features @ self.text_feature.t()
+
+    # using the defined select_dm
+    def select_dm(self,data,ind=None,is_train=None):
+        return select_dm_loader(self.args,data,ind,is_train)
+
+
+    def parse_batch_test(self, batch):
+        input = batch["img"]
+        label = batch["label"]
+
+        input = input.cuda()
+        label = label.cuda()
+
+        return input, label
+
+    def parse_batch_train(self, batch):
+        input = batch["img"].cuda()
+        label = batch["label"].cuda()
+        domain = batch["index"].cuda()
+
+        return input, label, domain
+
+
+
+    def calc_gradient(self, index=None):
+        '''
+        Calculate gradients matrix on current network for specified training dataset.
+        '''
+        self.model.eval()
+        data_loader = self.select_dm(self.dst_train, index, is_train=False)
+        # Initialize a matrix to save gradients.
+        # (on cpu)
+        gradients = []
+        lam = 0.5
+        for i, batch in enumerate(tqdm(data_loader)):
+            self.optim.zero_grad()
+            image, label = batch['img'].cuda(), batch['label'].cuda()
+            bs_size = image.shape[0]
+            loss, visual_embedding, logit= self.model(image, label, cal_gradient=True)
+            embed_dim = visual_embedding.shape[-1]
+            with torch.no_grad():
+                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
+                weight_parameters_grads = visual_embedding.view(bs_size, 1,
+                                                                -1).repeat(1, self.num_classes, 1) * \
+                                          bias_parameters_grads.view(bs_size, self.num_classes,
+                                                                     1).repeat(1, 1, embed_dim)
+                # weight_parameters_grads_t = text_embedding.view(bs_size, 1,
+                #                                                 -1).repeat(1, self.num_classes, 1) * \
+                #                           bias_parameters_grads.view(bs_size, self.num_classes,
+                #                                                      1).repeat(1, 1, embed_dim)
+                # final_weight = torch.abs(weight_parameters_grads-weight_parameters_grads_t)
+                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
+                                           dim=1).cpu().numpy())
+
+        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
+        print('Finish Gradient Calculation')
+        self.model.train()
+        return gradients
+
--- a/deepcore/methods/forgetting.py
+++ b/deepcore/methods/forgetting.py
@@ -0,0 +1,99 @@
+from .earlytrain import EarlyTrain
+import torch, time
+from torch import nn
+import numpy as np
+from datasets.data_manager import select_dm_loader
+
+# Acknowledgement to
+# https://github.com/mtoneva/example_forgetting
+
+class Forgetting(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True,  #default True
+                 dst_test=None, **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model=specific_model,
+                         dst_test=dst_test,**kwargs)
+
+        self.balance = balance
+
+    def get_hms(self, seconds):
+        # Format time for printing purposes
+
+        m, s = divmod(seconds, 60)
+        h, m = divmod(m, 60)
+
+        return h, m, s
+
+    def before_train(self):
+        self.train_loss = 0.
+        self.correct = 0.
+        self.total = 0.
+
+    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
+        with torch.no_grad():
+            _, predicted = torch.max(outputs.data, 1)
+
+            cur_acc = (predicted == targets).clone().detach().requires_grad_(False).type(torch.float32)
+            self.forgetting_events[batch_inds.clone().detach()[(self.last_acc[batch_inds]-cur_acc)>0.01]]+=1.
+            self.last_acc[batch_inds] = cur_acc
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        pass
+        # self.train_loss += loss.item()
+        # self.total += targets.size(0)
+        # _, predicted = torch.max(outputs.data, 1)
+        # self.correct += predicted.eq(targets.data).cpu().sum()
+        #
+        # if batch_idx % self.args.print_freq == 0:
+        #     print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%' % (
+        #     epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item(),
+        #     100. * self.correct.item() / self.total))
+
+
+
+    def after_epoch(self):
+        pass
+        # epoch_time = time.time() - self.start_time
+        # self.elapsed_time += epoch_time
+        # print('| Elapsed time : %d:%02d:%02d' % (self.get_hms(self.elapsed_time)))
+
+    def before_run(self):
+        self.elapsed_time = 0
+        self.forgetting_events = torch.zeros(self.n_train, requires_grad=False).cuda()
+        self.test_initial_acc()
+        # self.last_acc = torch.zeros(self.n_train, requires_grad=False).cuda()
+
+    def test_initial_acc(self):
+        self.model.no_grad = True
+        self.model.eval()
+        self.last_acc = torch.zeros(self.n_train, requires_grad=False).cuda()
+
+        print('\n=> Testing Initial acc for Forgetting')
+        train_loader = select_dm_loader(self.args, self.dst_train)
+        for batch_idx, batch in enumerate(train_loader):
+            image, target,batch_inds = batch['img'].cuda(), batch['label'].cuda(), batch['index'].cuda()
+            output = self.model(image, target)
+            predicted = torch.max(output.data, 1).indices
+
+            cur_acc = (predicted == target).clone().detach().requires_grad_(False).type(torch.float32)
+            self.last_acc[batch_inds] = cur_acc
+
+
+        self.model.no_grad = False
+
+    def finish_run(self):
+        pass
+
+    def select(self, **kwargs):
+        self.run()
+
+        if not self.balance:
+            top_examples = self.train_indx[np.argsort(self.forgetting_events.cpu().numpy())][::-1][:self.coreset_size]
+        else:
+            top_examples = np.array([], dtype=np.int64)
+            for c in range(self.num_classes):
+                c_indx = self.train_indx[self.dst_train_label == c]
+                budget = round(self.fraction * len(c_indx))
+                top_examples = np.append(top_examples,
+                                    c_indx[np.argsort(self.forgetting_events[c_indx].cpu().numpy())[::-1][:budget]])
+
+        return {"indices": top_examples, "scores": self.forgetting_events}
--- a/deepcore/methods/full.py
+++ b/deepcore/methods/full.py
@@ -0,0 +1,10 @@
+import numpy as np
+from .coresetmethod import CoresetMethod
+
+
+class Full(CoresetMethod):
+    def __init__(self, dst_train, args, fraction, random_seed, **kwargs):
+        self.n_train = len(dst_train)
+
+    def select(self, **kwargs):
+        return {"indices": np.arange(self.n_train)}
--- a/deepcore/methods/glister.py
+++ b/deepcore/methods/glister.py
@@ -0,0 +1,210 @@
+from .earlytrain import EarlyTrain
+from .methods_utils import submodular_optimizer
+import torch
+import numpy as np
+from ..nets.nets_utils import MyDataParallel
+from tqdm import tqdm
+
+class Glister(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
+                 balance: bool = True, greedy="StochasticGreedy", eta=None, dst_val=None, **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+
+        self.balance = balance
+        self.eta = args.OPTIM_SELECTION.LR if eta is None else eta
+        self.dst_val = dst_train.dataset.val
+        self.dst_val_label = self.get_train_label(self.dst_val)
+        self.n_val = len(self.dst_val)
+
+        if greedy not in submodular_optimizer.optimizer_choices:
+            raise ModuleNotFoundError("Greedy optimizer not found.")
+        self._greedy = greedy
+
+    def calc_gradient(self, index=None,val=False):
+        '''
+        Calculate gradients matrix on current network for specified training dataset.
+        '''
+        self.model.eval()
+        if val:
+            val_str = 'Val'
+            data_loader = self.select_dm(self.dst_val, index, is_train=False)
+            # self.init_out = []
+            # self.init_emb = []
+            # self.init_y = []
+        else:
+            val_str = 'Train'
+            data_loader = self.select_dm(self.dst_train, index, is_train=False)
+        # Initialize a matrix to save gradients.
+        # (on cpu)
+        gradients = []
+
+        for i, batch in enumerate(tqdm(data_loader)):
+
+            self.optim.zero_grad()
+            image, label = batch['img'].cuda(), batch['label'].cuda()
+            bs_size = image.shape[0]
+            loss,visual_embedding,logit = self.model(image,label,cal_gradient=True)
+            embed_dim = visual_embedding.shape[-1]
+            with torch.no_grad():
+                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
+                weight_parameters_grads = visual_embedding.view(bs_size, 1,
+                                        -1).repeat(1, self.num_classes, 1) *\
+                                        bias_parameters_grads.view(bs_size, self.num_classes,
+                                        1).repeat(1, 1, embed_dim)
+                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
+                                            dim=1).cpu().numpy())
+
+                # if val:
+                #     self.init_out.append(logit.cpu())
+                #     self.init_emb.append(visual_embedding.cpu())
+                #     self.init_y.append(label.cpu())
+
+
+        # if val:
+        #     with torch.no_grad():
+        #         self.init_out = torch.cat(self.init_out,dim=0).numpy().astype(dtype=np.float32)
+        #         self.init_emb = torch.cat(self.init_emb,dim=0).numpy().astype(dtype=np.float32)
+        #         self.init_y = torch.cat(self.init_y,dim=0).numpy().astype(dtype=np.float32)
+
+        gradients = np.concatenate(gradients, axis=0,dtype=np.float32)
+        print(f'Finish Gradient Calculation on {val_str} dataset')
+        return gradients
+
+    # def calc_gradient(self, index=None, val=False, record_val_detail=False):
+    #     '''
+    #     Calculate gradients matrix on current network for training or validation dataset.
+    #     '''
+    #
+    #     self.model.eval()
+    #
+    #     if val:
+    #         batch_loader = torch.utils.data.DataLoader(
+    #             self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index),
+    #             batch_size=self.args.selection_batch, num_workers=self.args.workers)
+    #     else:
+    #         batch_loader = torch.utils.data.DataLoader(
+    #             self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+    #             batch_size=self.args.selection_batch, num_workers=self.args.workers)
+    #
+    #     self.embedding_dim = self.model.get_last_layer().in_features
+    #     gradients = []
+    #     if val and record_val_detail:
+    #         self.init_out = []
+    #         self.init_emb = []
+    #         self.init_y = []
+    #
+    #     for i, (input, targets) in enumerate(batch_loader):
+    #         self.model_optimizer.zero_grad()
+    #         outputs = self.model(input.to(self.args.device))
+    #         loss = self.criterion(outputs.requires_grad_(True), targets.to(self.args.device)).sum()
+    #         batch_num = targets.shape[0]
+    #         with torch.no_grad():
+    #             bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
+    #             weight_parameters_grads = self.model.embedding_recorder.embedding.view(batch_num, 1,
+    #                                             self.embedding_dim).repeat(1, self.args.num_classes, 1) *\
+    #                                             bias_parameters_grads.view(
+    #                                             batch_num, self.args.num_classes, 1).repeat(1, 1, self.embedding_dim)
+    #             gradients.append(torch.cat(
+    #                 [bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu())
+    #
+    #             if val and record_val_detail:
+    #                 self.init_out.append(outputs.cpu())
+    #                 self.init_emb.append(self.model.embedding_recorder.embedding.cpu())
+    #                 self.init_y.append(targets)
+    #
+    #     gradients = torch.cat(gradients, dim=0)
+    #     if val:
+    #         self.val_grads = torch.mean(gradients, dim=0)
+    #         if self.dst_val == self.dst_train:
+    #             # No validation set was provided while instantiating Glister, so self.dst_val == self.dst_train
+    #             self.train_grads = gradients
+    #     else:
+    #         self.train_grads = gradients
+    #     if val and record_val_detail:
+    #         with torch.no_grad():
+    #             self.init_out = torch.cat(self.init_out, dim=0)
+    #             self.init_emb = torch.cat(self.init_emb, dim=0)
+    #             self.init_y = torch.cat(self.init_y)
+    #
+    #     self.model.train()
+
+    # PASS, worth discussion
+    def update_val_gradients(self, new_selection, selected_for_train):
+        """Re-estimate the mean validation gradient after a simulated one-step update.
+
+        The mean gradient of the currently selected training samples is used to shift the
+        stored validation outputs by one step of size ``self.eta``; the loss gradients are
+        then recomputed batch by batch on the shifted outputs and their mean is stored in
+        ``self.val_grads``. ``new_selection`` is not used.
+
+        NOTE(review): this method reads ``self.init_out``, ``self.init_emb``, ``self.init_y``,
+        ``self.criterion`` and ``self.embedding_dim``, none of which is assigned in the visible
+        part of this file (the recording code in ``calc_gradient`` is commented out). It also
+        calls torch-style ``.repeat(n, 1)`` / ``.view`` / ``torch.matmul`` on the result of
+        ``np.mean``. It does not look runnable as is - confirm before wiring it in.
+        """
+
+        # Mean gradient of the selected training samples (bias part first, then weights).
+        sum_selected_train_gradients = np.mean(self.train_gradients[selected_for_train], axis=0)
+
+        # Validation outputs after one gradient step on the last layer's bias and weights.
+        new_outputs = self.init_out - self.eta * sum_selected_train_gradients[:self.num_classes].reshape(1,
+                      -1).repeat(self.init_out.shape[0], 1) - self.eta * torch.matmul(self.init_emb,
+                      sum_selected_train_gradients[self.num_classes:].view(self.num_classes, -1).T)
+
+        sample_num = new_outputs.shape[0]
+        gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)], requires_grad=False)
+        i = 0
+        # Process the validation samples in chunks of args.selection_batch.
+        while i * self.args.selection_batch < sample_num:
+            batch_indx = np.arange(sample_num)[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch,
+                                                                                 sample_num)]
+            new_out_puts_batch = new_outputs[batch_indx].clone().detach().requires_grad_(True)
+            loss = self.criterion(new_out_puts_batch, self.init_y[batch_indx])
+            batch_num = len(batch_indx)
+            bias_parameters_grads = torch.autograd.grad(loss.sum(), new_out_puts_batch, retain_graph=True)[0]
+
+            # Outer product of the stored embeddings with the logit gradients.
+            weight_parameters_grads = self.init_emb[batch_indx].view(batch_num, 1, self.embedding_dim).repeat(1,
+                                      self.args.num_classes, 1) * bias_parameters_grads.view(batch_num,
+                                      self.args.num_classes, 1).repeat(1, 1, self.embedding_dim)
+            gradients[batch_indx] = torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1).cpu()
+            i += 1
+
+        self.val_grads = torch.mean(gradients, dim=0)
+
+    def finish_run(self):
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+        self.model.no_grad = True
+
+        self.train_indx = np.arange(self.n_train)
+        self.val_indx = np.arange(self.n_val)
+
+        train_gradients = self.calc_gradient(index=None)
+        val_gradients = self.calc_gradient(index=None,val=True)
+        if self.balance:
+            selection_result = np.array([], dtype=np.int64)
+            #weights = np.array([], dtype=np.float32)
+            for c in range(self.num_classes):
+                c_indx = self.train_indx[self.dst_train_label == c]
+                c_val_inx = self.val_indx[self.dst_val_label == c]
+                self.train_gradients = train_gradients[c_indx]
+                self.val_gradients = val_gradients[c_val_inx].mean(axis=0)
+
+                # self.init_out = self.init_out[c_val_inx]
+                # self.init_emb = self.init_emb[c_val_inx]
+                # self.init_y = self.init_y[c_val_inx]
+
+                submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=c_indx,
+                                                            budget=round(self.fraction * len(c_indx)))
+                #conditioal gain uses taylor series approximation
+                c_selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected,
+                                                             **kwargs: np.dot(self.train_gradients[idx_gain],
+                                                             self.val_gradients.reshape(-1, 1)).
+                                                             flatten(), update_state=None) #self.update val
+                selection_result = np.append(selection_result, c_selection_result)
+
+        else:
+            self.train_gradients = train_gradients
+            self.val_gradients = val_gradients.mean(axis=0)
+            submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args,
+                                  index=np.arange(self.n_train), budget=self.coreset_size)
+            selection_result = submod_optimizer.select(gain_function=lambda idx_gain, selected,
+                                                       **kwargs: torch.matmul(self.train_gradients[idx_gain],
+                                                       self.val_gradients.view(-1, 1)).detach().cpu().numpy().flatten(),
+                                                       upadate_state=self.update_val_gradients)
+
+
+        self.model.no_grad = False
+        return {"indices": selection_result}
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+
+
--- a/deepcore/methods/gradmatch.py
+++ b/deepcore/methods/gradmatch.py
@@ -0,0 +1,213 @@
+import torch
+import numpy as np
+from scipy.linalg import lstsq
+from scipy.optimize import nnls
+from .earlytrain import EarlyTrain
+from ..nets.nets_utils import MyDataParallel
+
+
+# https://github.com/krishnatejakk/GradMatch
+
+class GradMatch(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
+                 balance=True, dst_val=None, lam: float = 1., **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+        self.balance = balance
+        self.dst_val = dst_val
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        if batch_idx % self.args.print_freq == 0:
+            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
+                epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))
+
+    def orthogonal_matching_pursuit(self, A, b, budget: int, lam: float = 1.):
+        '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
+        Acknowlegement to:
+        https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
+        Args:
+          A: design matrix of size (d, n)
+          b: measurement vector of length d
+          budget: selection budget
+          lam: regularization coef. for the final output vector
+        Returns:
+           vector of length n
+        '''
+        with torch.no_grad():
+            d, n = A.shape
+            if budget <= 0:
+                budget = 0
+            elif budget > n:
+                budget = n
+
+            x = np.zeros(n, dtype=np.float32)
+            resid = b.clone()
+            indices = []
+            boolean_mask = torch.ones(n, dtype=bool, device="cuda")
+            all_idx = torch.arange(n, device='cuda')
+
+            for i in range(budget):
+                if i % self.args.print_freq == 0:
+                    print("| Selecting [%3d/%3d]" % (i + 1, budget))
+                projections = torch.matmul(A.T, resid)
+                index = torch.argmax(projections[boolean_mask])
+                index = all_idx[boolean_mask][index]
+
+                indices.append(index.item())
+                boolean_mask[index] = False
+
+                if indices.__len__() == 1:
+                    A_i = A[:, index]
+                    x_i = projections[index] / torch.dot(A_i, A_i).view(-1)
+                    A_i = A[:, index].view(1, -1)
+                else:
+                    A_i = torch.cat((A_i, A[:, index].view(1, -1)), dim=0)
+                    temp = torch.matmul(A_i, torch.transpose(A_i, 0, 1)) + lam * torch.eye(A_i.shape[0], device="cuda")
+                    x_i, _ = torch.lstsq(torch.matmul(A_i, b).view(-1, 1), temp)
+                resid = b - torch.matmul(torch.transpose(A_i, 0, 1), x_i).view(-1)
+            if budget > 1:
+                x_i = nnls(temp.cpu().numpy(), torch.matmul(A_i, b).view(-1).cpu().numpy())[0]
+                x[indices] = x_i
+            elif budget == 1:
+                x[indices[0]] = 1.
+        return x
+
+    def orthogonal_matching_pursuit_np(self, A, b, budget: int, lam: float = 1.):
+        '''approximately solves min_x |x|_0 s.t. Ax=b using Orthogonal Matching Pursuit
+        Acknowlegement to:
+        https://github.com/krishnatejakk/GradMatch/blob/main/GradMatch/selectionstrategies/helpers/omp_solvers.py
+        Args:
+          A: design matrix of size (d, n)
+          b: measurement vector of length d
+          budget: selection budget
+          lam: regularization coef. for the final output vector
+        Returns:
+           vector of length n
+        '''
+        d, n = A.shape
+        if budget <= 0:
+            budget = 0
+        elif budget > n:
+            budget = n
+
+        x = np.zeros(n, dtype=np.float32)
+        resid = np.copy(b)
+        indices = []
+        boolean_mask = np.ones(n, dtype=bool)
+        all_idx = np.arange(n)
+
+        for i in range(budget):
+            if i % self.args.print_freq == 0:
+                print("| Selecting [%3d/%3d]" % (i + 1, budget))
+            projections = A.T.dot(resid)
+            index = np.argmax(projections[boolean_mask])
+            index = all_idx[boolean_mask][index]
+
+            indices.append(index.item())
+            boolean_mask[index] = False
+
+            if indices.__len__() == 1:
+                A_i = A[:, index]
+                x_i = projections[index] / A_i.T.dot(A_i)
+            else:
+                A_i = np.vstack([A_i, A[:, index]])
+                x_i = lstsq(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0]
+            resid = b - A_i.T.dot(x_i)
+        if budget > 1:
+            x_i = nnls(A_i.dot(A_i.T) + lam * np.identity(A_i.shape[0]), A_i.dot(b))[0]
+            x[indices] = x_i
+        elif budget == 1:
+            x[indices[0]] = 1.
+        return x
+
+    def calc_gradient(self, index=None, val=False):
+        self.model.eval()
+        if val:
+            batch_loader = torch.utils.data.DataLoader(
+                self.dst_val if index is None else torch.utils.data.Subset(self.dst_val, index),
+                batch_size=self.args.selection_batch, num_workers=self.args.workers)
+            sample_num = len(self.dst_val.targets) if index is None else len(index)
+        else:
+            batch_loader = torch.utils.data.DataLoader(
+                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+                batch_size=self.args.selection_batch, num_workers=self.args.workers)
+            sample_num = self.n_train if index is None else len(index)
+
+        self.embedding_dim = self.model.get_last_layer().in_features
+        gradients = torch.zeros([sample_num, self.args.num_classes * (self.embedding_dim + 1)],
+                                requires_grad=False, device=self.args.device)
+
+        for i, (input, targets) in enumerate(batch_loader):
+            self.model_optimizer.zero_grad()
+            outputs = self.model(input.to(self.args.device)).requires_grad_(True)
+            loss = self.criterion(outputs, targets.to(self.args.device)).sum()
+            batch_num = targets.shape[0]
+            with torch.no_grad():
+                bias_parameters_grads = torch.autograd.grad(loss, outputs, retain_graph=True)[0].cpu()
+                weight_parameters_grads = self.model.embedding_recorder.embedding.cpu().view(batch_num, 1,
+                                                    self.embedding_dim).repeat(1,self.args.num_classes,1) *\
+                                                    bias_parameters_grads.view(batch_num, self.args.num_classes,
+                                                    1).repeat(1, 1, self.embedding_dim)
+                gradients[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] =\
+                    torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)], dim=1)
+
+        return gradients
+
+    def finish_run(self):
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+        self.model.no_grad = True
+        with self.model.embedding_recorder:
+            if self.dst_val is not None:
+                val_num = len(self.dst_val.targets)
+
+            if self.balance:
+                selection_result = np.array([], dtype=np.int64)
+                weights = np.array([], dtype=np.float32)
+                for c in range(self.args.num_classes):
+                    class_index = np.arange(self.n_train)[self.dst_train.targets == c]
+                    cur_gradients = self.calc_gradient(class_index)
+                    if self.dst_val is not None:
+                        # Also calculate gradients of the validation set.
+                        val_class_index = np.arange(val_num)[self.dst_val.targets == c]
+                        cur_val_gradients = torch.mean(self.calc_gradient(val_class_index, val=True), dim=0)
+                    else:
+                        cur_val_gradients = torch.mean(cur_gradients, dim=0)
+                    if self.args.device == "cpu":
+                        # Compute OMP on numpy
+                        cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T,
+                                                                          cur_val_gradients.numpy(),
+                                                                        budget=round(len(class_index) * self.fraction))
+                    else:
+                        cur_weights = self.orthogonal_matching_pursuit(cur_gradients.to(self.args.device).T,
+                                                                       cur_val_gradients.to(self.args.device),
+                                                                       budget=round(len(class_index) * self.fraction))
+                    selection_result = np.append(selection_result, class_index[np.nonzero(cur_weights)[0]])
+                    weights = np.append(weights, cur_weights[np.nonzero(cur_weights)[0]])
+            else:
+                cur_gradients = self.calc_gradient()
+                if self.dst_val is not None:
+                    # Also calculate gradients of the validation set.
+                    cur_val_gradients = torch.mean(self.calc_gradient(val=True), dim=0)
+                else:
+                    cur_val_gradients = torch.mean(cur_gradients, dim=0)
+                if self.args.device == "cpu":
+                    # Compute OMP on numpy
+                    cur_weights = self.orthogonal_matching_pursuit_np(cur_gradients.numpy().T,
+                                                                      cur_val_gradients.numpy(),
+                                                                      budget=self.coreset_size)
+                else:
+                    cur_weights = self.orthogonal_matching_pursuit(cur_gradients.T, cur_val_gradients,
+                                                                   budget=self.coreset_size)
+                selection_result = np.nonzero(cur_weights)[0]
+                weights = cur_weights[selection_result]
+        self.model.no_grad = False
+        return {"indices": selection_result, "weights": weights}
+
+    def select(self, **kwargs):
+        selection_result = self.run()
+        return selection_result
+
--- a/deepcore/methods/grand.py
+++ b/deepcore/methods/grand.py
@@ -0,0 +1,108 @@
+from .earlytrain import EarlyTrain
+import torch, time
+import numpy as np
+from ..nets.nets_utils import MyDataParallel
+from tqdm import tqdm
+
+class GraNd(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, repeat=1,
+                 specific_model=None, balance=False, **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model,**kwargs)
+        self.epochs = epochs
+        self.n_train = len(self.dst_train)
+        self.coreset_size = round(self.n_train * fraction)
+        self.specific_model = specific_model
+        self.repeat = repeat
+
+        self.balance = balance
+
+    # def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+    #     if batch_idx % self.args.print_freq == 0:
+    #         print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
+    #             epoch, self.epochs, batch_idx + 1, (self.n_train // batch_size) + 1, loss.item()))
+
+    def before_run(self):
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+    def calc_gradient(self, index=None):
+        '''
+        Calculate gradients matrix on current network for specified training dataset.
+        '''
+        self.model.eval()
+        data_loader = self.select_dm(self.dst_train, index, is_train=False)
+        # Initialize a matrix to save gradients.
+        # (on cpu)
+        gradients = []
+
+        for i, batch in enumerate(tqdm(data_loader)):
+            self.optim.zero_grad()
+            image, label = batch['img'].cuda(), batch['label'].cuda()
+            bs_size = image.shape[0]
+            loss, visual_embedding, logit = self.model(image, label, cal_gradient=True)
+            embed_dim = visual_embedding.shape[-1]
+            with torch.no_grad():
+                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
+                weight_parameters_grads = visual_embedding.view(bs_size, 1,
+                                                                -1).repeat(1, self.num_classes, 1) * \
+                                          bias_parameters_grads.view(bs_size, self.num_classes,
+                                                                     1).repeat(1, 1, embed_dim)
+                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
+                                           dim=1).cpu().numpy())
+
+        gradients = np.concatenate(gradients, axis=0, dtype=np.float32)
+        print('Finish Gradient Calculation')
+        self.model.train()
+        return gradients
+
+    def finish_run(self):
+        # self.model.embedding_recorder.record_embedding = True  # recording embedding vector
+
+        gradients = self.calc_gradient()
+        self.norm_matrix[:,0] = np.linalg.norm(gradients,axis=1)
+
+
+
+        # embedding_dim = self.model.get_last_layer().in_features
+        # data_loader = self.select_dm(self.dst_train, None, is_train=False)
+        # sample_num = self.n_train
+        #
+        # for i, batch in enumerate(data_loader):
+        #     self.optim.zero_grad()
+        #     image, target,batch_inds = batch['img'].cuda(), batch['label'].cuda(), batch['index'].cuda()
+        #
+        #     outputs = self.model(image)
+        #     loss = self.criterion(outputs.requires_grad_(True),
+        #                           targets.to(self.args.device)).sum()
+        #     batch_num = targets.shape[0]
+        #     with torch.no_grad():
+        #         bias_parameters_grads = torch.autograd.grad(loss, outputs)[0]
+        #         self.norm_matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num),
+        #         self.cur_repeat] = torch.norm(torch.cat([bias_parameters_grads, (
+        #                 self.model.embedding_recorder.embedding.view(batch_num, 1, embedding_dim).repeat(1,
+        #                                      self.args.num_classes, 1) * bias_parameters_grads.view(
+        #                                      batch_num, self.args.num_classes, 1).repeat(1, 1, embedding_dim)).
+        #                                      view(batch_num, -1)], dim=1), dim=1, p=2)
+        #
+        # self.model.train()
+
+
+    def select(self, **kwargs):
+        # Initialize a matrix to save norms of each sample on idependent runs
+        self.norm_matrix = np.zeros([self.n_train, self.repeat])
+
+        # for self.cur_repeat in range(self.repeat):
+        self.run()
+            # self.random_seed = self.random_seed + 5
+
+        self.norm_mean = np.mean(self.norm_matrix, axis=1)
+        if not self.balance:
+            top_examples = self.train_indx[np.argsort(self.norm_mean)][::-1][:self.coreset_size]
+        else:
+            top_examples = np.array([], dtype=np.int64)
+            for c in tqdm(range(self.num_classes)):
+                c_indx = self.train_indx[self.dst_train_label == c]
+                budget = round(self.fraction * len(c_indx))
+                top_examples = np.append(top_examples, c_indx[np.argsort(self.norm_mean[c_indx])[::-1][:budget]])
+
+        return {"indices": top_examples, "scores": self.norm_mean}
--- a/deepcore/methods/herding.py
+++ b/deepcore/methods/herding.py
@@ -0,0 +1,109 @@
+from .earlytrain import EarlyTrain
+import torch
+import numpy as np
+from .methods_utils import euclidean_dist
+from ..nets.nets_utils import MyDataParallel
+
+
+class Herding(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
+                 specific_model="ResNet18", balance: bool = False, metric="euclidean", **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model, **kwargs)
+
+        if metric == "euclidean":
+            self.metric = euclidean_dist
+        elif callable(metric):
+            self.metric = metric
+        else:
+            self.metric = euclidean_dist
+            self.run = lambda: self.finish_run()
+
+            def _construct_matrix(index=None):
+                data_loader = torch.utils.data.DataLoader(
+                    self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+                    batch_size=self.n_train if index is None else len(index), num_workers=self.args.workers)
+                inputs, _ = next(iter(data_loader))
+                return inputs.flatten(1).requires_grad_(False).to(self.args.device)
+
+            self.construct_matrix = _construct_matrix
+
+        self.balance = balance
+        self.select_bs = self.args.DATASET.SELECTION_BATCH_SIZE
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        pass
+
+    #Initial achievement, may not optimal
+    def mixing_feature(self,img_fea,text_fea,lam=0.5):
+        # return img_fea
+        return lam*img_fea + (1-lam)*text_fea
+
+    def construct_matrix(self, index=None):
+        self.model.eval()
+        self.model.no_grad = True
+        with torch.no_grad():
+            # with self.model.embedding_recorder:
+                sample_num = self.n_train if index is None else len(index)
+                matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).cuda()
+                data_loader = self.select_dm(self.dst_train,index,is_train=False)
+                for i, batch in enumerate(data_loader):
+                    image,label = batch['img'].cuda(),batch['label'].cuda()
+                    img_f,text_f,_ = self.model(image, label, record=True)
+                    final_embed = self.mixing_feature(img_f,text_f)  #Using the mixed image_feature and text_feature
+                    matrix[i * self.select_bs:min((i + 1) * self.select_bs, sample_num)] = final_embed
+
+        self.model.no_grad = False
+        self.model.train()
+        return matrix
+
+    def before_run(self):
+        self.emb_dim = self.model.image_encoder.output_dim
+
+    def herding(self, matrix, budget: int, index=None):
+
+        sample_num = matrix.shape[0]
+
+        if budget < 0:
+            raise ValueError("Illegal budget size.")
+        elif budget > sample_num:
+            budget = sample_num
+
+        indices = np.arange(sample_num)
+        with torch.no_grad():
+            mu = torch.mean(matrix, dim=0)
+            select_result = np.zeros(sample_num, dtype=bool)
+
+            for i in range(budget):
+                if i % self.args.TRAIN.PRINT_FREQ == 0:
+                    print("| Selecting [%3d/%3d]" % (i + 1, budget))
+                dist = self.metric(((i + 1) * mu - torch.sum(matrix[select_result], dim=0)).view(1, -1),
+                                   matrix[~select_result])
+                p = torch.argmax(dist).item()
+                p = indices[~select_result][p]
+                select_result[p] = True
+        if index is None:
+            index = indices
+        return index[select_result]
+
+    def finish_run(self):
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+        if self.balance:
+            selection_result = np.array([], dtype=np.int32)
+            for c in range(self.num_classes):
+                class_index = np.arange(self.n_train)[self.dst_train_label == c]
+                selection_result = np.append(selection_result, self.herding(self.construct_matrix(class_index),
+                        budget=round(self.fraction * len(class_index)), index=class_index))
+        else:
+            selection_result = self.herding(self.construct_matrix(), budget=self.coreset_size)
+        return {"indices": selection_result}
+
+    def select(self, **kwargs):
+        selection_result = self.run()
+        return selection_result
+
+
--- a/deepcore/methods/kcentergreedy.py
+++ b/deepcore/methods/kcentergreedy.py
@@ -0,0 +1,182 @@
+from .earlytrain import EarlyTrain
+import torch
+import numpy as np
+from .methods_utils import euclidean_dist
+from ..nets.nets_utils import MyDataParallel
+
+
+def k_center_greedy(matrix, budget: int, metric, device, random_seed=None, index=None, already_selected=None,
+                    print_freq: int = 20):
+    if type(matrix) == torch.Tensor:
+        assert matrix.dim() == 2
+    elif type(matrix) == np.ndarray:
+        assert matrix.ndim == 2
+        matrix = torch.from_numpy(matrix).requires_grad_(False).to(device)
+
+    sample_num = matrix.shape[0]
+    assert sample_num >= 1
+
+    if budget < 0:
+        raise ValueError("Illegal budget size.")
+    elif budget > sample_num:
+        budget = sample_num
+
+    if index is not None:
+        assert matrix.shape[0] == len(index)
+    else:
+        index = np.arange(sample_num)
+
+    assert callable(metric)
+
+    already_selected = np.array(already_selected)
+
+    with torch.no_grad():
+        np.random.seed(random_seed)
+        if already_selected.__len__() == 0:
+            select_result = np.zeros(sample_num, dtype=bool)
+            # Randomly select one initial point.
+            already_selected = [np.random.randint(0, sample_num)]
+            budget -= 1
+            select_result[already_selected] = True
+        else:
+            select_result = np.in1d(index, already_selected)
+
+        num_of_already_selected = np.sum(select_result)
+
+        # Initialize a (num_of_already_selected+budget-1)*sample_num matrix storing distances of pool points from
+        # each clustering center.
+        dis_matrix = -1 * torch.ones([num_of_already_selected + budget - 1, sample_num], requires_grad=False).to(device)
+
+        dis_matrix[:num_of_already_selected, ~select_result] = metric(matrix[select_result], matrix[~select_result])
+
+        mins = torch.min(dis_matrix[:num_of_already_selected, :], dim=0).values
+
+        for i in range(budget):
+            if i % print_freq == 0:
+                print("| Selecting [%3d/%3d]" % (i + 1, budget))
+            p = torch.argmax(mins).item()
+            select_result[p] = True
+
+            if i == budget - 1:
+                break
+            mins[p] = -1
+            dis_matrix[num_of_already_selected + i, ~select_result] = metric(matrix[[p]], matrix[~select_result])
+            mins = torch.min(mins, dis_matrix[num_of_already_selected + i])
+    return index[select_result]
+
+
+class kCenterGreedy(EarlyTrain):
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=0,
+                 specific_model="ResNet18", balance: bool = False, already_selected=[], metric="euclidean",
+                 torchvision_pretrain: bool = True, **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs=epochs, specific_model=specific_model,
+                         torchvision_pretrain=torchvision_pretrain, **kwargs)
+
+        if already_selected.__len__() != 0:
+            if min(already_selected) < 0 or max(already_selected) >= self.n_train:
+                raise ValueError("List of already selected points out of the boundary.")
+        self.already_selected = np.array(already_selected)
+
+        self.min_distances = None
+
+        if metric == "euclidean":
+            self.metric = euclidean_dist
+        elif callable(metric):
+            self.metric = metric
+        else:
+            self.metric = euclidean_dist
+            self.run = lambda : self.finish_run()
+            def _construct_matrix(index=None):
+                data_loader = torch.utils.data.DataLoader(
+                    self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+                    batch_size=self.n_train if index is None else len(index),
+                    num_workers=self.args.workers)
+                inputs, _ = next(iter(data_loader))
+                return inputs.flatten(1).requires_grad_(False).to(self.args.device)
+            self.construct_matrix = _construct_matrix
+
+        self.balance = balance
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        if batch_idx % self.args.print_freq == 0:
+            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
+            epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))
+
+    def old_construct_matrix(self, index=None):
+        self.model.eval()
+        self.model.no_grad = True
+        with torch.no_grad():
+            with self.model.embedding_recorder:
+                sample_num = self.n_train if index is None else len(index)
+                matrix = torch.zeros([sample_num, self.emb_dim], requires_grad=False).to(self.args.device)
+
+                data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else
+                                        torch.utils.data.Subset(self.dst_train, index),
+                                                batch_size=self.args.selection_batch,
+                                                num_workers=self.args.workers)
+
+                for i, (inputs, _) in enumerate(data_loader):
+                    self.model(inputs.to(self.args.device))
+                    matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch,
+                                                             sample_num)] = self.model.embedding_recorder.embedding
+
+        self.model.no_grad = False
+        return matrix
+
+    def construct_matrix(self, index=None):
+        self.model.eval()
+        self.model.no_grad = True
+        with torch.no_grad():
+            with self.model.embedding_recorder:
+                sample_num = self.n_train if index is None else len(index)
+                matrix = []
+
+                data_loader = torch.utils.data.DataLoader(self.dst_train if index is None else
+                                    torch.utils.data.Subset(self.dst_train, index),
+                                    batch_size=self.args.selection_batch,
+                                    num_workers=self.args.workers)
+
+                for i, (inputs, _) in enumerate(data_loader):
+                    self.model(inputs.to(self.args.device))
+                    matrix.append(self.model.embedding_recorder.embedding)
+
+        self.model.no_grad = False
+        return torch.cat(matrix, dim=0)
+
+    def before_run(self):
+        self.emb_dim = self.model.get_last_layer().in_features
+
+    def finish_run(self):
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+    def select(self, **kwargs):
+        self.run()
+        if self.balance:
+            selection_result = np.array([], dtype=np.int32)
+            for c in range(self.args.num_classes):
+                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
+
+                selection_result = np.append(selection_result, k_center_greedy(self.construct_matrix(class_index),
+                                                                               budget=round(
+                                                                                   self.fraction * len(class_index)),
+                                                                               metric=self.metric,
+                                                                               device=self.args.device,
+                                                                               random_seed=self.random_seed,
+                                                                               index=class_index,
+                                                                               already_selected=self.already_selected[
+                                                                                   np.in1d(self.already_selected,
+                                                                                           class_index)],
+                                                                               print_freq=self.args.print_freq))
+        else:
+            matrix = self.construct_matrix()
+            del self.model_optimizer
+            del self.model
+            selection_result = k_center_greedy(matrix, budget=self.coreset_size,
+                                               metric=self.metric, device=self.args.device,
+                                               random_seed=self.random_seed,
+                                               already_selected=self.already_selected, print_freq=self.args.print_freq)
+        return {"indices": selection_result}
--- a/deepcore/methods/methods_utils/init.py
+++ b/deepcore/methods/methods_utils/init.py
@@ -0,0 +1,4 @@
+from .euclidean import *
+from .cossim import *
+from .submodular_function import *
+from .submodular_optimizer import *
--- a/deepcore/methods/methods_utils/pycache/init.cpython-39.pyc
+++ b/deepcore/methods/methods_utils/pycache/init.cpython-39.pyc
--- a/deepcore/methods/methods_utils/pycache/cossim.cpython-39.pyc
+++ b/deepcore/methods/methods_utils/pycache/cossim.cpython-39.pyc
--- a/deepcore/methods/methods_utils/pycache/euclidean.cpython-39.pyc
+++ b/deepcore/methods/methods_utils/pycache/euclidean.cpython-39.pyc
--- a/deepcore/methods/methods_utils/pycache/submodular_function.cpython-39.pyc
+++ b/deepcore/methods/methods_utils/pycache/submodular_function.cpython-39.pyc
--- a/deepcore/methods/methods_utils/pycache/submodular_optimizer.cpython-39.pyc
+++ b/deepcore/methods/methods_utils/pycache/submodular_optimizer.cpython-39.pyc
--- a/deepcore/methods/methods_utils/cossim.py
+++ b/deepcore/methods/methods_utils/cossim.py
@@ -0,0 +1,35 @@
+import numpy as np
+import torch
+
+
+def cossim_np(v1, v2):
+    # return cossim(torch.tensor(v1),torch.tensor(v2)).cpu().numpy()
+    num = np.dot(v1, v2.T)
+    denom = np.linalg.norm(v1, axis=1).reshape(-1, 1) * np.linalg.norm(v2, axis=1)
+    res = num / (denom + 1e-6)
+    res[np.isneginf(res)] = 0.
+    return 0.5 + 0.5 * res
+
+def cossim_pair_np(v1):
+    num = np.dot(v1, v1.T)
+    norm = np.linalg.norm(v1, axis=1)
+    denom = norm.reshape(-1, 1) * norm
+    res = num / (denom + 1e-6)
+    res[np.isneginf(res)] = 0.
+    return 0.5 + 0.5 * res
+
+def cossim(v1, v2):
+    num = torch.matmul(v1, v2.T)
+    denom = torch.norm(v1, dim=1).view(-1, 1) * torch.norm(v2, dim=1)
+    res = num / (denom + 1e-6)
+    res[torch.isneginf(res)] = 0.
+    return 0.5 + 0.5 * res
+
+def cossim_pair(v1):
+    num = torch.matmul(v1, v1.T)
+    norm = torch.norm(v1, dim=1)
+    denom = norm.view(-1, 1) * norm
+    res = num / (denom + 1e-6)
+    res[torch.isneginf(res)] = 0.
+    return 0.5 + 0.5 * res
+
--- a/deepcore/methods/methods_utils/euclidean.py
+++ b/deepcore/methods/methods_utils/euclidean.py
@@ -0,0 +1,36 @@
+import torch
+import numpy as np
+
+
+def euclidean_dist(x, y):
+    """Return the pairwise Euclidean distances between the rows of ``x`` and the rows of ``y``.
+
+    Args:
+        x: 2-D tensor with ``m`` rows.
+        y: 2-D tensor with ``n`` rows and the same number of columns as ``x``.
+
+    Returns:
+        An ``(m, n)`` tensor whose ``[i, j]`` entry is ``||x[i] - y[j]||``.
+        Squared distances are clamped to at least 1e-12 before the square
+        root, so the smallest value returned is 1e-6 rather than exactly 0.
+    """
+    m, n = x.size(0), y.size(0)
+    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
+    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
+    # ||x||^2 + ||y||^2 - 2 * x @ y^T. beta/alpha are passed by keyword because
+    # the positional (beta, alpha, mat1, mat2) overload is deprecated.
+    dist = torch.addmm(xx + yy, x, y.t(), beta=1, alpha=-2)
+    return dist.clamp(min=1e-12).sqrt()
+
+
+def euclidean_dist_pair(x):
+    """Return the pairwise Euclidean distances among the rows of ``x``.
+
+    Args:
+        x: 2-D tensor with ``m`` rows.
+
+    Returns:
+        An ``(m, m)`` tensor whose ``[i, j]`` entry is ``||x[i] - x[j]||``.
+        Squared distances are clamped to at least 1e-12 before the square
+        root, so the smallest value returned is 1e-6 rather than exactly 0.
+    """
+    m = x.size(0)
+    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, m)
+    # ||x_i||^2 + ||x_j||^2 - 2 * x @ x^T. beta/alpha are passed by keyword
+    # because the positional (beta, alpha, mat1, mat2) overload is deprecated.
+    dist = torch.addmm(xx + xx.t(), x, x.t(), beta=1, alpha=-2)
+    return dist.clamp(min=1e-12).sqrt()
+
+def euclidean_dist_np(x, y):
+    """Return the pairwise Euclidean distances between the rows of ``x`` and ``y`` (NumPy).
+
+    Both inputs must be 2-D. The result has one row per row of ``x`` and one
+    column per row of ``y``. Squared distances are clipped to at least 1e-12
+    before the square root.
+    """
+    n_x, _ = x.shape
+    n_y, _ = y.shape
+    # Squared row norms laid out as a column and a row so they broadcast.
+    sq_x = np.sum(np.multiply(x, x), axis=1).reshape(n_x, 1)
+    sq_y = np.sum(np.multiply(y, y), axis=1).reshape(1, n_y)
+    cross = np.dot(x, y.T)
+    squared = sq_x + sq_y - 2. * cross
+    return np.sqrt(np.clip(squared, 1e-12, None))
+
+def euclidean_dist_pair_np(x):
+    """Return the N x N matrix of Euclidean distances among the N rows of ``x`` (NumPy).
+
+    Squared distances are clipped to at least 1e-12 before the square root, so
+    the diagonal is not exactly zero: its smallest possible value is 1e-6.
+    """
+    n_rows, _ = x.shape
+    # Squared row norms as a column; adding its transpose broadcasts to N x N.
+    sq = np.sum(np.multiply(x, x), axis=1).reshape(n_rows, 1)
+    cross = np.dot(x, x.T)
+    squared = sq + sq.T - 2. * cross
+    return np.sqrt(np.clip(squared, 1e-12, None))
--- a/deepcore/methods/methods_utils/submodular_function.py
+++ b/deepcore/methods/methods_utils/submodular_function.py
@@ -0,0 +1,144 @@
+import numpy as np
+
+
+class SubmodularFunction(object):
+    """Base class that stores the ground set and a similarity oracle for submodular functions.
+
+    Args:
+        index: sequence of sample indices forming the ground set; ``self.n`` is its length.
+        similarity_kernel: optional callable ``kernel(a, b)`` returning the similarities
+            between the samples selected by ``a`` (rows) and by ``b`` (columns).
+        similarity_matrix: optional precomputed ``n x n`` similarity matrix. Only
+            used when ``similarity_kernel`` is not given.
+        already_selected: optional collection of positions that are selected from
+            the start. Defaults to a fresh empty list.
+
+    After construction ``self.similarity_kernel(a, b)`` is always callable,
+    whichever of the two inputs was supplied.
+    """
+    def __init__(self, index, similarity_kernel=None, similarity_matrix=None, already_selected=None):
+        self.index = index
+        self.n = len(index)
+
+        # A new list per instance; a literal ``[]`` default would be shared by every instance.
+        self.already_selected = [] if already_selected is None else already_selected
+
+        assert similarity_kernel is not None or similarity_matrix is not None
+
+        # For the sample similarity matrix, the method supports two input modes, one is to input a pairwise similarity
+        # matrix for the whole sample, and the other case allows the input of a similarity kernel to be used to
+        # calculate similarities incrementally at a later time if required.
+        if similarity_kernel is not None:
+            assert callable(similarity_kernel)
+            # Subclasses override ``_similarity_kernel`` to wrap the kernel (e.g. with caching).
+            self.similarity_kernel = self._similarity_kernel(similarity_kernel)
+        else:
+            assert similarity_matrix.shape[0] == self.n and similarity_matrix.shape[1] == self.n
+            self.similarity_matrix = similarity_matrix
+            self.similarity_kernel = lambda a, b: self.similarity_matrix[np.ix_(a, b)]
+
+    def _similarity_kernel(self, similarity_kernel):
+        """Hook for wrapping the user kernel; the base implementation returns it unchanged."""
+        return similarity_kernel
+
+
+class FacilityLocation(SubmodularFunction):
+    """Facility-location function with lazily computed similarity columns.
+
+    ``self.cur_max[i]`` holds the largest similarity between sample ``i`` and
+    any sample selected so far; the gain of a candidate is the total amount by
+    which it would raise these maxima.
+    """
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        # Created before the first kernel call below: the lazy kernel reads it.
+        self.all_idx = np.ones(self.n, dtype=bool)
+
+        if len(self.already_selected) == 0:
+            self.cur_max = np.zeros(self.n, dtype=np.float32)
+        else:
+            self.cur_max = np.max(self.similarity_kernel(np.arange(self.n), self.already_selected), axis=1)
+
+    def _similarity_kernel(self, similarity_kernel):
+        """Wrap ``similarity_kernel`` so each column of the similarity matrix is computed once and cached."""
+        # Initialize a matrix to store similarity values of sample points.
+        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
+        self.if_columns_calculated = np.zeros(self.n, dtype=bool)
+
+        def _func(a, b):
+            # Accept lists as well as arrays; an empty list must still be an integer index.
+            cols = np.asarray(b)
+            if cols.dtype != bool:
+                cols = cols.astype(np.intp, copy=False)
+            if not np.all(self.if_columns_calculated[cols]):
+                if cols.dtype == bool:
+                    requested = cols
+                else:
+                    requested = np.zeros(self.n, dtype=bool)
+                    requested[cols] = True
+                not_calculated = requested & ~self.if_columns_calculated
+                # The wrapped kernel is called with boolean masks (all rows, missing columns).
+                self.sim_matrix[:, not_calculated] = similarity_kernel(np.ones(self.n, dtype=bool), not_calculated)
+                self.if_columns_calculated[not_calculated] = True
+            # Index with the caller's original ``b`` so the column order (and any
+            # repeats) of an integer index is kept, cached or not.
+            return self.sim_matrix[np.ix_(a, cols)]
+        return _func
+
+    def calc_gain(self, idx_gain, selected, **kwargs):
+        """Return, for each candidate in ``idx_gain``, the summed increase it brings over ``cur_max``."""
+        gains = np.maximum(0., self.similarity_kernel(self.all_idx, idx_gain) - self.cur_max.reshape(-1, 1)).sum(axis=0)
+        return gains
+
+    def calc_gain_batch(self, idx_gain, selected, **kwargs):
+        """Compute the same gains as ``calc_gain``, accumulating over row batches of size ``kwargs["batch"]``.
+
+        Raises:
+            ValueError: if ``kwargs["batch"]`` is not positive.
+        """
+        batch = kwargs["batch"]
+        if batch <= 0:
+            raise ValueError("batch must be a positive integer.")
+
+        def _rows(start):
+            # Boolean mask selecting rows [start, start + batch).
+            mask = np.zeros(self.n, dtype=bool)
+            mask[start:start + batch] = True
+            return mask
+
+        batch_idx = _rows(0)
+        gains = np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)
+        # ``start`` is already a row offset, so it must not be multiplied by the batch size again.
+        for start in range(batch, self.n, batch):
+            batch_idx = _rows(start)
+            gains += np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)
+        return gains
+
+    def update_state(self, new_selection, total_selected, **kwargs):
+        """Fold the newly selected samples into ``cur_max``."""
+        self.cur_max = np.maximum(self.cur_max, np.max(self.similarity_kernel(self.all_idx, new_selection), axis=1))
+
+
+class GraphCut(SubmodularFunction):
+    """Graph-cut function with lazily computed similarity columns.
+
+    Args:
+        lam: weight of the column-sum term in the gain.
+        **kwargs: forwarded to ``SubmodularFunction``.
+    """
+    def __init__(self, lam: float = 1., **kwargs):
+        super().__init__(**kwargs)
+        self.lam = lam
+
+        # The base class stores ``similarity_matrix`` only when no kernel was
+        # supplied; in kernel mode the column sums are filled lazily instead.
+        if kwargs.get('similarity_kernel') is None:
+            self.sim_matrix_cols_sum = np.sum(self.similarity_matrix, axis=0)
+        self.all_idx = np.ones(self.n, dtype=bool)
+
+    def _similarity_kernel(self, similarity_kernel):
+        """Wrap ``similarity_kernel`` so each column (and its column sum) is computed once and cached."""
+        # Initialize a matrix to store similarity values of sample points.
+        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
+        self.sim_matrix_cols_sum = np.zeros(self.n, dtype=np.float32)
+        self.if_columns_calculated = np.zeros(self.n, dtype=bool)
+
+        def _func(a, b):
+            # Accept lists as well as arrays; an empty list must still be an integer index.
+            cols = np.asarray(b)
+            if cols.dtype != bool:
+                cols = cols.astype(np.intp, copy=False)
+            if not np.all(self.if_columns_calculated[cols]):
+                if cols.dtype == bool:
+                    requested = cols
+                else:
+                    requested = np.zeros(self.n, dtype=bool)
+                    requested[cols] = True
+                not_calculated = requested & ~self.if_columns_calculated
+                # The wrapped kernel is called with boolean masks (all rows, missing columns).
+                self.sim_matrix[:, not_calculated] = similarity_kernel(np.ones(self.n, dtype=bool), not_calculated)
+                self.sim_matrix_cols_sum[not_calculated] = np.sum(self.sim_matrix[:, not_calculated], axis=0)
+                self.if_columns_calculated[not_calculated] = True
+            # Index with the caller's original ``b`` so the column order (and any
+            # repeats) of an integer index is kept, cached or not.
+            return self.sim_matrix[np.ix_(a, cols)]
+        return _func
+
+    def calc_gain(self, idx_gain, selected, **kwargs):
+        """Return the conditional gain of each candidate in ``idx_gain`` given ``selected``.
+
+        The gain is ``lam`` times the candidate's similarity column sum minus
+        twice its summed similarity to the already selected samples.
+        """
+        # The kernel call also makes sure the column sums of ``idx_gain`` are filled in kernel mode.
+        gain = -2. * np.sum(self.similarity_kernel(selected, idx_gain), axis=0) + self.lam * self.sim_matrix_cols_sum[idx_gain]
+
+        return gain
+
+    def update_state(self, new_selection, total_selected, **kwargs):
+        """No incremental state is kept for this function."""
+        pass
+
+
+class LogDeterminant(SubmodularFunction):
+    """Log-determinant style function with lazily computed similarity columns."""
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        self.all_idx = np.ones(self.n, dtype=bool)
+
+    def _similarity_kernel(self, similarity_kernel):
+        """Wrap ``similarity_kernel`` so each column of the similarity matrix is computed once and cached."""
+        # Initialize a matrix to store similarity values of sample points.
+        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
+        self.if_columns_calculated = np.zeros(self.n, dtype=bool)
+
+        def _func(a, b):
+            # Accept lists as well as arrays; an empty list must still be an integer index.
+            cols = np.asarray(b)
+            if cols.dtype != bool:
+                cols = cols.astype(np.intp, copy=False)
+            if not np.all(self.if_columns_calculated[cols]):
+                if cols.dtype == bool:
+                    requested = cols
+                else:
+                    requested = np.zeros(self.n, dtype=bool)
+                    requested[cols] = True
+                not_calculated = requested & ~self.if_columns_calculated
+                # The wrapped kernel is called with boolean masks (all rows, missing columns).
+                self.sim_matrix[:, not_calculated] = similarity_kernel(np.ones(self.n, dtype=bool), not_calculated)
+                self.if_columns_calculated[not_calculated] = True
+            # Index with the caller's original ``b`` so the column order (and any
+            # repeats) of an integer index is kept, cached or not.
+            return self.sim_matrix[np.ix_(a, cols)]
+        return _func
+
+    def calc_gain(self, idx_gain, selected, **kwargs):
+        """Return one score per candidate in ``idx_gain`` given the ``selected`` samples.
+
+        The value computed is the quadratic form ``S_{x,A} pinv(S_A) S_{x,A}^T``
+        for each candidate ``x`` and selected set ``A``.
+        """
+        # Gain for LogDeterminant can be written as $f(x | A ) = \log\det(S_{a} - S_{a,A}S_{A}^{-1}S_{x,A}^T)$.
+        # NOTE(review): the code returns only the subtracted quadratic term, not
+        # the log-det expression above -- confirm this ranking is intended.
+        sim_idx_gain = self.similarity_kernel(selected, idx_gain).T
+        sim_selected = self.similarity_kernel(selected, selected)
+        return (np.dot(sim_idx_gain, np.linalg.pinv(sim_selected)) * sim_idx_gain).sum(-1)
+
+    def update_state(self, new_selection, total_selected, **kwargs):
+        """No incremental state is kept for this function."""
+        pass
--- a/deepcore/methods/methods_utils/submodular_optimizer.py
+++ b/deepcore/methods/methods_utils/submodular_optimizer.py
@@ -0,0 +1,155 @@
+import numpy as np
+from tqdm import tqdm
+
+# Names of the greedy optimizer classes defined in this module that can be requested by name.
+optimizer_choices = ["NaiveGreedy", "LazyGreedy", "StochasticGreedy", "ApproximateLazyGreedy"]
+
+class optimizer(object):
+    """Common state for the greedy submodular optimizers.
+
+    Args:
+        args: configuration object; subclasses read ``args.TRAIN.PRINT_FREQ``.
+        index: sequence of sample indices forming the ground set.
+        budget: number of samples to select; must satisfy ``0 < budget <= len(index)``.
+        already_selected: optional positions (into ``index``) that are selected
+            from the start. Defaults to a fresh empty list.
+
+    Raises:
+        ValueError: if ``budget`` is outside ``(0, len(index)]``.
+    """
+    def __init__(self, args, index, budget:int, already_selected=None):
+        self.args = args
+        self.index = index
+
+        if budget <= 0 or budget > len(index):
+            raise ValueError("Illegal budget for optimizer.")
+
+        self.n = len(index)
+        self.budget = budget
+        # A new list per instance; a literal ``[]`` default would be shared by every instance.
+        self.already_selected = [] if already_selected is None else already_selected
+
+
+class NaiveGreedy(optimizer):
+    """Plain greedy maximizer: re-evaluates every unselected candidate at each step."""
+    def __init__(self, args, index, budget:int, already_selected=None):
+        # Normalize here so the base class always receives a list-like value.
+        super(NaiveGreedy, self).__init__(args, index, budget,
+                                          [] if already_selected is None else already_selected)
+
+    def select(self, gain_function, update_state=None, **kwargs):
+        """Greedily pick samples until ``budget`` are selected.
+
+        Args:
+            gain_function: callable ``(candidates_mask, selected_mask, **kwargs)``
+                returning one gain per candidate.
+            update_state: optional callable ``(new_selection, selected_mask, **kwargs)``
+                invoked after each pick.
+            **kwargs: forwarded to both callables.
+
+        Returns:
+            The entries of ``self.index`` that are selected (including pre-selected ones).
+        """
+        assert callable(gain_function)
+        if update_state is not None:
+            assert callable(update_state)
+        selected = np.zeros(self.n, dtype=bool)
+        selected[self.already_selected] = True
+
+        greedy_gain = np.zeros(len(self.index))
+        # Pre-selected samples must never win the argmax; with their gain left
+        # at 0 they would be re-picked whenever every real gain is negative.
+        greedy_gain[selected] = -np.inf
+        for i in range(int(selected.sum()), self.budget):
+            if i % self.args.TRAIN.PRINT_FREQ == 0:
+                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
+            greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
+            current_selection = greedy_gain.argmax()
+            selected[current_selection] = True
+            greedy_gain[current_selection] = -np.inf
+            if update_state is not None:
+                update_state(np.array([current_selection]), selected, **kwargs)
+        return self.index[selected]
+
+
+class LazyGreedy(optimizer):
+    """Lazy greedy maximizer: keeps cached gains and re-evaluates only the current top candidate."""
+    def __init__(self, args, index, budget:int, already_selected=[]):
+        super(LazyGreedy, self).__init__(args, index, budget, already_selected)
+
+    def select(self, gain_function, update_state=None, **kwargs):
+        """Pick samples until ``budget`` are selected and return the chosen entries of ``self.index``.
+
+        ``gain_function(candidates, selected_mask, **kwargs)`` returns one gain
+        per candidate; ``update_state``, when given, is called after each pick.
+        """
+        assert callable(gain_function)
+        assert update_state is None or callable(update_state)
+        chosen = np.zeros(self.n, dtype=bool)
+        chosen[self.already_selected] = True
+
+        # One full evaluation up front; chosen samples are excluded with -inf.
+        cached_gain = np.zeros(len(self.index))
+        cached_gain[~chosen] = gain_function(~chosen, chosen, **kwargs)
+        cached_gain[chosen] = -np.inf
+
+        for step in tqdm(range(sum(chosen), self.budget)):
+            if step % self.args.TRAIN.PRINT_FREQ == 0:
+                print("| Selecting [%3d/%3d]" % (step + 1, self.budget))
+            best_seen = -np.inf
+            confirmed = -1
+            candidate = cached_gain.argmax()
+            # Refresh the top cached candidate until the argmax is one whose
+            # refreshed gain is the best seen in this step.
+            while candidate != confirmed:
+                fresh = gain_function(np.array([candidate]), chosen, **kwargs)[0]
+                cached_gain[candidate] = fresh
+                if fresh >= best_seen:
+                    best_seen, confirmed = fresh, candidate
+                candidate = cached_gain.argmax()
+
+            # Select the confirmed candidate into the current subset.
+            chosen[candidate] = True
+            cached_gain[candidate] = -np.inf
+            if update_state is not None:
+                update_state(np.array([candidate]), chosen, **kwargs)
+        return self.index[chosen]
+
+
+class StochasticGreedy(optimizer):
+    """Stochastic greedy maximizer: each step evaluates only a random subset of the unselected samples."""
+    def __init__(self, args, index, budget:int, already_selected=[], epsilon: float=0.9):
+        super(StochasticGreedy, self).__init__(args, index, budget, already_selected)
+        self.epsilon = epsilon
+
+    def select(self, gain_function, update_state=None, **kwargs):
+        """Pick samples until ``budget`` are selected and return the chosen entries of ``self.index``.
+
+        ``gain_function(candidates, selected_mask, **kwargs)`` returns one gain
+        per candidate; ``update_state``, when given, is called after each pick.
+        """
+        assert callable(gain_function)
+        assert update_state is None or callable(update_state)
+        picked = np.zeros(self.n, dtype=bool)
+        picked[self.already_selected] = True
+
+        # Per-step sample size derived from epsilon, never smaller than one.
+        draw_size = max(round(-np.log(self.epsilon) * self.n / self.budget), 1)
+
+        gain_cache = np.zeros(len(self.index))
+        positions = np.arange(self.n)
+        for step in range(sum(picked), self.budget):
+            if step % self.args.TRAIN.PRINT_FREQ == 0:
+                print("| Selecting [%3d/%3d]" % (step + 1, self.budget))
+
+            # Draw, without replacement, a uniform subset of the unselected samples.
+            pool = np.random.choice(positions[~picked], replace=False, size=min(draw_size, self.n - step))
+            if len(pool) == 0:
+                break
+
+            gain_cache[pool] = gain_function(pool, picked, **kwargs)
+            winner = pool[gain_cache[pool].argmax()]
+            picked[winner] = True
+            gain_cache[winner] = -np.inf
+            if update_state is not None:
+                update_state(np.array([winner]), picked, **kwargs)
+        return self.index[picked]
+
+
+class ApproximateLazyGreedy(optimizer):
+    """Approximate lazy greedy maximizer.
+
+    The top cached candidate is accepted as soon as its refreshed gain is
+    within a factor ``beta`` of its cached gain.
+    """
+    def __init__(self, args, index, budget:int, already_selected=None, beta: float=0.9):
+        # Normalize here so the base class always receives a list-like value.
+        super(ApproximateLazyGreedy, self).__init__(args, index, budget,
+                                                    [] if already_selected is None else already_selected)
+        self.beta = beta
+
+    def select(self, gain_function, update_state=None, **kwargs):
+        """Pick samples until ``budget`` are selected and return the chosen entries of ``self.index``.
+
+        Args:
+            gain_function: callable ``(candidates, selected_mask, **kwargs)``
+                returning one gain per candidate.
+            update_state: optional callable ``(new_selection, selected_mask, **kwargs)``
+                invoked after each pick.
+            **kwargs: forwarded to both callables.
+        """
+        assert callable(gain_function)
+        if update_state is not None:
+            assert callable(update_state)
+        selected = np.zeros(self.n, dtype=bool)
+        selected[self.already_selected] = True
+
+        greedy_gain = np.zeros(len(self.index))
+        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
+        greedy_gain[selected] = -np.inf
+
+        for i in range(int(selected.sum()), self.budget):
+            if i % self.args.TRAIN.PRINT_FREQ == 0:
+                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
+            while True:
+                cur_max_element = greedy_gain.argmax()
+                max_gain = greedy_gain[cur_max_element]
+
+                new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0]
+
+                # For a non-negative bound the threshold is beta * bound. For a
+                # negative bound that product lies ABOVE the bound, so a
+                # refreshed gain could never reach it and the loop would spin
+                # forever; relax the threshold downwards by the same fraction.
+                if max_gain >= 0:
+                    threshold = self.beta * max_gain
+                else:
+                    threshold = max_gain - (1. - self.beta) * abs(max_gain)
+
+                if new_gain >= threshold:
+                    # Select cur_max_element into the current subset
+                    selected[cur_max_element] = True
+                    greedy_gain[cur_max_element] = -np.inf
+
+                    if update_state is not None:
+                        update_state(np.array([cur_max_element]), selected, **kwargs)
+                    break
+                else:
+                    greedy_gain[cur_max_element] = new_gain
+        return self.index[selected]
+
+
+
+
--- a/deepcore/methods/submodular.py
+++ b/deepcore/methods/submodular.py
@@ -0,0 +1,116 @@
+from .earlytrain import EarlyTrain
+import numpy as np
+import torch
+from .methods_utils import cossim_np, submodular_function, submodular_optimizer
+from ..nets.nets_utils import MyDataParallel
+
+
+class Submodular(EarlyTrain):
+    """Coreset selection that maximizes a submodular function over per-sample gradient similarities.
+
+    Args:
+        dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs:
+            forwarded to ``EarlyTrain``.
+        balance: if true, select ``fraction`` of each class separately.
+        function: name of a class in ``submodular_function`` (e.g. ``"GraphCut"``).
+        greedy: name of a class in ``submodular_optimizer``; must be listed in
+            ``submodular_optimizer.optimizer_choices``.
+        metric: stored but not otherwise used by this class.
+
+    Raises:
+        ModuleNotFoundError: if ``greedy`` is not a known optimizer name.
+    """
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None, balance=True,
+                 function="GraphCut", greedy="LazyGreedy", metric="cossim", **kwargs):
+        super(Submodular, self).__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+
+        if greedy not in submodular_optimizer.optimizer_choices:
+            raise ModuleNotFoundError("Greedy optimizer not found.")
+        print(f"The Submodular Method is {function}")
+        self._greedy = greedy
+        self._metric = metric
+        self._function = function
+
+        self.balance = balance
+
+    def before_train(self):
+        pass
+
+    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
+        pass
+
+    def before_epoch(self):
+        pass
+
+    def after_epoch(self):
+        pass
+
+    def before_run(self):
+        pass
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+
+    def calc_gradient(self, index=None):
+        '''
+        Calculate gradients matrix on current network for specified training dataset.
+
+        For every batch the gradient of the loss w.r.t. the logits is taken and
+        combined with the visual embedding into one row per sample: the logit
+        gradient followed by the flattened (class x embedding) outer product.
+        Returns a float32 NumPy array with one row per sample.
+        '''
+        self.model.eval()
+        data_loader = self.select_dm(self.dst_train, index, is_train=False)
+        # Per-batch gradient blocks, kept on the CPU.
+        gradients = []
+
+        for i, batch in enumerate(data_loader):
+
+            self.optim.zero_grad()
+            image, label = batch['img'].cuda(), batch['label'].cuda()
+            bs_size = image.shape[0]
+            loss,visual_embedding,logit = self.model(image,label,cal_gradient=True)
+            embed_dim = visual_embedding.shape[-1]
+            with torch.no_grad():
+                bias_parameters_grads = torch.autograd.grad(loss, logit)[0]
+                weight_parameters_grads = visual_embedding.view(bs_size, 1,
+                                        -1).repeat(1, self.num_classes, 1) *\
+                                        bias_parameters_grads.view(bs_size, self.num_classes,
+                                        1).repeat(1, 1, embed_dim)
+                gradients.append(torch.cat([bias_parameters_grads, weight_parameters_grads.flatten(1)],
+                                            dim=1).cpu().numpy())
+
+        gradients = np.concatenate(gradients, axis=0,dtype=np.float32)
+        print('Finish Gradient Calculation')
+        return gradients
+
+    def finish_run(self):
+        """Compute gradients once and run the configured optimizer; return ``{"indices": ...}``."""
+        if isinstance(self.model, MyDataParallel):
+            self.model = self.model.module
+
+        # Set the model's no_grad flag for the duration of the selection.
+        self.model.no_grad = True
+        try:
+            self.train_indx = np.arange(self.n_train)
+
+            # Calculate gradients into a matrix -- once, for both branches below.
+            gradients = self.calc_gradient(index=None)
+
+            if self.balance:
+                selection_result = np.array([], dtype=np.int64)
+                for c in range(self.num_classes):
+                    print(f'class {c}')
+                    c_indx = self.train_indx[self.dst_train_label == c]
+                    c_budget = round(self.fraction * len(c_indx))
+                    if c_budget == 0:
+                        # Nothing to pick for this class; the optimizer rejects a zero budget.
+                        continue
+                    c_gradients = gradients[c_indx]
+                    # Instantiate a submodular function; the gradients are bound
+                    # as a default so the kernel does not depend on the loop variable.
+                    submod_function = submodular_function.__dict__[self._function](index=c_indx,
+                                        similarity_kernel=lambda a, b, g=c_gradients: cossim_np(g[a], g[b]))
+                    submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args,
+                                        index=c_indx, budget=c_budget, already_selected=[])
+
+                    c_selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
+                                                                 update_state=submod_function.update_state)
+                    selection_result = np.append(selection_result, c_selection_result)
+            else:
+                # Instantiate a submodular function
+                submod_function = submodular_function.__dict__[self._function](index=self.train_indx,
+                                            similarity_kernel=lambda a, b: cossim_np(gradients[a], gradients[b]))
+                submod_optimizer = submodular_optimizer.__dict__[self._greedy](args=self.args, index=self.train_indx,
+                                                                                  budget=self.coreset_size)
+                selection_result = submod_optimizer.select(gain_function=submod_function.calc_gain,
+                                                           update_state=submod_function.update_state)
+        finally:
+            # Restore the flag even if the selection fails.
+            self.model.no_grad = False
+        return {"indices": selection_result}
+
+    def select(self, **kwargs):
+        """Run the full pipeline via ``self.run()`` and return its result."""
+        selection_result = self.run()
+        return selection_result
+
+
--- a/deepcore/methods/uncertainty.py
+++ b/deepcore/methods/uncertainty.py
@@ -0,0 +1,120 @@
+from .earlytrain import EarlyTrain
+import torch
+import numpy as np
+from datasets.data_manager import select_dm_loader
+import time
+
+class Uncertainty(EarlyTrain):
+    """Coreset selection that keeps the samples the model is least certain about.
+
+    Every selection method yields a score where a LOWER value means a more
+    uncertain sample: the maximum logit ("LeastConfidence"), the negative
+    entropy ("Entropy"), or the gap between the two largest softmax
+    probabilities ("Margin").
+
+    Raises:
+        NotImplementedError: if ``selection_method`` is not one of the three names above.
+    """
+    def __init__(self, dst_train, args,fraction=0.5, random_seed=None, epochs=200, selection_method="Margin",
+                 specific_model=None, balance=False, **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)
+
+        selection_choices = ["LeastConfidence",
+                             "Entropy",
+                             "Margin"]
+        if selection_method not in selection_choices:
+            raise NotImplementedError("Selection algorithm unavailable.")
+        self.selection_method = selection_method
+
+        self.epochs = epochs
+        self.balance = balance
+
+    def before_train(self):
+        pass
+
+    def after_loss(self, outputs, loss, targets, batch_inds, epoch):
+        pass
+
+
+    def after_epoch(self):
+        pass
+
+    def before_run(self):
+        pass
+
+    def num_classes_mismatch(self):
+        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")
+
+    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
+        pass
+
+    def finish_run(self):
+        """Score the training set and return ``{"indices": ..., "scores": ...}``.
+
+        In balanced mode ``scores`` is a list with one array per class; otherwise
+        it is a single array. Both modes keep the lowest-scoring samples.
+        """
+        if self.balance:
+            selection_result = np.array([], dtype=np.int64)
+            scores = []
+            for c in range(self.num_classes):
+                print(f"Balance Processing on the train set class {c}")
+                class_index = np.arange(self.n_train)[self.dst_train_label == c]
+                scores.append(self.rank_uncertainty_clip(class_index))
+                selection_result = np.append(selection_result, class_index[np.argsort(scores[-1])[
+                                                               :round(len(class_index) * self.fraction)]])
+        else:
+            print(f"Imbalance Processing on the train set class")
+            scores = self.rank_uncertainty_clip()
+            # Ascending order, as in the balanced branch: the lowest scores are
+            # the most uncertain samples. The previous descending order kept
+            # the most confident samples instead.
+            selection_result = np.argsort(scores)[:self.coreset_size]
+        return {"indices": selection_result, "scores": scores}
+
+    def _batch_scores(self, logits):
+        """Return one score per row of ``logits`` as a NumPy array (lower = more uncertain)."""
+        if self.selection_method == "LeastConfidence":
+            return logits.max(axis=1).values.cpu().numpy()
+        probs = torch.softmax(logits, dim=1)
+        if self.selection_method == "Entropy":
+            probs = probs.cpu().numpy()
+            return (np.log(probs + 1e-6) * probs).sum(axis=1)
+        # "Margin": largest probability minus the second largest. The winner is
+        # overwritten with -1 so the next max() finds the runner-up.
+        rows = torch.arange(probs.shape[0], device=probs.device)
+        top_idx = torch.argmax(probs, dim=1)
+        top_prob = probs[rows, top_idx].clone()
+        probs[rows, top_idx] = -1.0
+        return (top_prob - probs.max(dim=1).values).cpu().numpy()
+
+    def rank_uncertainty(self,index=None):
+        """Score samples using a plain ``DataLoader`` that yields ``(input, target)`` pairs."""
+        # Put the model that is actually evaluated below into eval mode.
+        self.model.eval()
+        with torch.no_grad():
+            train_loader = torch.utils.data.DataLoader(
+                self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
+                batch_size=self.args.selection_batch,
+                num_workers=self.args.workers)
+
+            # Starts with an empty float64 array so the result dtype and the
+            # empty-loader result match repeated np.append calls.
+            chunks = [np.array([])]
+            batch_num = len(train_loader)
+
+            for i, (input, _) in enumerate(train_loader):
+                if i % self.args.print_freq == 0:
+                    print("| Selecting for batch [%3d/%3d]" % (i + 1, batch_num))
+                chunks.append(self._batch_scores(self.model(input.to(self.args.device))))
+        return np.concatenate(chunks)
+
+
+    def rank_uncertainty_clip(self,index=None):
+        """Score samples from ``select_dm_loader`` batches (dicts with 'img' and 'label'), then restore train mode."""
+        self.model.eval()
+        with torch.no_grad():
+            train_loader = select_dm_loader(self.args,self.dst_train,index)
+            chunks = [np.array([])]
+
+            for i, batch in enumerate(train_loader):
+                image, label = batch['img'].cuda(), batch['label'].cuda()
+                logits = self.model(image,label)  ##Eval mode
+                chunks.append(self._batch_scores(logits))
+        self.model.train()
+        return np.concatenate(chunks)
+
+
+    def select(self, **kwargs):
+        """Run the full pipeline via ``self.run()`` and return its result."""
+        selection_result = self.run()
+        return selection_result
+
+    def select_without_train(self):
+        """Score and select with the current model, calling ``finish_run`` directly."""
+        selection_result = self.finish_run()
+        return selection_result
--- a/deepcore/methods/uniform.py
+++ b/deepcore/methods/uniform.py
@@ -0,0 +1,34 @@
+import numpy as np
+from .coresetmethod import CoresetMethod
+
+
+class Uniform(CoresetMethod):
+    """Uniform random coreset selection, drawn per class or over the whole training set."""
+    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, balance=True, replace=False, **kwargs):
+        super().__init__(dst_train, args, fraction, random_seed)
+        self.n_train = len(self.dst_train)
+        self.balance = balance
+        self.replace = replace
+
+    def select_balance(self):
+        """Draw the same fraction from every class and return the concatenated indices."""
+        np.random.seed(self.random_seed)
+        positions = np.arange(self.n_train)
+        self.index = np.array([], dtype=np.int64)
+        for c in range(self.num_classes):
+            in_class = (self.dst_train_label == c)
+            quota = round(self.fraction * in_class.sum().item())
+            drawn = np.random.choice(positions[in_class], quota, replace=self.replace)
+            self.index = np.append(self.index, drawn)
+        return self.index
+
+    def select_no_balance(self):
+        """Draw ``round(n_train * fraction)`` indices from the whole training set."""
+        np.random.seed(self.random_seed)
+        quota = round(self.n_train * self.fraction)
+        self.index = np.random.choice(np.arange(self.n_train), quota, replace=self.replace)
+        return self.index
+
+    def select(self, **kwargs):
+        """Return ``{"indices": ...}`` using the balanced or unbalanced sampler."""
+        sampler = self.select_balance if self.balance else self.select_no_balance
+        return {"indices": sampler()}