Upload to Main
This commit is contained in:
4
deepcore/methods/methods_utils/__init__.py
Normal file
4
deepcore/methods/methods_utils/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from .euclidean import *
|
||||
from .cossim import *
|
||||
from .submodular_function import *
|
||||
from .submodular_optimizer import *
|
||||
Binary file not shown.
BIN
deepcore/methods/methods_utils/__pycache__/cossim.cpython-39.pyc
Normal file
BIN
deepcore/methods/methods_utils/__pycache__/cossim.cpython-39.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
35
deepcore/methods/methods_utils/cossim.py
Normal file
35
deepcore/methods/methods_utils/cossim.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
def cossim_np(v1, v2):
    """Pairwise cosine similarity between rows of v1 and rows of v2,
    rescaled from [-1, 1] into [0, 1].

    Returns an (len(v1), len(v2)) array.
    """
    dots = np.dot(v1, v2.T)
    norms = np.linalg.norm(v1, axis=1).reshape(-1, 1) * np.linalg.norm(v2, axis=1)
    # Small epsilon keeps zero-norm rows from dividing by zero.
    sims = dots / (norms + 1e-6)
    sims[np.isneginf(sims)] = 0.
    return 0.5 + 0.5 * sims
|
||||
|
||||
def cossim_pair_np(v1):
    """All-pairs cosine similarity among the rows of v1, mapped to [0, 1].

    Returns an (len(v1), len(v1)) array.
    """
    dots = np.dot(v1, v1.T)
    row_norms = np.linalg.norm(v1, axis=1)
    norms = row_norms.reshape(-1, 1) * row_norms
    # Small epsilon keeps zero-norm rows from dividing by zero.
    sims = dots / (norms + 1e-6)
    sims[np.isneginf(sims)] = 0.
    return 0.5 + 0.5 * sims
|
||||
|
||||
def cossim(v1, v2):
    """Pairwise cosine similarity between rows of v1 and rows of v2 (torch),
    rescaled from [-1, 1] into [0, 1]."""
    dots = torch.matmul(v1, v2.T)
    norms = torch.norm(v1, dim=1).view(-1, 1) * torch.norm(v2, dim=1)
    # Small epsilon keeps zero-norm rows from dividing by zero.
    sims = dots / (norms + 1e-6)
    sims[torch.isneginf(sims)] = 0.
    return 0.5 + 0.5 * sims
|
||||
|
||||
def cossim_pair(v1):
    """All-pairs cosine similarity among the rows of v1 (torch), mapped to [0, 1]."""
    dots = torch.matmul(v1, v1.T)
    row_norms = torch.norm(v1, dim=1)
    norms = row_norms.view(-1, 1) * row_norms
    # Small epsilon keeps zero-norm rows from dividing by zero.
    sims = dots / (norms + 1e-6)
    sims[torch.isneginf(sims)] = 0.
    return 0.5 + 0.5 * sims
|
||||
|
||||
36
deepcore/methods/methods_utils/euclidean.py
Normal file
36
deepcore/methods/methods_utils/euclidean.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
|
||||
def euclidean_dist(x, y):
    """Pairwise Euclidean distances between rows of x (m, d) and y (n, d).

    Returns an (m, n) tensor where entry (i, j) is ||x_i - y_j||_2, computed
    via the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b.
    """
    m, n = x.size(0), y.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
    yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
    dist = xx + yy
    # Fix: the legacy positional overload addmm_(beta, alpha, mat1, mat2) was
    # deprecated and then removed from PyTorch; pass beta/alpha as keywords.
    dist.addmm_(x, y.t(), beta=1, alpha=-2)
    # Clamp guards against tiny negatives caused by floating-point
    # cancellation before the square root.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist
|
||||
|
||||
|
||||
def euclidean_dist_pair(x):
    """All-pairs Euclidean distances among the rows of x (m, d).

    Returns an (m, m) tensor; the diagonal is ~1e-6 (the square root of the
    1e-12 clamp floor) rather than exactly zero.
    """
    m = x.size(0)
    xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, m)
    dist = xx + xx.t()
    # Fix: the legacy positional overload addmm_(beta, alpha, mat1, mat2) was
    # deprecated and then removed from PyTorch; pass beta/alpha as keywords.
    dist.addmm_(x, x.t(), beta=1, alpha=-2)
    # Clamp guards against tiny negatives caused by floating-point
    # cancellation before the square root.
    dist = dist.clamp(min=1e-12).sqrt()
    return dist
|
||||
|
||||
def euclidean_dist_np(x, y):
    """Pairwise Euclidean distances between rows of x and rows of y (NumPy).

    Returns an (len(x), len(y)) array computed via the quadratic expansion
    ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clipped at 1e-12 before sqrt.
    """
    rows_x = x.shape[0]
    rows_y = y.shape[0]
    cross = np.dot(x, y.T)
    sq_x = np.repeat(np.sum(x * x, axis=1).reshape(rows_x, 1), repeats=rows_y, axis=1)
    sq_y = np.repeat(np.sum(y * y, axis=1).reshape(rows_y, 1), repeats=rows_x, axis=1).T
    return np.sqrt(np.clip(sq_x + sq_y - 2. * cross, 1e-12, None))
|
||||
|
||||
# All-pairs Euclidean distance among the rows of x; returns an (N, N) matrix
# whose diagonal is numerically (but not exactly) zero due to the 1e-12 floor.
def euclidean_dist_pair_np(x):
    rows = x.shape[0]
    gram = np.dot(x, x.T)
    sq_norms = np.repeat(np.sum(x * x, axis=1).reshape(rows, 1), repeats=rows, axis=1)
    return np.sqrt(np.clip(sq_norms + sq_norms.T - 2. * gram, 1e-12, None))
|
||||
144
deepcore/methods/methods_utils/submodular_function.py
Normal file
144
deepcore/methods/methods_utils/submodular_function.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class SubmodularFunction(object):
    """Base class for submodular set functions over a set of sample indices.

    Similarities can be supplied either as a precomputed (n, n) matrix or as
    a callable kernel that subclasses may evaluate lazily on demand.
    """

    def __init__(self, index, similarity_kernel=None, similarity_matrix=None, already_selected=None):
        """
        Args:
            index: array-like of sample indices the function is defined over.
            similarity_kernel: callable (a, b) -> similarity sub-matrix, used
                to compute similarities incrementally at a later time.
            similarity_matrix: precomputed (n, n) pairwise similarity matrix.
            already_selected: indices selected before optimization starts.
        """
        self.index = index
        self.n = len(index)

        # Fix: a literal [] default is a single shared list object reused
        # across every instantiation; use a None sentinel instead.
        self.already_selected = [] if already_selected is None else already_selected

        # Exactly one of the two similarity inputs is required.
        assert similarity_kernel is not None or similarity_matrix is not None

        # For the sample similarity matrix, the method supports two input
        # modes: a full pairwise similarity matrix, or a similarity kernel
        # used to calculate similarities incrementally when required.
        if similarity_kernel is not None:
            assert callable(similarity_kernel)
            self.similarity_kernel = self._similarity_kernel(similarity_kernel)
        else:
            assert similarity_matrix.shape[0] == self.n and similarity_matrix.shape[1] == self.n
            self.similarity_matrix = similarity_matrix
            self.similarity_kernel = lambda a, b: self.similarity_matrix[np.ix_(a, b)]

    def _similarity_kernel(self, similarity_kernel):
        # Hook for subclasses to wrap the kernel (e.g. with column caching).
        return similarity_kernel
|
||||
|
||||
|
||||
class FacilityLocation(SubmodularFunction):
    """Facility-location function: f(S) = sum_i max_{j in S} sim(i, j).

    The marginal gain of a candidate j is the total improvement it brings to
    each sample's best similarity over the current selection.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        # cur_max[i] is the best similarity of sample i to any already
        # selected sample; all zeros when nothing has been selected yet.
        if len(self.already_selected) == 0:
            self.cur_max = np.zeros(self.n, dtype=np.float32)
        else:
            self.cur_max = np.max(self.similarity_kernel(np.arange(self.n), self.already_selected), axis=1)

        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        # Cache similarity columns so each one is computed at most once.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Convert an integer index array into a boolean mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        """Marginal facility-location gain for each candidate in idx_gain."""
        gains = np.maximum(0., self.similarity_kernel(self.all_idx, idx_gain) - self.cur_max.reshape(-1, 1)).sum(axis=0)
        return gains

    def calc_gain_batch(self, idx_gain, selected, **kwargs):
        """Same as calc_gain, but accumulates over row batches of size
        kwargs["batch"] to bound peak memory."""
        batch_idx = ~self.all_idx
        batch_idx[0:kwargs["batch"]] = True
        gains = np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)
        for i in range(kwargs["batch"], self.n, kwargs["batch"]):
            batch_idx = ~self.all_idx
            # Fix: i already advances in steps of "batch", so the window is
            # [i, i + batch); the previous [i*batch : (i+1)*batch] slice
            # skipped every row beyond the first batch whenever batch >= 2.
            batch_idx[i:i + kwargs["batch"]] = True
            gains += np.maximum(0., self.similarity_kernel(batch_idx, idx_gain) - self.cur_max[batch_idx].reshape(-1, 1)).sum(axis=0)
        return gains

    def update_state(self, new_selection, total_selected, **kwargs):
        """Fold the newly selected samples into each sample's best similarity."""
        self.cur_max = np.maximum(self.cur_max, np.max(self.similarity_kernel(self.all_idx, new_selection), axis=1))
|
||||
|
||||
|
||||
class GraphCut(SubmodularFunction):
    # Graph-cut submodular function. The gain of adding a candidate combines
    # its total similarity to all samples (weighted by lam) against twice its
    # similarity to the already selected set.
    def __init__(self, lam: float = 1., **kwargs):
        super().__init__(**kwargs)
        # lam balances coverage (column sums) against redundancy with the
        # selected set in calc_gain.
        self.lam = lam

        # When a full similarity matrix was supplied, column sums can be
        # precomputed once; otherwise _similarity_kernel fills them lazily.
        if 'similarity_matrix' in kwargs:
            self.sim_matrix_cols_sum = np.sum(self.similarity_matrix, axis=0)
        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        # Initialize a matrix to store similarity values of sample points.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        # Per-column totals, filled in as columns are computed.
        self.sim_matrix_cols_sum = np.zeros(self.n, dtype=np.float32)
        # Tracks which columns of sim_matrix have been materialized.
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            # Lazily compute any requested columns that are still missing.
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Convert an integer index array into a boolean mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.sim_matrix_cols_sum[not_calculated] = np.sum(self.sim_matrix[:, not_calculated], axis=0)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        # Conditional gain of each candidate in idx_gain given the current
        # selection: lam * (total similarity of the candidate's column) minus
        # twice its similarity to the already selected samples.
        gain = -2. * np.sum(self.similarity_kernel(selected, idx_gain), axis=0) + self.lam * self.sim_matrix_cols_sum[idx_gain]

        return gain

    def update_state(self, new_selection, total_selected, **kwargs):
        # Graph-cut gains depend only on the selected mask passed to
        # calc_gain, so there is no incremental state to refresh.
        pass
|
||||
|
||||
|
||||
class LogDeterminant(SubmodularFunction):
    # Log-determinant submodular function, favoring diverse selections by
    # scoring candidates against the inverse similarity of the selected set.
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.all_idx = np.ones(self.n, dtype=bool)

    def _similarity_kernel(self, similarity_kernel):
        # Initialize a matrix to store similarity values of sample points.
        self.sim_matrix = np.zeros([self.n, self.n], dtype=np.float32)
        # Tracks which columns of sim_matrix have been materialized.
        self.if_columns_calculated = np.zeros(self.n, dtype=bool)

        def _func(a, b):
            # Lazily compute any requested columns that are still missing.
            if not np.all(self.if_columns_calculated[b]):
                if b.dtype != bool:
                    # Convert an integer index array into a boolean mask.
                    temp = ~self.all_idx
                    temp[b] = True
                    b = temp
                not_calculated = b & ~self.if_columns_calculated
                self.sim_matrix[:, not_calculated] = similarity_kernel(self.all_idx, not_calculated)
                self.if_columns_calculated[not_calculated] = True
            return self.sim_matrix[np.ix_(a, b)]
        return _func

    def calc_gain(self, idx_gain, selected, **kwargs):
        # Gain for LogDeterminant can be written as $f(x | A ) = \log\det(S_{a} - S_{a,A}S_{A}^{-1}S_{x,A}^T)$.
        # The code evaluates the quadratic form S_{x,A} S_A^{-1} S_{x,A}^T
        # per candidate, using the pseudo-inverse for numerical stability.
        sim_idx_gain = self.similarity_kernel(selected, idx_gain).T
        sim_selected = self.similarity_kernel(selected, selected)
        return (np.dot(sim_idx_gain, np.linalg.pinv(sim_selected)) * sim_idx_gain).sum(-1)

    def update_state(self, new_selection, total_selected, **kwargs):
        # Gains are recomputed from the selected mask each step; nothing to
        # update incrementally.
        pass
|
||||
155
deepcore/methods/methods_utils/submodular_optimizer.py
Normal file
155
deepcore/methods/methods_utils/submodular_optimizer.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
optimizer_choices = ["NaiveGreedy", "LazyGreedy", "StochasticGreedy", "ApproximateLazyGreedy"]
|
||||
|
||||
class optimizer(object):
    """Base class for greedy submodular maximization strategies.

    Holds the index set being selected over, the selection budget, and any
    indices selected before optimization starts.
    """

    def __init__(self, args, index, budget:int, already_selected=None):
        """
        Args:
            args: experiment configuration object (subclasses read
                args.TRAIN.PRINT_FREQ for progress printing).
            index: array of sample indices selection is performed over.
            budget: number of samples to select; must be in (0, len(index)].
            already_selected: positions (into index) selected in advance.

        Raises:
            ValueError: if budget is non-positive or exceeds len(index).
        """
        self.args = args
        self.index = index

        if budget <= 0 or budget > len(index):
            raise ValueError("Illegal budget for optimizer.")

        self.n = len(index)
        self.budget = budget
        # Fix: a literal [] default is shared across every instantiation;
        # use a None sentinel and allocate a fresh list per instance.
        self.already_selected = [] if already_selected is None else already_selected
|
||||
|
||||
|
||||
class NaiveGreedy(optimizer):
    """Plain greedy selection: each step recomputes the gain of every
    unselected sample and picks the argmax."""

    def __init__(self, args, index, budget:int, already_selected=None):
        # Fix: None sentinel avoids the shared mutable [] default argument.
        super(NaiveGreedy, self).__init__(args, index, budget,
                                          [] if already_selected is None else already_selected)

    def select(self, gain_function, update_state=None, **kwargs):
        """Greedily pick samples until the budget is reached.

        Args:
            gain_function: callable(candidates, selected_mask, **kwargs)
                returning the marginal gain of each candidate.
            update_state: optional callable invoked after each selection so
                the underlying submodular function can refresh its state.

        Returns:
            The sub-array of self.index corresponding to the selected mask.
        """
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        greedy_gain = np.zeros(len(self.index))
        for i in range(sum(selected), self.budget):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            # Recompute gains for every unselected candidate (naive strategy).
            greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
            current_selection = greedy_gain.argmax()
            selected[current_selection] = True
            # -inf keeps the chosen element from being picked again.
            greedy_gain[current_selection] = -np.inf
            if update_state is not None:
                update_state(np.array([current_selection]), selected, **kwargs)
        return self.index[selected]
|
||||
|
||||
|
||||
class LazyGreedy(optimizer):
    """Lazy greedy selection: gains are computed once up front, then refreshed
    one candidate at a time, exploiting submodularity (stale gains are upper
    bounds on true gains)."""

    def __init__(self, args, index, budget:int, already_selected=None):
        # Fix: None sentinel avoids the shared mutable [] default argument.
        super(LazyGreedy, self).__init__(args, index, budget,
                                         [] if already_selected is None else already_selected)

    def select(self, gain_function, update_state=None, **kwargs):
        """Select samples with lazy gain re-evaluation.

        Args:
            gain_function: callable(candidates, selected_mask, **kwargs)
                returning the marginal gain of each candidate.
            update_state: optional callable invoked after each selection.

        Returns:
            The sub-array of self.index corresponding to the selected mask.
        """
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        # One full gain evaluation up front; afterwards individual gains are
        # refreshed lazily as candidates reach the top.
        greedy_gain = np.zeros(len(self.index))
        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
        greedy_gain[selected] = -np.inf

        for i in tqdm(range(sum(selected), self.budget)):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            best_gain = -np.inf
            last_max_element = -1
            while True:
                cur_max_element = greedy_gain.argmax()
                if last_max_element == cur_max_element:
                    # The same element topped the list twice in a row, so its
                    # refreshed gain is current: select cur_max_element.
                    selected[cur_max_element] = True
                    greedy_gain[cur_max_element] = -np.inf

                    if update_state is not None:
                        update_state(np.array([cur_max_element]), selected, **kwargs)
                    break
                # Refresh only the stale gain of the current top candidate.
                new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0]
                greedy_gain[cur_max_element] = new_gain
                if new_gain >= best_gain:
                    best_gain = new_gain
                    last_max_element = cur_max_element
        return self.index[selected]
|
||||
|
||||
|
||||
class StochasticGreedy(optimizer):
    """Stochastic greedy selection: each step evaluates gains on a random
    subset of the unselected samples instead of all of them, trading a small
    approximation loss for a large speedup."""

    def __init__(self, args, index, budget:int, already_selected=None, epsilon: float=0.9):
        # Fix: None sentinel avoids the shared mutable [] default argument.
        super(StochasticGreedy, self).__init__(args, index, budget,
                                               [] if already_selected is None else already_selected)
        # epsilon controls the random sample size (accuracy/speed trade-off).
        self.epsilon = epsilon

    def select(self, gain_function, update_state=None, **kwargs):
        """Select samples by greedy search over random candidate subsets.

        Args:
            gain_function: callable(candidates, selected_mask, **kwargs)
                returning the marginal gain of each candidate.
            update_state: optional callable invoked after each selection.

        Returns:
            The sub-array of self.index corresponding to the selected mask.
        """
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        # Sample size (n / budget) * log(1 / epsilon), at least one candidate.
        sample_size = max(round(-np.log(self.epsilon) * self.n / self.budget), 1)

        greedy_gain = np.zeros(len(self.index))
        all_idx = np.arange(self.n)
        for i in range(sum(selected), self.budget):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))

            # Uniformly select a subset from unselected samples with size sample_size
            subset = np.random.choice(all_idx[~selected], replace=False, size=min(sample_size, self.n - i))

            if len(subset) == 0:
                break

            greedy_gain[subset] = gain_function(subset, selected, **kwargs)
            current_selection = greedy_gain[subset].argmax()
            selected[subset[current_selection]] = True
            greedy_gain[subset[current_selection]] = -np.inf
            if update_state is not None:
                update_state(np.array([subset[current_selection]]), selected, **kwargs)
        return self.index[selected]
|
||||
|
||||
|
||||
class ApproximateLazyGreedy(optimizer):
    """Lazy greedy with a beta-approximate acceptance rule: the top candidate
    is accepted as soon as its refreshed gain reaches beta times its stale
    value, skipping some of the exact lazy-greedy re-evaluations."""

    def __init__(self, args, index, budget:int, already_selected=None, beta: float=0.9):
        # Fix: None sentinel avoids the shared mutable [] default argument.
        super(ApproximateLazyGreedy, self).__init__(args, index, budget,
                                                    [] if already_selected is None else already_selected)
        # beta in (0, 1]; beta = 1 reduces to a stricter lazy acceptance.
        self.beta = beta

    def select(self, gain_function, update_state=None, **kwargs):
        """Select samples with approximate lazy gain re-evaluation.

        Args:
            gain_function: callable(candidates, selected_mask, **kwargs)
                returning the marginal gain of each candidate.
            update_state: optional callable invoked after each selection.

        Returns:
            The sub-array of self.index corresponding to the selected mask.
        """
        assert callable(gain_function)
        if update_state is not None:
            assert callable(update_state)
        selected = np.zeros(self.n, dtype=bool)
        selected[self.already_selected] = True

        # One full gain evaluation up front; afterwards gains are refreshed
        # lazily for the current top candidate only.
        greedy_gain = np.zeros(len(self.index))
        greedy_gain[~selected] = gain_function(~selected, selected, **kwargs)
        greedy_gain[selected] = -np.inf

        for i in range(sum(selected), self.budget):
            if i % self.args.TRAIN.PRINT_FREQ == 0:
                print("| Selecting [%3d/%3d]" % (i + 1, self.budget))
            while True:
                cur_max_element = greedy_gain.argmax()
                max_gain = greedy_gain[cur_max_element]

                new_gain = gain_function(np.array([cur_max_element]), selected, **kwargs)[0]

                if new_gain >= self.beta * max_gain:
                    # Refreshed gain is close enough to its stale upper bound:
                    # select cur_max_element into the current subset.
                    selected[cur_max_element] = True
                    greedy_gain[cur_max_element] = -np.inf

                    if update_state is not None:
                        update_state(np.array([cur_max_element]), selected, **kwargs)
                    break
                else:
                    greedy_gain[cur_max_element] = new_gain
        return self.index[selected]
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user