fix cache hit
This commit is contained in:
@@ -40,15 +40,15 @@ class BlockManager:
|
|||||||
h.update(np.array(token_ids).tobytes())
|
h.update(np.array(token_ids).tobytes())
|
||||||
return h.intdigest()
|
return h.intdigest()
|
||||||
|
|
||||||
def _allocate_block(self, block_id: int) -> Block:
|
def _allocate_block(self) -> int:
|
||||||
|
block_id = self.free_block_ids.popleft()
|
||||||
block = self.blocks[block_id]
|
block = self.blocks[block_id]
|
||||||
assert block.ref_count == 0
|
assert block.ref_count == 0
|
||||||
if block.hash != -1 and self.hash_to_block_id.get(block.hash) == block_id:
|
if block.hash != -1 and self.hash_to_block_id.get(block.hash) == block_id:
|
||||||
del self.hash_to_block_id[block.hash]
|
del self.hash_to_block_id[block.hash]
|
||||||
block.reset()
|
block.reset()
|
||||||
self.free_block_ids.remove(block_id)
|
|
||||||
self.used_block_ids.add(block_id)
|
self.used_block_ids.add(block_id)
|
||||||
return block
|
return block_id
|
||||||
|
|
||||||
def _deallocate_block(self, block_id: int):
|
def _deallocate_block(self, block_id: int):
|
||||||
assert self.blocks[block_id].ref_count == 0
|
assert self.blocks[block_id].ref_count == 0
|
||||||
@@ -83,12 +83,12 @@ class BlockManager:
|
|||||||
if block_id in self.used_block_ids:
|
if block_id in self.used_block_ids:
|
||||||
block.ref_count += 1
|
block.ref_count += 1
|
||||||
else:
|
else:
|
||||||
self._allocate_block(block_id)
|
block.ref_count = 1
|
||||||
|
self.free_block_ids.remove(block_id)
|
||||||
|
self.used_block_ids.add(block_id)
|
||||||
seq.block_table.append(block_id)
|
seq.block_table.append(block_id)
|
||||||
for i in range(num_cached_blocks, seq.num_blocks):
|
for i in range(num_cached_blocks, seq.num_blocks):
|
||||||
block_id = self.free_block_ids[0]
|
seq.block_table.append(self._allocate_block())
|
||||||
self._allocate_block(block_id)
|
|
||||||
seq.block_table.append(block_id)
|
|
||||||
seq.num_cached_tokens = num_cached_blocks * self.block_size
|
seq.num_cached_tokens = num_cached_blocks * self.block_size
|
||||||
|
|
||||||
def deallocate(self, seq: Sequence):
|
def deallocate(self, seq: Sequence):
|
||||||
@@ -105,9 +105,7 @@ class BlockManager:
|
|||||||
|
|
||||||
def may_append(self, seq: Sequence):
|
def may_append(self, seq: Sequence):
|
||||||
if len(seq) % self.block_size == 1:
|
if len(seq) % self.block_size == 1:
|
||||||
block_id = self.free_block_ids[0]
|
seq.block_table.append(self._allocate_block())
|
||||||
self._allocate_block(block_id)
|
|
||||||
seq.block_table.append(block_id)
|
|
||||||
|
|
||||||
def hash_blocks(self, seq: Sequence):
|
def hash_blocks(self, seq: Sequence):
|
||||||
start = seq.num_cached_tokens // self.block_size
|
start = seq.num_cached_tokens // self.block_size
|
||||||
|
|||||||
Reference in New Issue
Block a user