diff --git a/nanovllm/engine/block_manager.py b/nanovllm/engine/block_manager.py index 0f1789a..a48989c 100644 --- a/nanovllm/engine/block_manager.py +++ b/nanovllm/engine/block_manager.py @@ -40,15 +40,15 @@ class BlockManager: h.update(np.array(token_ids).tobytes()) return h.intdigest() - def _allocate_block(self, block_id: int) -> Block: + def _allocate_block(self) -> int: + block_id = self.free_block_ids.popleft() block = self.blocks[block_id] assert block.ref_count == 0 if block.hash != -1 and self.hash_to_block_id.get(block.hash) == block_id: del self.hash_to_block_id[block.hash] block.reset() - self.free_block_ids.remove(block_id) self.used_block_ids.add(block_id) - return block + return block_id def _deallocate_block(self, block_id: int): assert self.blocks[block_id].ref_count == 0 @@ -83,12 +83,12 @@ class BlockManager: if block_id in self.used_block_ids: block.ref_count += 1 else: - self._allocate_block(block_id) + block.ref_count = 1 + self.free_block_ids.remove(block_id) + self.used_block_ids.add(block_id) seq.block_table.append(block_id) for i in range(num_cached_blocks, seq.num_blocks): - block_id = self.free_block_ids[0] - self._allocate_block(block_id) - seq.block_table.append(block_id) + seq.block_table.append(self._allocate_block()) seq.num_cached_tokens = num_cached_blocks * self.block_size def deallocate(self, seq: Sequence): @@ -105,9 +105,7 @@ class BlockManager: def may_append(self, seq: Sequence): if len(seq) % self.block_size == 1: - block_id = self.free_block_ids[0] - self._allocate_block(block_id) - seq.block_table.append(block_id) + seq.block_table.append(self._allocate_block()) def hash_blocks(self, seq: Sequence): start = seq.num_cached_tokens // self.block_size