Support chunked prefill and fix a minor bug

This commit is contained in:
GeekExplorer
2026-04-14 02:47:35 +08:00
parent 9e8507ef41
commit 8d63a98c03
8 changed files with 65 additions and 53 deletions
+3 -3
View File
@@ -18,7 +18,7 @@ class LLMEngine:
config_fields = {field.name for field in fields(Config)}
config_kwargs = {k: v for k, v in kwargs.items() if k in config_fields}
config = Config(model, **config_kwargs)
Sequence.set_block_size(config.kvcache_block_size)
Sequence.block_size = config.kvcache_block_size
self.ps = []
self.events = []
ctx = mp.get_context("spawn")
@@ -48,10 +48,10 @@ class LLMEngine:
# Run one engine iteration: ask the scheduler for a batch, run the model on
# it, hand the sampled token ids back to the scheduler, and collect the
# sequences that have finished.
#
# Returns a pair (outputs, num_tokens):
#   outputs    - list of (seq_id, completion_token_ids) for every scheduled
#                sequence whose is_finished attribute is truthy afterwards.
#   num_tokens - a token count for a prefill batch, otherwise the negated
#                batch size (-len(seqs)); presumably the sign lets the caller
#                tell prefill from decode steps - confirm against the caller.
#
# NOTE(review): this text is a rendered commit diff whose +/- markers and
# indentation were lost (the file header reports 3 additions / 3 deletions).
# The duplicated statements below look like the removed and added versions
# of the same line rather than code meant to run back to back - confirm
# against the repository before treating both as live.
def step(self):
# The scheduler returns the batch plus a flag saying whether it is a prefill batch.
seqs, is_prefill = self.scheduler.schedule()
# Token count computed before the model runs: the sum of each sequence's
# num_scheduled_tokens for prefill, else -len(seqs).
num_tokens = sum(seq.num_scheduled_tokens for seq in seqs) if is_prefill else -len(seqs)
# Dispatch the "run" call through the model runner; the result is used as token ids.
token_ids = self.model_runner.call("run", seqs, is_prefill)
# NOTE(review): two-argument postprocess call - appears to be the removed side of the diff.
self.scheduler.postprocess(seqs, token_ids)
# NOTE(review): three-argument postprocess call (adds is_prefill) - appears to be the added side.
self.scheduler.postprocess(seqs, token_ids, is_prefill)
# Only sequences flagged as finished are reported to the caller.
outputs = [(seq.seq_id, seq.completion_token_ids) for seq in seqs if seq.is_finished]
# NOTE(review): second num_tokens assignment, based on len(seq) instead of
# num_scheduled_tokens; if live it would overwrite the value computed above.
# Appears to be the removed side of the diff - confirm.
num_tokens = sum(len(seq) for seq in seqs) if is_prefill else -len(seqs)
return outputs, num_tokens
def is_finished(self):