fix chunked prefill bugs and refactor

This commit is contained in:
GeekExplorer
2026-04-26 02:53:06 +08:00
parent 44a51afc8a
commit f64d821c20
4 changed files with 68 additions and 62 deletions
+1 -2
View File
@@ -136,8 +136,7 @@ class ModelRunner:
slot_mapping = []
block_tables = None
for seq in seqs:
-seqlen = len(seq)
-start = min(seq.num_cached_tokens, seqlen - 1)
+start = seq.num_cached_tokens
seqlen_q = seq.num_scheduled_tokens
end = start + seqlen_q
seqlen_k = end