fix chunked prefill bugs and refactor
This commit is contained in:
@@ -136,8 +136,7 @@ class ModelRunner:
|
||||
slot_mapping = []
|
||||
block_tables = None
|
||||
for seq in seqs:
|
||||
seqlen = len(seq)
|
||||
- start = min(seq.num_cached_tokens, seqlen - 1)
|
||||
+ start = seq.num_cached_tokens
|
||||
seqlen_q = seq.num_scheduled_tokens
|
||||
end = start + seqlen_q
|
||||
seqlen_k = end
|
||||
|
||||
Reference in New Issue
Block a user