Merge pull request #213 from Anai-Guo/fix/prepare-prefill-seqlen-k-chunked-prefill
fix(model_runner): correct seqlen_k to chunk boundary in prepare_prefill
This commit is contained in:
@@ -139,8 +139,8 @@ class ModelRunner:
|
|||||||
seqlen = len(seq)
|
seqlen = len(seq)
|
||||||
start = min(seq.num_cached_tokens, seqlen - 1)
|
start = min(seq.num_cached_tokens, seqlen - 1)
|
||||||
seqlen_q = seq.num_scheduled_tokens
|
seqlen_q = seq.num_scheduled_tokens
|
||||||
seqlen_k = seqlen
|
|
||||||
end = start + seqlen_q
|
end = start + seqlen_q
|
||||||
|
seqlen_k = end
|
||||||
input_ids.extend(seq[start:end])
|
input_ids.extend(seq[start:end])
|
||||||
positions.extend(range(start, end))
|
positions.extend(range(start, end))
|
||||||
cu_seqlens_q.append(cu_seqlens_q[-1] + seqlen_q)
|
cu_seqlens_q.append(cu_seqlens_q[-1] + seqlen_q)
|
||||||
|
|||||||
Reference in New Issue
Block a user