diff --git a/nanovllm/engine/model_runner.py b/nanovllm/engine/model_runner.py index 5e6342b..c58f619 100644 --- a/nanovllm/engine/model_runner.py +++ b/nanovllm/engine/model_runner.py @@ -139,8 +139,8 @@ class ModelRunner: seqlen = len(seq) start = min(seq.num_cached_tokens, seqlen - 1) seqlen_q = seq.num_scheduled_tokens - seqlen_k = seqlen end = start + seqlen_q + seqlen_k = end input_ids.extend(seq[start:end]) positions.extend(range(start, end)) cu_seqlens_q.append(cu_seqlens_q[-1] + seqlen_q)