From 9e8507ef412efe9d921ecc70d65bcc2fc3dd18eb Mon Sep 17 00:00:00 2001 From: GeekExplorer Date: Mon, 13 Apr 2026 22:09:46 +0800 Subject: [PATCH] minor simplify --- nanovllm/engine/block_manager.py | 2 +- nanovllm/engine/llm_engine.py | 26 +++++++++++--------------- nanovllm/engine/model_runner.py | 4 +--- nanovllm/layers/activation.py | 3 --- nanovllm/layers/linear.py | 3 +-- nanovllm/layers/sampler.py | 3 --- 6 files changed, 14 insertions(+), 27 deletions(-) diff --git a/nanovllm/engine/block_manager.py b/nanovllm/engine/block_manager.py index 763aae2..1ad00f6 100644 --- a/nanovllm/engine/block_manager.py +++ b/nanovllm/engine/block_manager.py @@ -46,7 +46,7 @@ class BlockManager: block.reset() self.free_block_ids.remove(block_id) self.used_block_ids.add(block_id) - return self.blocks[block_id] + return block def _deallocate_block(self, block_id: int) -> Block: assert self.blocks[block_id].ref_count == 0 diff --git a/nanovllm/engine/llm_engine.py b/nanovllm/engine/llm_engine.py index 2f97afe..45e75aa 100644 --- a/nanovllm/engine/llm_engine.py +++ b/nanovllm/engine/llm_engine.py @@ -63,8 +63,7 @@ class LLMEngine: sampling_params: SamplingParams | list[SamplingParams], use_tqdm: bool = True, ) -> list[str]: - if use_tqdm: - pbar = tqdm(total=len(prompts), desc="Generating", dynamic_ncols=True) + pbar = tqdm(total=len(prompts), desc="Generating", dynamic_ncols=True, disable=not use_tqdm) if not isinstance(sampling_params, list): sampling_params = [sampling_params] * len(prompts) for prompt, sp in zip(prompts, sampling_params): @@ -74,21 +73,18 @@ class LLMEngine: while not self.is_finished(): t = perf_counter() output, num_tokens = self.step() - if use_tqdm: - if num_tokens > 0: - prefill_throughput = num_tokens / (perf_counter() - t) - else: - decode_throughput = -num_tokens / (perf_counter() - t) - pbar.set_postfix({ - "Prefill": f"{int(prefill_throughput)}tok/s", - "Decode": f"{int(decode_throughput)}tok/s", - }) + if num_tokens > 0: + prefill_throughput = num_tokens / (perf_counter() - t) + else: + decode_throughput = -num_tokens / (perf_counter() - t) + pbar.set_postfix({ + "Prefill": f"{int(prefill_throughput)}tok/s", + "Decode": f"{int(decode_throughput)}tok/s", + }) for seq_id, token_ids in output: outputs[seq_id] = token_ids - if use_tqdm: - pbar.update(1) + pbar.update(1) + pbar.close() outputs = [outputs[seq_id] for seq_id in sorted(outputs.keys())] outputs = [{"text": self.tokenizer.decode(token_ids), "token_ids": token_ids} for token_ids in outputs] - if use_tqdm: - pbar.close() return outputs diff --git a/nanovllm/engine/model_runner.py b/nanovllm/engine/model_runner.py index f66c38e..e70a193 100644 --- a/nanovllm/engine/model_runner.py +++ b/nanovllm/engine/model_runner.py @@ -180,9 +180,7 @@ class ModelRunner: return input_ids, positions def prepare_sample(self, seqs: list[Sequence]): - temperatures = [] - for seq in seqs: - temperatures.append(seq.temperature) + temperatures = [seq.temperature for seq in seqs] temperatures = torch.tensor(temperatures, dtype=torch.float32, pin_memory=True).cuda(non_blocking=True) return temperatures diff --git a/nanovllm/layers/activation.py b/nanovllm/layers/activation.py index 041ee20..06cced3 100755 --- a/nanovllm/layers/activation.py +++ b/nanovllm/layers/activation.py @@ -5,9 +5,6 @@ import torch.nn.functional as F class SiluAndMul(nn.Module): - def __init__(self): - super().__init__() - @torch.compile def forward(self, x: torch.Tensor) -> torch.Tensor: x, y = x.chunk(2, -1) diff --git a/nanovllm/layers/linear.py b/nanovllm/layers/linear.py index 5e54baa..d9e8158 100755 --- a/nanovllm/layers/linear.py +++ b/nanovllm/layers/linear.py @@ -141,8 +141,7 @@ class RowParallelLinear(LinearBase): def weight_loader(self, param: nn.Parameter, loaded_weight: torch.Tensor): param_data = param.data - if param_data.dim() == 1: - # bias is not sharded in RowParallelLinear + if param_data.ndim == 1: param_data.copy_(loaded_weight) return shard_size = param_data.size(self.tp_dim) diff --git a/nanovllm/layers/sampler.py b/nanovllm/layers/sampler.py index b101018..41838ac 100644 --- a/nanovllm/layers/sampler.py +++ b/nanovllm/layers/sampler.py @@ -4,9 +4,6 @@ from torch import nn class Sampler(nn.Module): - def __init__(self): - super().__init__() - @torch.compile def forward(self, logits: torch.Tensor, temperatures: torch.Tensor): logits = logits.float().div_(temperatures.unsqueeze(dim=1))