Patch summary (free text before the first "diff --git" header is ignored by patch tools):
  * nanovllm/engine/llm_engine.py: rename the misspelled local `config_fileds` to `config_fields`.
  * nanovllm/layers/sampler.py: add a small constant (1e-10) to the exponential noise used as the
    divisor when sampling tokens -- presumably to avoid dividing by an exact zero; confirm intent.

diff --git a/nanovllm/engine/llm_engine.py b/nanovllm/engine/llm_engine.py
index 6e64afd..2d43b50 100644
--- a/nanovllm/engine/llm_engine.py
+++ b/nanovllm/engine/llm_engine.py
@@ -15,8 +15,8 @@ from nanovllm.engine.model_runner import ModelRunner
 class LLMEngine:
 
     def __init__(self, model, **kwargs):
-        config_fileds = {field.name for field in fields(Config)}
-        config_kwargs = {k: v for k, v in kwargs.items() if k in config_fileds}
+        config_fields = {field.name for field in fields(Config)}
+        config_kwargs = {k: v for k, v in kwargs.items() if k in config_fields}
         config = Config(model, **config_kwargs)
         self.ps = []
         self.events = []
diff --git a/nanovllm/layers/sampler.py b/nanovllm/layers/sampler.py
index 88e59ee..e4b9816 100644
--- a/nanovllm/layers/sampler.py
+++ b/nanovllm/layers/sampler.py
@@ -13,5 +13,6 @@ class Sampler(nn.Module):
         logits.div_(temperatures.unsqueeze(dim=1))
         probs = torch.softmax(logits, dim=-1, dtype=torch.float)
         # logprobs = torch.log_softmax(logits, dim=-1, dtype=torch.float)
-        sample_tokens = probs.div_(torch.empty_like(probs).exponential_(1)).argmax(dim=-1)
+        epsilon = 1e-10
+        sample_tokens = probs.div_(torch.empty_like(probs).exponential_(1) + epsilon).argmax(dim=-1)
         return torch.where(temperatures == 0, greedy_tokens, sample_tokens)