diff --git a/bench.py b/bench.py
index 5789754..0c4825e 100644
--- a/bench.py
+++ b/bench.py
@@ -15,6 +15,6 @@
 prompt_token_ids = torch.randint(0, 10240, (batch_size, seq_len)).tolist()
 sampling_params = SamplingParams(temperature=0.6, ignore_eos=True, max_tokens=max_tokens)
 t = time.time()
-completions = llm.generate(prompt_token_ids, sampling_params)
+llm.generate(prompt_token_ids, sampling_params)
 throughput = batch_size * max_tokens / (time.time() - t)
 print(f"Throughput: {throughput: .2f}")
diff --git a/nanovllm/engine/llm_engine.py b/nanovllm/engine/llm_engine.py
index 0194e6a..8464885 100644
--- a/nanovllm/engine/llm_engine.py
+++ b/nanovllm/engine/llm_engine.py
@@ -52,7 +52,7 @@ class LLMEngine:
             desc="Generating",
             dynamic_ncols=True,
         )
-        if not isinstance(SamplingParams, list):
+        if not isinstance(sampling_params, list):
            sampling_params = [sampling_params] * len(prompts)
         for prompt, sp in zip(prompts, sampling_params):
             self.add_request(prompt, sp)
diff --git a/nanovllm/engine/model_runner.py b/nanovllm/engine/model_runner.py
index b803278..c95d120 100644
--- a/nanovllm/engine/model_runner.py
+++ b/nanovllm/engine/model_runner.py
@@ -170,7 +170,7 @@ class ModelRunner:
         context_lens = torch.zeros(max_bs, dtype=torch.int32)
         block_tables = torch.zeros(max_bs, max_num_blocks, dtype=torch.int32)
         outputs = torch.zeros(max_bs, hf_config.hidden_size)
-        self.graph_bs = [1, 2, 4, 8, 16] + list(range(16, max_bs + 1, 16))
+        self.graph_bs = [1, 2, 4, 8] + list(range(16, max_bs + 1, 16))
         self.graphs = {}
         self.graph_pool = None
 
diff --git a/nanovllm/utils/timer.py b/nanovllm/utils/timer.py
deleted file mode 100644
index 0e8b1bc..0000000
--- a/nanovllm/utils/timer.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from contextlib import contextmanager
-from collections import defaultdict
-import torch
-
-
-class CUDATimer:
-
-    def __init__(self):
-        self.events = defaultdict(list)
-
-    @contextmanager
-    def record(self, name, enabled=True):
-        if not enabled:
-            yield
-        else:
-            start, end = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
-            self.events[name].append((start, end))
-            start.record()
-            yield
-            end.record()
-
-    def log(self):
-        torch.cuda.synchronize()
-        ret = []
-        for name, events in self.events.items():
-            total = 0
-            count = len(self.events)
-            for start, end in events:
-                total += start.elapsed_time(end)
-            ret.append(f"{name} {total:.2f}ms/{count}times")
-        return ", ".join(ret)
diff --git a/requirements.txt b/requirements.txt
index b1f4fd9..402eb01 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
 torch
 triton
 transformers
-cmake
-ninja
\ No newline at end of file
+flash-attn
\ No newline at end of file