init commit

GeeeekExplorer
2025-06-10 00:23:23 +08:00
commit a5a4909e6a
26 changed files with 1677 additions and 0 deletions

nanovllm/utils/context.py Normal file

@@ -0,0 +1,28 @@
from contextlib import contextmanager
from dataclasses import dataclass
import torch


@dataclass
class Context:
    is_prefill: bool = False
    cu_seqlens_q: torch.Tensor | None = None
    cu_seqlens_k: torch.Tensor | None = None
    max_seqlen_q: int = 0
    max_seqlen_k: int = 0
    slot_mapping: torch.Tensor | None = None
    context_lens: torch.Tensor | None = None
    block_tables: torch.Tensor | None = None


_CONTEXT = Context()


def get_context():
    return _CONTEXT


def set_context(is_prefill, cu_seqlens_q=None, cu_seqlens_k=None, max_seqlen_q=0, max_seqlen_k=0, slot_mapping=None, context_lens=None, block_tables=None):
    global _CONTEXT
    _CONTEXT = Context(is_prefill, cu_seqlens_q, cu_seqlens_k, max_seqlen_q, max_seqlen_k, slot_mapping, context_lens, block_tables)


def reset_context():
    global _CONTEXT
    _CONTEXT = Context()
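
For reference, a minimal usage sketch (not part of this commit): the runner would populate the context before a forward pass and attention layers would read it back via get_context(). The sequence lengths and the CUDA device below are illustrative assumptions.

# Hypothetical usage sketch, assuming a CUDA device and two sequences of lengths 3 and 5.
import torch
from nanovllm.utils.context import set_context, get_context, reset_context

cu_seqlens = torch.tensor([0, 3, 8], dtype=torch.int32, device="cuda")
set_context(True, cu_seqlens_q=cu_seqlens, cu_seqlens_k=cu_seqlens,
            max_seqlen_q=5, max_seqlen_k=5)

ctx = get_context()   # read anywhere, e.g. inside an attention layer
assert ctx.is_prefill
reset_context()       # clear the global state after the forward pass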

nanovllm/utils/memory.py Normal file

@@ -0,0 +1,14 @@
import os
import subprocess
import torch


def get_gpu_memory(device_id: int = 0):
    torch.cuda.synchronize()
    result = subprocess.check_output(
        ['nvidia-smi', '-i', str(device_id), '--query-gpu=memory.total,memory.used,memory.free', '--format=csv,nounits,noheader'],
        encoding='utf-8'
    )
    total_memory, used_memory, free_memory = [int(x) for x in result.strip().split(', ')]
    return total_memory, used_memory, free_memory
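
A quick usage sketch (not part of this commit; the 0.9 headroom factor is an illustrative assumption): nvidia-smi with --format=csv,nounits reports the three values in MiB, so they can be used directly to budget the KV cache.

# Hypothetical usage: reserve most of the free VRAM on GPU 0.
from nanovllm.utils.memory import get_gpu_memory

total, used, free = get_gpu_memory(0)   # MiB, as reported by nvidia-smi
print(f"GPU0: {used}/{total} MiB used, {free} MiB free")
kv_cache_budget_mib = int(free * 0.9)   # leave headroom for activations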

nanovllm/utils/timer.py Normal file

@@ -0,0 +1,31 @@
from contextlib import contextmanager
from collections import defaultdict
import torch


class CUDATimer:

    def __init__(self):
        self.events = defaultdict(list)

    @contextmanager
    def record(self, name, enabled=True):
        if not enabled:
            yield
        else:
            start, end = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
            self.events[name].append((start, end))
            start.record()
            yield
            end.record()

    def log(self):
        torch.cuda.synchronize()
        ret = []
        for name, events in self.events.items():
            total = 0
            count = len(events)  # pairs recorded under this name (not len(self.events), which counts names)
            for start, end in events:
                total += start.elapsed_time(end)
            ret.append(f"{name} {total:.2f}ms/{count}times")
        return ", ".join(ret)