[WIP] Added sgDMA operator for scatter kvcache communication.

2025-12-24 23:48:52 +08:00
parent 6ec1b23982
commit cf5e7df093
9 changed files with 1061 additions and 1 deletions
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,41 @@
+from setuptools import setup, find_packages
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+import os
+
+# Absolute directory of this setup.py; the extension source and include paths below are built from it.
+project_root = os.path.dirname(os.path.abspath(__file__))
+
+setup(
+    name='nano-vllm',
+    version='0.2.0',
+    author='Zijie Tian',
+    description='A lightweight vLLM implementation with CUDA sgDMA support',
+    packages=find_packages(),  # discover packages automatically rather than listing them by hand
+    ext_modules=[
+        CUDAExtension(
+            name='nanovllm.comm._sgdma_cuda',  # dotted name of the compiled extension module
+            sources=[
+                os.path.join(project_root, 'csrc', 'sgdma.cpp'),
+                os.path.join(project_root, 'csrc', 'sgdma_kernel.cu'),
+            ],
+            extra_compile_args={
+                'cxx': ['-O3', '-std=c++17'],  # flags passed to the host C++ compiler
+                'nvcc': ['-O3', '--use_fast_math', '-std=c++17']  # flags passed to nvcc; NOTE(review): fast-math relaxes float precision - confirm the kernels tolerate it
+            },
+            include_dirs=[
+                os.path.join(project_root, 'csrc'),  # header search path for the sources above
+            ],
+        )
+    ],
+    cmdclass={
+        'build_ext': BuildExtension  # torch's build_ext command, used to build the CUDAExtension above
+    },
+    python_requires='>=3.10,<3.13',
+    install_requires=[
+        'torch>=2.4.0',
+        'triton>=3.0.0',
+        'transformers>=4.51.0',
+        'flash-attn',
+        'xxhash',
+    ],
+)