# nano-vllm/setup.py

from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

# Compiled extension importable as ``nanovllm.comm._sgdma_cuda``, built from
# the C++ and CUDA sources under ``csrc/``.  The host compiler and nvcc both
# get -O3 and C++17; nvcc additionally gets --use_fast_math.
_sgdma_extension = CUDAExtension(
    name="nanovllm.comm._sgdma_cuda",
    sources=["csrc/sgdma.cpp", "csrc/sgdma_kernel.cu"],
    extra_compile_args={
        "cxx": ["-O3", "-std=c++17"],
        "nvcc": ["-O3", "--use_fast_math", "-std=c++17"],
    },
    include_dirs=["csrc"],
)

# Runtime dependencies declared for the installed package.
_runtime_requirements = [
    "torch>=2.4.0",
    "triton>=3.0.0",
    "transformers>=4.51.0",
    "flash-attn",
    "xxhash",
]

# Route the ``build_ext`` command through PyTorch's BuildExtension class.
_command_overrides = {"build_ext": BuildExtension}

setup(
    name="nano-vllm",
    version="0.2.0",
    author="Zijie Tian",
    description="A lightweight vLLM implementation with CUDA sgDMA support",
    packages=find_packages(),
    ext_modules=[_sgdma_extension],
    cmdclass=_command_overrides,
    python_requires=">=3.10,<3.13",
    install_requires=_runtime_requirements,
)