# nano-vllm/nanovllm/ops/__init__.py

"""
Operators module for nano-vLLM.

This module contains low-level attention operators and kernels.
"""

from nanovllm.ops.chunked_attention import (
    flash_attn_with_lse,
    merge_attention_outputs,
    chunked_attention_varlen,
    ChunkedPrefillState,
)

from nanovllm.ops.xattn import (
    xattn_estimate,
    xattn_estimate_chunked,
    flat_group_gemm_fuse_reshape,
    softmax_fuse_block_sum,
    find_blocks_chunked,
    create_causal_mask,
    compute_sparsity,
)

# Public API of this package, assembled per source submodule so that each
# group of names mirrors the corresponding import statement above.

# Names re-exported from nanovllm.ops.chunked_attention.
__all__ = [
    "flash_attn_with_lse",
    "merge_attention_outputs",
    "chunked_attention_varlen",
    "ChunkedPrefillState",
]

# Names re-exported from nanovllm.ops.xattn.
__all__ += [
    "xattn_estimate",
    "xattn_estimate_chunked",
    "flat_group_gemm_fuse_reshape",
    "softmax_fuse_block_sum",
    "find_blocks_chunked",
    "create_causal_mask",
    "compute_sparsity",
]