Compare commits

2 Commits

Author SHA1 Message Date
Zijie Tian
b760de84c5 feat: add context length and error handling to profile_offload.sh
- Add --ctx-len parameter (32k/64k/128k) for context length selection
- Auto-configure max-model-len and data-dir based on context length
- Add error handling to delete .nsys-rep file on test failure
- Remove set -e to allow proper error handling
- Update output filename format to include context length

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-28 00:28:37 +08:00
Zijie Tian
f81b5ae8a9 feat: enhance profile_offload.sh with policy, block-size parameters
- Add --policy parameter for sparse attention policy selection (full/xattn)
- Add --block-size parameter (default 4096) for KV cache block size
- Add --gpu-util parameter for GPU memory utilization control
- Improve output filename format: <policy>_<gpuonly|offload>_blk<size>_<timestamp>
- Map user-friendly policy names to internal enum (xattn -> XATTN_BSA)

Generated with [Claude Code](https://claude.ai/code)
via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
2026-01-27 23:23:20 +08:00

View File

@@ -6,33 +6,45 @@
# bash scripts/profile_offload.sh [options] # bash scripts/profile_offload.sh [options]
# #
# Options: # Options:
# --policy POLICY Sparse policy name (default: full)
# --ctx-len LENGTH Context length: 32k, 64k, 128k (default: 64k)
# --dataset DATASET Task name (default: niah_single_1) # --dataset DATASET Task name (default: niah_single_1)
# --sample INDEX Sample index (default: 0) # --sample INDEX Sample index (default: 0)
# --gpu GPU_ID GPU to use (default: 0) # --gpu GPU_ID GPU to use (default: 0)
# --num-gpu-blocks N Number of GPU blocks/slots (default: 4) # --num-gpu-blocks N Number of GPU blocks/slots (default: 4)
# --block-size SIZE KV cache block size (default: 4096)
# --no-offload Disable CPU offload # --no-offload Disable CPU offload
# #
# Output: # Output:
# results/nsys/ruler_<dataset>_sample<index>_<timestamp>.nsys-rep # results/nsys/<policy>_<gpuonly|offload>_<ctx-len>_blk<size>_<timestamp>.nsys-rep
# #
# Examples: # Examples:
# bash scripts/profile_offload.sh # bash scripts/profile_offload.sh
# bash scripts/profile_offload.sh --dataset niah_single_1 --sample 5 # bash scripts/profile_offload.sh --policy xattn --ctx-len 128k --no-offload
# bash scripts/profile_offload.sh --gpu 1 --no-offload # bash scripts/profile_offload.sh --policy full --ctx-len 32k --num-gpu-blocks 8
# bash scripts/profile_offload.sh --num-gpu-blocks 8
set -e
# Default configuration # Default configuration
POLICY="full"
CTX_LEN="64k"
DATASET="niah_single_1" DATASET="niah_single_1"
SAMPLE_INDEX="0" SAMPLE_INDEX="0"
GPU_ID="0" GPU_ID="0"
NUM_GPU_BLOCKS="4" NUM_GPU_BLOCKS="4"
BLOCK_SIZE="4096"
GPU_UTIL="0.9"
ENABLE_OFFLOAD="--enable-offload" ENABLE_OFFLOAD="--enable-offload"
# Parse arguments # Parse arguments
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
--policy)
POLICY="$2"
shift 2
;;
--ctx-len)
CTX_LEN="$2"
shift 2
;;
--dataset) --dataset)
DATASET="$2" DATASET="$2"
shift 2 shift 2
@@ -53,13 +65,25 @@ while [[ $# -gt 0 ]]; do
NUM_GPU_BLOCKS="$2" NUM_GPU_BLOCKS="$2"
shift 2 shift 2
;; ;;
--gpu-util)
GPU_UTIL="$2"
shift 2
;;
--block-size)
BLOCK_SIZE="$2"
shift 2
;;
-h|--help) -h|--help)
echo "Usage: $0 [options]" echo "Usage: $0 [options]"
echo "" echo ""
echo "Options:" echo "Options:"
echo " --policy POLICY Sparse policy name (default: full)"
echo " --ctx-len LENGTH Context length: 32k, 64k, 128k (default: 64k)"
echo " --block-size SIZE KV cache block size (default: 4096)"
echo " --dataset DATASET Task name (default: niah_single_1)" echo " --dataset DATASET Task name (default: niah_single_1)"
echo " --sample INDEX Sample index (default: 0)" echo " --sample INDEX Sample index (default: 0)"
echo " --gpu GPU_ID GPU to use (default: 0)" echo " --gpu GPU_ID GPU to use (default: 0)"
echo " --gpu-util UTIL GPU memory utilization (default: 0.9)"
echo " --no-offload Disable CPU offload" echo " --no-offload Disable CPU offload"
echo " --num-gpu-blocks N Number of GPU blocks/slots (default: 4)" echo " --num-gpu-blocks N Number of GPU blocks/slots (default: 4)"
exit 0 exit 0
@@ -76,27 +100,48 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
OUTPUT_DIR="$PROJECT_ROOT/results/nsys" OUTPUT_DIR="$PROJECT_ROOT/results/nsys"
TEST_SCRIPT="$PROJECT_ROOT/tests/test_ruler.py" TEST_SCRIPT="$PROJECT_ROOT/tests/test_ruler.py"
DATA_DIR="$PROJECT_ROOT/tests/data/ruler_${CTX_LEN}"
# Derive the --max-model-len value from the requested context length.
# Each supported length gets a fixed limit somewhat above its nominal size.
case "$CTX_LEN" in
    32k)  MAX_MODEL_LEN=36000 ;;
    64k)  MAX_MODEL_LEN=72000 ;;
    128k) MAX_MODEL_LEN=144000 ;;
    *)
        # Unrecognised value: keep the existing 72000 fallback so current
        # invocations still run, but report it on stderr. DATA_DIR is built
        # from the raw CTX_LEN value, so a typo would otherwise go unnoticed
        # until the test script fails on a missing data directory.
        echo "Warning: unsupported --ctx-len '$CTX_LEN' (expected 32k, 64k or 128k); falling back to max-model-len 72000" >&2
        MAX_MODEL_LEN=72000
        ;;
esac
# Create output directory if needed # Create output directory if needed
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
# Generate timestamp for unique filename # Generate timestamp for unique filename
TIMESTAMP=$(date +%Y%m%d_%H%M%S) TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OFFLOAD_SUFFIX=""
if [ -n "$ENABLE_OFFLOAD" ]; then if [ -n "$ENABLE_OFFLOAD" ]; then
OFFLOAD_SUFFIX="_offload_${NUM_GPU_BLOCKS}slots" OFFLOAD_TAG="offload"
else
OFFLOAD_TAG="gpuonly"
fi fi
OUTPUT_FILE="$OUTPUT_DIR/ruler_${DATASET}_sample${SAMPLE_INDEX}${OFFLOAD_SUFFIX}_${TIMESTAMP}" OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_${CTX_LEN}_blk${BLOCK_SIZE}_${TIMESTAMP}"
echo "============================================================" echo "============================================================"
echo "NVIDIA Nsight Systems Profiling" echo "NVIDIA Nsight Systems Profiling"
echo "============================================================" echo "============================================================"
echo "Test script: $TEST_SCRIPT" echo "Policy: $POLICY"
echo "Offload: $OFFLOAD_TAG"
echo "Context: $CTX_LEN"
echo "Block Size: $BLOCK_SIZE"
echo "Dataset: $DATASET" echo "Dataset: $DATASET"
echo "Sample: $SAMPLE_INDEX" echo "Sample: $SAMPLE_INDEX"
echo "GPU: $GPU_ID" echo "GPU: $GPU_ID"
echo "GPU Blocks: $NUM_GPU_BLOCKS" echo "GPU Blocks: $NUM_GPU_BLOCKS"
echo "Offload: ${ENABLE_OFFLOAD:-disabled}" echo "Data Dir: $DATA_DIR"
echo "Output file: $OUTPUT_FILE.nsys-rep" echo "Output file: $OUTPUT_FILE.nsys-rep"
echo "" echo ""
@@ -110,17 +155,52 @@ echo ""
echo "Running nsys profile..." echo "Running nsys profile..."
echo "" echo ""
# Translate the user-facing policy name into the SparsePolicyType enum name.
# Only "xattn" has an alias (XATTN_BSA); any other name is passed through as-is.
if [[ "$POLICY" == "xattn" ]]; then
    POLICY_ENUM="XATTN_BSA"
else
    POLICY_ENUM="$POLICY"
fi

# Assemble the optional --sparse-policy flag. It stays empty for "full" or an
# empty policy, so no flag is passed in those cases.
SPARSE_POLICY_ARG=""
if [[ -n "$POLICY_ENUM" && "$POLICY_ENUM" != "full" ]]; then
    SPARSE_POLICY_ARG="--sparse-policy $POLICY_ENUM"
fi
# Run nsys profile and capture exit code
CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \ CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \
nsys profile \ nsys profile \
--trace=cuda,nvtx \ --trace=cuda,nvtx \
--force-overwrite=true \ --force-overwrite=true \
--output="$OUTPUT_FILE" \ --output="$OUTPUT_FILE" \
python "$TEST_SCRIPT" \ python "$TEST_SCRIPT" \
--data-dir "$DATA_DIR" \
--datasets "$DATASET" \ --datasets "$DATASET" \
--sample-indices "$SAMPLE_INDEX" \ --sample-indices "$SAMPLE_INDEX" \
--num-gpu-blocks "$NUM_GPU_BLOCKS" \ --num-gpu-blocks "$NUM_GPU_BLOCKS" \
--block-size "$BLOCK_SIZE" \
--max-model-len "$MAX_MODEL_LEN" \
--gpu-utilization "$GPU_UTIL" \
$ENABLE_OFFLOAD \ $ENABLE_OFFLOAD \
$SPARSE_POLICY_ARG \
--quiet --quiet
# Capture the status of the preceding command before anything else resets $?.
EXIT_CODE=$?

# On failure, remove the .nsys-rep output file and exit with the same status.
if (( EXIT_CODE != 0 )); then
    printf '%s\n' \
        "" \
        "============================================================" \
        "Test FAILED! Cleaning up..." \
        "============================================================"
    rm -f "$OUTPUT_FILE.nsys-rep"
    printf '%s\n' "Deleted: $OUTPUT_FILE.nsys-rep"
    exit "$EXIT_CODE"
fi
echo "" echo ""
echo "============================================================" echo "============================================================"