From b760de84c5d13f29a43c4b6f6973ef49d78e3bb6 Mon Sep 17 00:00:00 2001 From: Zijie Tian Date: Wed, 28 Jan 2026 00:28:37 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20add=20context=20length=20an?= =?UTF-8?q?d=20error=20handling=20to=20profile=5Foffload.sh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add --ctx-len parameter (32k/64k/128k) for context length selection - Auto-configure max-model-len and data-dir based on context length - Add error handling to delete .nsys-rep file on test failure - Remove set -e to allow proper error handling - Update output filename format to include context length Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering) Co-Authored-By: Claude Co-Authored-By: Happy --- scripts/profile_offload.sh | 54 +++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/scripts/profile_offload.sh b/scripts/profile_offload.sh index 45656c9..77f5e16 100755 --- a/scripts/profile_offload.sh +++ b/scripts/profile_offload.sh @@ -7,24 +7,25 @@ # # Options: # --policy POLICY Sparse policy name (default: full) +# --ctx-len LENGTH Context length: 32k, 64k, 128k (default: 64k) # --dataset DATASET Task name (default: niah_single_1) # --sample INDEX Sample index (default: 0) # --gpu GPU_ID GPU to use (default: 0) # --num-gpu-blocks N Number of GPU blocks/slots (default: 4) +# --block-size SIZE KV cache block size (default: 4096) # --no-offload Disable CPU offload # # Output: -# results/nsys/<policy>_<offload>_<timestamp>.nsys-rep +# results/nsys/<policy>_<offload>_<ctx_len>_blk<block_size>_<timestamp>.nsys-rep # # Examples: # bash scripts/profile_offload.sh -# bash scripts/profile_offload.sh --policy xattn --no-offload -# bash scripts/profile_offload.sh --policy full --num-gpu-blocks 8 - -set -e +# bash scripts/profile_offload.sh --policy xattn --ctx-len 128k --no-offload +# bash scripts/profile_offload.sh --policy full --ctx-len 32k --num-gpu-blocks 8 # Default configuration POLICY="full" +CTX_LEN="64k" 
DATASET="niah_single_1" SAMPLE_INDEX="0" GPU_ID="0" @@ -40,6 +41,10 @@ while [[ $# -gt 0 ]]; do POLICY="$2" shift 2 ;; + --ctx-len) + CTX_LEN="$2" + shift 2 + ;; --dataset) DATASET="$2" shift 2 @@ -73,9 +78,12 @@ while [[ $# -gt 0 ]]; do echo "" echo "Options:" echo " --policy POLICY Sparse policy name (default: full)" + echo " --ctx-len LENGTH Context length: 32k, 64k, 128k (default: 64k)" + echo " --block-size SIZE KV cache block size (default: 4096)" echo " --dataset DATASET Task name (default: niah_single_1)" echo " --sample INDEX Sample index (default: 0)" echo " --gpu GPU_ID GPU to use (default: 0)" + echo " --gpu-util UTIL GPU memory utilization (default: 0.9)" echo " --no-offload Disable CPU offload" echo " --num-gpu-blocks N Number of GPU blocks/slots (default: 4)" exit 0 @@ -92,6 +100,23 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" OUTPUT_DIR="$PROJECT_ROOT/results/nsys" TEST_SCRIPT="$PROJECT_ROOT/tests/test_ruler.py" +DATA_DIR="$PROJECT_ROOT/tests/data/ruler_${CTX_LEN}" + +# Set max-model-len based on context length +case "$CTX_LEN" in + 32k) + MAX_MODEL_LEN=36000 + ;; + 64k) + MAX_MODEL_LEN=72000 + ;; + 128k) + MAX_MODEL_LEN=144000 + ;; + *) + MAX_MODEL_LEN=72000 + ;; +esac # Create output directory if needed mkdir -p "$OUTPUT_DIR" @@ -103,18 +128,20 @@ if [ -n "$ENABLE_OFFLOAD" ]; then else OFFLOAD_TAG="gpuonly" fi -OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_blk${BLOCK_SIZE}_${TIMESTAMP}" +OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_${CTX_LEN}_blk${BLOCK_SIZE}_${TIMESTAMP}" echo "============================================================" echo "NVIDIA Nsight Systems Profiling" echo "============================================================" echo "Policy: $POLICY" echo "Offload: $OFFLOAD_TAG" +echo "Context: $CTX_LEN" echo "Block Size: $BLOCK_SIZE" echo "Dataset: $DATASET" echo "Sample: $SAMPLE_INDEX" echo "GPU: $GPU_ID" echo "GPU Blocks: $NUM_GPU_BLOCKS" +echo "Data Dir: 
$DATA_DIR" echo "Output file: $OUTPUT_FILE.nsys-rep" echo "" @@ -145,20 +172,35 @@ if [ -n "$POLICY_ENUM" ] && [ "$POLICY_ENUM" != "full" ]; then SPARSE_POLICY_ARG="--sparse-policy $POLICY_ENUM" fi +# Run nsys profile and capture exit code CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \ nsys profile \ --trace=cuda,nvtx \ --force-overwrite=true \ --output="$OUTPUT_FILE" \ python "$TEST_SCRIPT" \ + --data-dir "$DATA_DIR" \ --datasets "$DATASET" \ --sample-indices "$SAMPLE_INDEX" \ --num-gpu-blocks "$NUM_GPU_BLOCKS" \ --block-size "$BLOCK_SIZE" \ + --max-model-len "$MAX_MODEL_LEN" \ --gpu-utilization "$GPU_UTIL" \ $ENABLE_OFFLOAD \ $SPARSE_POLICY_ARG \ --quiet +EXIT_CODE=$? + +# If test failed, delete the output file +if [ $EXIT_CODE -ne 0 ]; then + echo "" + echo "============================================================" + echo "Test FAILED! Cleaning up..." + echo "============================================================" + rm -f "$OUTPUT_FILE.nsys-rep" + echo "Deleted: $OUTPUT_FILE.nsys-rep" + exit $EXIT_CODE +fi echo "" echo "============================================================"