✨ feat: add context length and error handling to profile_offload.sh
- Add --ctx-len parameter (32k/64k/128k) for context length selection
- Auto-configure max-model-len and data-dir based on context length
- Add error handling to delete .nsys-rep file on test failure
- Remove set -e to allow proper error handling
- Update output filename format to include context length

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -7,24 +7,25 @@
|
||||
#
|
||||
# Options:
|
||||
# --policy POLICY Sparse policy name (default: full)
|
||||
# --ctx-len LENGTH Context length: 32k, 64k, 128k (default: 64k)
|
||||
# --dataset DATASET Task name (default: niah_single_1)
|
||||
# --sample INDEX Sample index (default: 0)
|
||||
# --gpu GPU_ID GPU to use (default: 0)
|
||||
# --num-gpu-blocks N Number of GPU blocks/slots (default: 4)
|
||||
# --block-size SIZE KV cache block size (default: 4096)
|
||||
# --no-offload Disable CPU offload
|
||||
#
|
||||
# Output:
|
||||
# results/nsys/<policy>_<gpuonly|offload>_<timestamp>.nsys-rep
|
||||
# results/nsys/<policy>_<gpuonly|offload>_<ctx-len>_blk<size>_<timestamp>.nsys-rep
|
||||
#
|
||||
# Examples:
|
||||
# bash scripts/profile_offload.sh
|
||||
# bash scripts/profile_offload.sh --policy xattn --no-offload
|
||||
# bash scripts/profile_offload.sh --policy full --num-gpu-blocks 8
|
||||
|
||||
set -e
|
||||
# bash scripts/profile_offload.sh --policy xattn --ctx-len 128k --no-offload
|
||||
# bash scripts/profile_offload.sh --policy full --ctx-len 32k --num-gpu-blocks 8
|
||||
|
||||
# Default configuration
|
||||
POLICY="full"
|
||||
CTX_LEN="64k"
|
||||
DATASET="niah_single_1"
|
||||
SAMPLE_INDEX="0"
|
||||
GPU_ID="0"
|
||||
@@ -40,6 +41,10 @@ while [[ $# -gt 0 ]]; do
|
||||
POLICY="$2"
|
||||
shift 2
|
||||
;;
|
||||
--ctx-len)
|
||||
CTX_LEN="$2"
|
||||
shift 2
|
||||
;;
|
||||
--dataset)
|
||||
DATASET="$2"
|
||||
shift 2
|
||||
@@ -73,9 +78,12 @@ while [[ $# -gt 0 ]]; do
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " --policy POLICY Sparse policy name (default: full)"
|
||||
echo " --ctx-len LENGTH Context length: 32k, 64k, 128k (default: 64k)"
|
||||
echo " --block-size SIZE KV cache block size (default: 4096)"
|
||||
echo " --dataset DATASET Task name (default: niah_single_1)"
|
||||
echo " --sample INDEX Sample index (default: 0)"
|
||||
echo " --gpu GPU_ID GPU to use (default: 0)"
|
||||
echo " --gpu-util UTIL GPU memory utilization (default: 0.9)"
|
||||
echo " --no-offload Disable CPU offload"
|
||||
echo " --num-gpu-blocks N Number of GPU blocks/slots (default: 4)"
|
||||
exit 0
|
||||
@@ -92,6 +100,23 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
|
||||
OUTPUT_DIR="$PROJECT_ROOT/results/nsys"
|
||||
TEST_SCRIPT="$PROJECT_ROOT/tests/test_ruler.py"
|
||||
DATA_DIR="$PROJECT_ROOT/tests/data/ruler_${CTX_LEN}"
|
||||
|
||||
# Set max-model-len based on context length.
#
# Only 32k, 64k and 128k are supported. DATA_DIR is built from the same
# CTX_LEN value (ruler_<ctx-len>), so an unrecognised length must not be
# silently profiled with the 64k limit: the run would pair a mismatched
# dataset directory with the wrong model length. Reject it up front instead.

#######################################
# Print the --max-model-len value for a context-length label.
# Arguments: $1 - context length label (32k, 64k or 128k)
# Outputs:   the numeric limit on stdout
# Returns:   0 for a supported label, 1 otherwise (nothing is printed)
#######################################
max_model_len_for_ctx() {
  case "$1" in
    32k) echo 36000 ;;
    64k) echo 72000 ;;
    128k) echo 144000 ;;
    *) return 1 ;;
  esac
}

# An empty/unset CTX_LEN resolves to the documented default (64k), which is
# the value the previous catch-all arm produced for that case.
if ! MAX_MODEL_LEN="$(max_model_len_for_ctx "${CTX_LEN:-64k}")"; then
  echo "Error: unsupported --ctx-len '$CTX_LEN' (expected 32k, 64k, or 128k)" >&2
  exit 1
fi
|
||||
|
||||
# Create output directory if needed
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
@@ -103,18 +128,20 @@ if [ -n "$ENABLE_OFFLOAD" ]; then
|
||||
else
|
||||
OFFLOAD_TAG="gpuonly"
|
||||
fi
|
||||
OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_blk${BLOCK_SIZE}_${TIMESTAMP}"
|
||||
OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_${CTX_LEN}_blk${BLOCK_SIZE}_${TIMESTAMP}"
|
||||
|
||||
echo "============================================================"
|
||||
echo "NVIDIA Nsight Systems Profiling"
|
||||
echo "============================================================"
|
||||
echo "Policy: $POLICY"
|
||||
echo "Offload: $OFFLOAD_TAG"
|
||||
echo "Context: $CTX_LEN"
|
||||
echo "Block Size: $BLOCK_SIZE"
|
||||
echo "Dataset: $DATASET"
|
||||
echo "Sample: $SAMPLE_INDEX"
|
||||
echo "GPU: $GPU_ID"
|
||||
echo "GPU Blocks: $NUM_GPU_BLOCKS"
|
||||
echo "Data Dir: $DATA_DIR"
|
||||
echo "Output file: $OUTPUT_FILE.nsys-rep"
|
||||
echo ""
|
||||
|
||||
@@ -145,20 +172,35 @@ if [ -n "$POLICY_ENUM" ] && [ "$POLICY_ENUM" != "full" ]; then
|
||||
SPARSE_POLICY_ARG="--sparse-policy $POLICY_ENUM"
|
||||
fi
|
||||
|
||||
# Run the test script under `nsys profile` and capture the exit code.
#
# - CUDA_VISIBLE_DEVICES limits the run to the selected GPU.
# - PROJECT_ROOT is prepended to PYTHONPATH. The ${PYTHONPATH:+...} form adds
#   the ":" separator only when PYTHONPATH is already set, so an unset variable
#   does not leave a trailing empty entry (which Python would treat as the
#   current directory).
# - ENABLE_OFFLOAD and SPARSE_POLICY_ARG are optional flag strings that may be
#   empty or hold several words ("--sparse-policy <name>"); they are left
#   unquoted on purpose so they split into separate arguments or vanish.
# shellcheck disable=SC2086
CUDA_VISIBLE_DEVICES="$GPU_ID" PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}" \
  nsys profile \
    --trace=cuda,nvtx \
    --force-overwrite=true \
    --output="$OUTPUT_FILE" \
    python "$TEST_SCRIPT" \
      --data-dir "$DATA_DIR" \
      --datasets "$DATASET" \
      --sample-indices "$SAMPLE_INDEX" \
      --num-gpu-blocks "$NUM_GPU_BLOCKS" \
      --block-size "$BLOCK_SIZE" \
      --max-model-len "$MAX_MODEL_LEN" \
      --gpu-utilization "$GPU_UTIL" \
      $ENABLE_OFFLOAD \
      $SPARSE_POLICY_ARG \
      --quiet
# Must come right after the command: any statement in between would overwrite
# $?. This relies on `set -e` not being active, otherwise a failing run would
# abort the script before the status could be inspected.
EXIT_CODE=$?
|
||||
|
||||
# A non-zero status from the profiling command means the run is not usable:
# report it, remove the .nsys-rep report, and exit with the same status.
if [[ "$EXIT_CODE" -ne 0 ]]; then
  failure_rule="============================================================"
  printf '\n%s\n%s\n%s\n' "$failure_rule" "Test FAILED! Cleaning up..." "$failure_rule"
  # -f keeps this quiet when the report was never written.
  rm -f "$OUTPUT_FILE.nsys-rep"
  printf '%s\n' "Deleted: $OUTPUT_FILE.nsys-rep"
  exit "$EXIT_CODE"
fi
|
||||
|
||||
echo ""
|
||||
echo "============================================================"
|
||||
|
||||
Reference in New Issue
Block a user