profile_offload.sh: add --policy, --block-size and --gpu-util options

Adds a sparse-policy selector plus block-size and GPU-utilization knobs to
the nsys profiling wrapper, and renames the report file to
<policy>_<offload|gpuonly>_blk<block_size>_<timestamp>.

Review notes:
  * --block-size and --gpu-util are now listed in the header comment and in
    the --help output (they were parsed but undocumented).
  * The "Output:" comment now mirrors the OUTPUT_FILE assignment.
  * The sparse-policy arguments are built as a Bash array and expanded
    quoted, instead of a flat string expanded unquoted.
  * NOTE(review): whitespace and blank lines in context/removed lines were
    reconstructed from a whitespace-collapsed copy of this patch; apply with
    `git apply --ignore-whitespace` or regenerate against the real file.
    The post-image hash on the index line predates these amendments.

diff --git a/scripts/profile_offload.sh b/scripts/profile_offload.sh
index 86d7d80..45656c9 100755
--- a/scripts/profile_offload.sh
+++ b/scripts/profile_offload.sh
@@ -6,6 +6,9 @@
 #   bash scripts/profile_offload.sh [options]
 #
 # Options:
+#   --policy POLICY       Sparse policy name (default: full)
+#   --block-size SIZE     Block size (default: 4096)
+#   --gpu-util UTIL       GPU utilization (default: 0.9)
 #   --dataset DATASET     Task name (default: niah_single_1)
 #   --sample INDEX        Sample index (default: 0)
 #   --gpu GPU_ID          GPU to use (default: 0)
@@ -13,26 +16,32 @@
 #   --no-offload          Disable CPU offload
 #
 # Output:
-#   results/nsys/ruler__sample_.nsys-rep
+#   results/nsys/<policy>_<offload|gpuonly>_blk<block_size>_<timestamp>.nsys-rep
 #
 # Examples:
 #   bash scripts/profile_offload.sh
-#   bash scripts/profile_offload.sh --dataset niah_single_1 --sample 5
-#   bash scripts/profile_offload.sh --gpu 1 --no-offload
-#   bash scripts/profile_offload.sh --num-gpu-blocks 8
+#   bash scripts/profile_offload.sh --policy xattn --no-offload
+#   bash scripts/profile_offload.sh --policy full --num-gpu-blocks 8
 
 set -e
 
 # Default configuration
+POLICY="full"
 DATASET="niah_single_1"
 SAMPLE_INDEX="0"
 GPU_ID="0"
 NUM_GPU_BLOCKS="4"
+BLOCK_SIZE="4096"
+GPU_UTIL="0.9"
 ENABLE_OFFLOAD="--enable-offload"
 
 # Parse arguments
 while [[ $# -gt 0 ]]; do
     case $1 in
+        --policy)
+            POLICY="$2"
+            shift 2
+            ;;
         --dataset)
             DATASET="$2"
             shift 2
@@ -53,10 +62,21 @@ while [[ $# -gt 0 ]]; do
             NUM_GPU_BLOCKS="$2"
             shift 2
             ;;
+        --gpu-util)
+            GPU_UTIL="$2"
+            shift 2
+            ;;
+        --block-size)
+            BLOCK_SIZE="$2"
+            shift 2
+            ;;
         -h|--help)
             echo "Usage: $0 [options]"
             echo ""
             echo "Options:"
+            echo "  --policy POLICY       Sparse policy name (default: full)"
+            echo "  --block-size SIZE     Block size (default: 4096)"
+            echo "  --gpu-util UTIL       GPU utilization (default: 0.9)"
             echo "  --dataset DATASET     Task name (default: niah_single_1)"
             echo "  --sample INDEX        Sample index (default: 0)"
             echo "  --gpu GPU_ID          GPU to use (default: 0)"
@@ -82,21 +102,23 @@ mkdir -p "$OUTPUT_DIR"
 
 # Generate timestamp for unique filename
 TIMESTAMP=$(date +%Y%m%d_%H%M%S)
-OFFLOAD_SUFFIX=""
 if [ -n "$ENABLE_OFFLOAD" ]; then
-    OFFLOAD_SUFFIX="_offload_${NUM_GPU_BLOCKS}slots"
+    OFFLOAD_TAG="offload"
+else
+    OFFLOAD_TAG="gpuonly"
 fi
-OUTPUT_FILE="$OUTPUT_DIR/ruler_${DATASET}_sample${SAMPLE_INDEX}${OFFLOAD_SUFFIX}_${TIMESTAMP}"
+OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_blk${BLOCK_SIZE}_${TIMESTAMP}"
 
 echo "============================================================"
 echo "NVIDIA Nsight Systems Profiling"
 echo "============================================================"
-echo "Test script: $TEST_SCRIPT"
+echo "Policy:      $POLICY"
+echo "Offload:     $OFFLOAD_TAG"
+echo "Block Size:  $BLOCK_SIZE"
 echo "Dataset:     $DATASET"
 echo "Sample:      $SAMPLE_INDEX"
 echo "GPU:         $GPU_ID"
 echo "GPU Blocks:  $NUM_GPU_BLOCKS"
-echo "Offload:     ${ENABLE_OFFLOAD:-disabled}"
 echo "Output file: $OUTPUT_FILE.nsys-rep"
 echo ""
 
@@ -110,6 +132,24 @@ echo ""
 echo "Running nsys profile..."
 echo ""
 
+# Map policy name to internal enum name
+# User-friendly name -> SparsePolicyType enum name
+case "$POLICY" in
+    xattn)
+        POLICY_ENUM="XATTN_BSA"
+        ;;
+    *)
+        POLICY_ENUM="$POLICY"
+        ;;
+esac
+
+# Build the sparse policy arguments as an array so the policy name is passed
+# as a single word; the array stays empty when the policy is empty or "full".
+SPARSE_POLICY_ARGS=()
+if [[ -n "$POLICY_ENUM" && "$POLICY_ENUM" != "full" ]]; then
+    SPARSE_POLICY_ARGS=(--sparse-policy "$POLICY_ENUM")
+fi
+
 CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \
 nsys profile \
     --trace=cuda,nvtx \
@@ -119,7 +159,10 @@ nsys profile \
     --datasets "$DATASET" \
     --sample-indices "$SAMPLE_INDEX" \
     --num-gpu-blocks "$NUM_GPU_BLOCKS" \
+    --block-size "$BLOCK_SIZE" \
+    --gpu-utilization "$GPU_UTIL" \
     $ENABLE_OFFLOAD \
+    "${SPARSE_POLICY_ARGS[@]}" \
     --quiet
 
 echo ""