✨ feat: enhance profile_offload.sh with policy, block-size parameters
- Add --policy parameter for sparse attention policy selection (full/xattn)
- Add --block-size parameter (default 4096) for KV cache block size
- Add --gpu-util parameter for GPU memory utilization control
- Improve output filename format: <policy>_<gpuonly|offload>_blk<size>_<timestamp>
- Map user-friendly policy names to internal enum (xattn -> XATTN_BSA)

Generated with [Claude Code](https://claude.ai/code) via [Happy](https://happy.engineering)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Happy <yesreply@happy.engineering>
This commit is contained in:
@@ -6,6 +6,7 @@
|
|||||||
# bash scripts/profile_offload.sh [options]
|
# bash scripts/profile_offload.sh [options]
|
||||||
#
|
#
|
||||||
# Options:
|
# Options:
|
||||||
|
# --policy POLICY Sparse attention policy: full or xattn (default: full)
|
||||||
# --dataset DATASET Task name (default: niah_single_1)
|
# --dataset DATASET Task name (default: niah_single_1)
|
||||||
# --sample INDEX Sample index (default: 0)
|
# --sample INDEX Sample index (default: 0)
|
||||||
# --gpu GPU_ID GPU to use (default: 0)
|
# --gpu GPU_ID GPU to use (default: 0)
|
||||||
@@ -13,26 +14,32 @@
|
|||||||
# --no-offload Disable CPU offload
|
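# --no-offload Disable CPU offload
# --block-size SIZE KV cache block size (default: 4096)
# --gpu-util UTIL GPU memory utilization (default: 0.9)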
|
||||||
#
|
#
|
||||||
# Output:
|
# Output:
|
||||||
# results/nsys/ruler_<dataset>_sample<index>_<timestamp>.nsys-rep
|
# results/nsys/<policy>_<gpuonly|offload>_blk<size>_<timestamp>.nsys-rep
|
||||||
#
|
#
|
||||||
# Examples:
|
# Examples:
|
||||||
# bash scripts/profile_offload.sh
|
# bash scripts/profile_offload.sh
|
||||||
# bash scripts/profile_offload.sh --dataset niah_single_1 --sample 5
|
# bash scripts/profile_offload.sh --policy xattn --no-offload
|
||||||
# bash scripts/profile_offload.sh --gpu 1 --no-offload
|
# bash scripts/profile_offload.sh --policy full --num-gpu-blocks 8
|
||||||
# bash scripts/profile_offload.sh --num-gpu-blocks 8
|
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Default configuration
|
# Default configuration
|
||||||
|
POLICY="full"
|
||||||
DATASET="niah_single_1"
|
DATASET="niah_single_1"
|
||||||
SAMPLE_INDEX="0"
|
SAMPLE_INDEX="0"
|
||||||
GPU_ID="0"
|
GPU_ID="0"
|
||||||
NUM_GPU_BLOCKS="4"
|
NUM_GPU_BLOCKS="4"
|
||||||
|
BLOCK_SIZE="4096"
|
||||||
|
GPU_UTIL="0.9"
|
||||||
ENABLE_OFFLOAD="--enable-offload"
|
ENABLE_OFFLOAD="--enable-offload"
|
||||||
|
|
||||||
# Parse arguments
|
# Parse arguments
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case $1 in
|
case $1 in
|
||||||
|
--policy)
|
||||||
|
POLICY="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
--dataset)
|
--dataset)
|
||||||
DATASET="$2"
|
DATASET="$2"
|
||||||
shift 2
|
shift 2
|
||||||
@@ -53,10 +60,19 @@ while [[ $# -gt 0 ]]; do
|
|||||||
NUM_GPU_BLOCKS="$2"
|
NUM_GPU_BLOCKS="$2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--gpu-util)
|
||||||
|
GPU_UTIL="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--block-size)
|
||||||
|
BLOCK_SIZE="$2"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
-h|--help)
|
-h|--help)
|
||||||
echo "Usage: $0 [options]"
|
echo "Usage: $0 [options]"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Options:"
|
echo "Options:"
|
||||||
|
echo " --policy POLICY Sparse policy name (default: full)"
|
||||||
echo " --dataset DATASET Task name (default: niah_single_1)"
|
echo " --dataset DATASET Task name (default: niah_single_1)"
|
||||||
echo " --sample INDEX Sample index (default: 0)"
|
echo " --sample INDEX Sample index (default: 0)"
|
||||||
echo " --gpu GPU_ID GPU to use (default: 0)"
|
echo " --gpu GPU_ID GPU to use (default: 0)"
|
||||||
@@ -82,21 +98,23 @@ mkdir -p "$OUTPUT_DIR"
|
|||||||
|
|
||||||
# Generate timestamp for unique filename
|
# Generate timestamp for unique filename
|
||||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||||
OFFLOAD_SUFFIX=""
|
|
||||||
if [ -n "$ENABLE_OFFLOAD" ]; then
|
if [ -n "$ENABLE_OFFLOAD" ]; then
|
||||||
OFFLOAD_SUFFIX="_offload_${NUM_GPU_BLOCKS}slots"
|
OFFLOAD_TAG="offload"
|
||||||
|
else
|
||||||
|
OFFLOAD_TAG="gpuonly"
|
||||||
fi
|
fi
|
||||||
OUTPUT_FILE="$OUTPUT_DIR/ruler_${DATASET}_sample${SAMPLE_INDEX}${OFFLOAD_SUFFIX}_${TIMESTAMP}"
|
OUTPUT_FILE="$OUTPUT_DIR/${POLICY}_${OFFLOAD_TAG}_blk${BLOCK_SIZE}_${TIMESTAMP}"
|
||||||
|
|
||||||
echo "============================================================"
|
echo "============================================================"
|
||||||
echo "NVIDIA Nsight Systems Profiling"
|
echo "NVIDIA Nsight Systems Profiling"
|
||||||
echo "============================================================"
|
echo "============================================================"
|
||||||
echo "Test script: $TEST_SCRIPT"
|
echo "Policy: $POLICY"
|
||||||
|
echo "Offload: $OFFLOAD_TAG"
|
||||||
|
echo "Block Size: $BLOCK_SIZE"
|
||||||
echo "Dataset: $DATASET"
|
echo "Dataset: $DATASET"
|
||||||
echo "Sample: $SAMPLE_INDEX"
|
echo "Sample: $SAMPLE_INDEX"
|
||||||
echo "GPU: $GPU_ID"
|
echo "GPU: $GPU_ID"
|
||||||
echo "GPU Blocks: $NUM_GPU_BLOCKS"
|
echo "GPU Blocks: $NUM_GPU_BLOCKS"
|
||||||
echo "Offload: ${ENABLE_OFFLOAD:-disabled}"
|
|
||||||
echo "Output file: $OUTPUT_FILE.nsys-rep"
|
echo "Output file: $OUTPUT_FILE.nsys-rep"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
@@ -110,6 +128,23 @@ echo ""
|
|||||||
echo "Running nsys profile..."
|
echo "Running nsys profile..."
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
# Map policy name to internal enum name
|
||||||
|
# User-friendly name -> SparsePolicyType enum name
|
||||||
|
case "$POLICY" in
|
||||||
|
xattn)
|
||||||
|
POLICY_ENUM="XATTN_BSA"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
POLICY_ENUM="$POLICY"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
# Build sparse policy argument (left empty for "full", so no --sparse-policy flag is passed)
|
||||||
|
SPARSE_POLICY_ARG=""
|
||||||
|
if [ -n "$POLICY_ENUM" ] && [ "$POLICY_ENUM" != "full" ]; then
|
||||||
|
SPARSE_POLICY_ARG="--sparse-policy $POLICY_ENUM"
|
||||||
|
fi
|
||||||
|
|
||||||
CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \
|
CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \
|
||||||
nsys profile \
|
nsys profile \
|
||||||
--trace=cuda,nvtx \
|
--trace=cuda,nvtx \
|
||||||
@@ -119,7 +154,10 @@ nsys profile \
|
|||||||
--datasets "$DATASET" \
|
--datasets "$DATASET" \
|
||||||
--sample-indices "$SAMPLE_INDEX" \
|
--sample-indices "$SAMPLE_INDEX" \
|
||||||
--num-gpu-blocks "$NUM_GPU_BLOCKS" \
|
--num-gpu-blocks "$NUM_GPU_BLOCKS" \
|
||||||
|
--block-size "$BLOCK_SIZE" \
|
||||||
|
--gpu-utilization "$GPU_UTIL" \
|
||||||
$ENABLE_OFFLOAD \
|
$ENABLE_OFFLOAD \
|
||||||
|
$SPARSE_POLICY_ARG \
|
||||||
--quiet
|
--quiet
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
|
|||||||
Reference in New Issue
Block a user