[refactor] Refactor the profile_offload.sh

This commit is contained in:
Zijie Tian
2026-01-29 08:39:34 +08:00
parent e436ec861f
commit 4484a1482c

View File

@@ -33,6 +33,8 @@ NUM_GPU_BLOCKS="4"
BLOCK_SIZE="4096" BLOCK_SIZE="4096"
GPU_UTIL="0.9" GPU_UTIL="0.9"
ENABLE_OFFLOAD="--enable-offload" ENABLE_OFFLOAD="--enable-offload"
MODEL=""
DATA_DIR_OVERRIDE=""
# Parse arguments # Parse arguments
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
@@ -73,6 +75,14 @@ while [[ $# -gt 0 ]]; do
BLOCK_SIZE="$2" BLOCK_SIZE="$2"
shift 2 shift 2
;; ;;
--model)
MODEL="$2"
shift 2
;;
--data-dir)
DATA_DIR_OVERRIDE="$2"
shift 2
;;
-h|--help) -h|--help)
echo "Usage: $0 [options]" echo "Usage: $0 [options]"
echo "" echo ""
@@ -113,11 +123,25 @@ case "$CTX_LEN" in
128k) 128k)
MAX_MODEL_LEN=144000 MAX_MODEL_LEN=144000
;; ;;
256k)
MAX_MODEL_LEN=288000
;;
512k)
MAX_MODEL_LEN=576000
;;
1m)
MAX_MODEL_LEN=1100000
;;
*) *)
MAX_MODEL_LEN=72000 MAX_MODEL_LEN=72000
;; ;;
esac esac
# Let an explicit --data-dir value take precedence over the DATA_DIR chosen
# earlier. An empty DATA_DIR_OVERRIDE (the default) leaves DATA_DIR untouched.
[[ -z "$DATA_DIR_OVERRIDE" ]] || DATA_DIR="$DATA_DIR_OVERRIDE"
# Create output directory if needed # Create output directory if needed
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
@@ -172,6 +196,12 @@ if [ -n "$POLICY_ENUM" ] && [ "$POLICY_ENUM" != "full" ]; then
SPARSE_POLICY_ARG="--sparse-policy $POLICY_ENUM" SPARSE_POLICY_ARG="--sparse-policy $POLICY_ENUM"
fi fi
# Optional model flag for the profiled command: empty when MODEL is empty,
# otherwise the single string "--model <MODEL>".
# NOTE(review): MODEL_ARG is expanded unquoted on the nsys command line, so a
# MODEL value containing whitespace would be word-split there.
MODEL_ARG="${MODEL:+--model $MODEL}"
# Run nsys profile and capture exit code # Run nsys profile and capture exit code
CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \ CUDA_VISIBLE_DEVICES=$GPU_ID PYTHONPATH="$PROJECT_ROOT:$PYTHONPATH" \
nsys profile \ nsys profile \
@@ -188,6 +218,7 @@ nsys profile \
--gpu-utilization "$GPU_UTIL" \ --gpu-utilization "$GPU_UTIL" \
$ENABLE_OFFLOAD \ $ENABLE_OFFLOAD \
$SPARSE_POLICY_ARG \ $SPARSE_POLICY_ARG \
$MODEL_ARG \
--quiet --quiet
EXIT_CODE=$? EXIT_CODE=$?