记一次尝试Ubuntu 24.04.3 + TensorRT-LLM + Qwen2.5-72B


尝试一下部署TensorRT-LLM + Qwen2.5-72B
配置:Ubuntu 24.04.3,L20 48GB ×2,Xeon 6530 ×2,256GB 内存

一、前置工作

1.备份还原点

未思胜,先思败:动手之前先做一份系统备份,万一把系统搞坏了可以还原。

登录并挂载 iSCSI 磁盘:

# Discover the targets offered by the iSCSI portal ("ip" is a placeholder for the portal address).
sudo iscsiadm -m discovery -t sendtargets -p ip:3260
# Log in to the discovered node(s).
sudo iscsiadm -m node --login
# Set node.startup to "automatic" in the node record(s).
sudo iscsiadm -m node --op update -n node.startup -v automatic
# List block devices to identify the disk that appeared after login.
lsblk

配置 CHAP(目标端未启用 CHAP 认证可忽略;若已启用,需先完成本节配置并重启 open-iscsi,再执行上面的 --login 登录)

sudo nano /etc/iscsi/iscsid.conf

# Find and modify the following settings:

node.session.auth.authmethod = CHAP
node.session.auth.username = 你的CHAP用户名
node.session.auth.password = 你的CHAP密码

sudo systemctl restart open-iscsi

格式化挂载硬盘

# Format the disk as ext4.
# NOTE(review): this assumes the iSCSI disk showed up as /dev/sdb — confirm with
# the lsblk output first; mkfs overwrites whatever is on the device.
sudo mkfs.ext4 /dev/sdb
# Create the mount directory and mount the disk there.
sudo mkdir -p /mnt/iscsi_backup
sudo mount /dev/sdb /mnt/iscsi_backup
# Show the filesystem type and usage of the mount point.
df -Th /mnt/iscsi_backup

备份

#!/usr/bin/env bash
# system_backup.sh
# Full + incremental backup to /mnt/iscsi_backup using rsync hard-link snapshots.
# Must run as root: sudo /usr/local/bin/system_backup.sh

set -euo pipefail

### ===== Configuration =====
SOURCE="/"                                   # backup source (system root)
TARGET_MOUNT="/mnt/iscsi_backup"             # iSCSI mount point
SNAPSHOT_DIR="${TARGET_MOUNT}/system_snapshots"  # root directory of all snapshots
LOG_FILE="/var/log/system_backup.log"        # log file
LOCK_FILE="/var/run/system_backup.lock"      # re-entrancy lock
RETAIN=3                                   # keep the newest N snapshots (0 = never prune)
DATE_TAG="$(date +%Y%m%d-%H%M%S)"            # snapshot suffix
NEW_SNAP="${SNAPSHOT_DIR}/snap_${DATE_TAG}"  # directory of the new snapshot
# "|| true" keeps "set -e" from aborting here when rsync is not installed yet;
# an empty value is what check_env tests for before installing rsync.
RSYNC_BIN="$(command -v rsync || true)"
SHA256_BIN="$(command -v sha256sum || true)" # optional: used to write a checksum manifest
PIGZ_BIN="$(command -v pigz || true)"        # not used by this script; kept for later compression extensions

# Exclude list (add or remove entries as needed).
# Virtual and volatile trees are excluded by content ("/proc/*") rather than by
# directory ("/proc"): the empty directories then stay in the snapshot, so a
# restore with --delete keeps /proc, /sys, /dev, /run and /tmp as mount points.
RSYNC_EXCLUDES=(
  "--exclude=${TARGET_MOUNT}"
  "--exclude=/proc/*"
  "--exclude=/sys/*"
  "--exclude=/dev/*"
  "--exclude=/run/*"
  "--exclude=/tmp/*"
  "--exclude=/lost+found"
  "--exclude=/swapfile"
  "--exclude=/var/tmp/*"
  # When Docker/containerd is in use, the image layer caches can be skipped:
  "--exclude=/var/lib/docker/overlay2"
  "--exclude=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs"
)

# Optional hook called by do_backup before rsync starts. Use it to stop or
# quiesce databases etc. so the copy is consistent (enable as needed).
# As shipped it is a no-op (":").
PRE_HOOK() {
  :
  # e.g. systemctl stop mysql || true
  # e.g. docker exec -t pg_container pg_dumpall -U postgres > "${TARGET_MOUNT}/pg_dump_${DATE_TAG}.sql" || true
}

# Optional hook called by do_backup after rsync: restart whatever PRE_HOOK
# stopped. As shipped it is a no-op (":").
POST_HOOK() {
  :
  # e.g. systemctl start mysql || true
}
### ===== End of configuration =====


# Print a timestamped message to stdout and append the same line to $LOG_FILE.
# Arguments: $* - message text
log() {
  local stamp
  stamp="$(date '+%F %T')"
  printf '[%s] %s\n' "$stamp" "$*" | tee -a "$LOG_FILE"
}

# EXIT handler: delete the lock file when it exists as a regular file.
cleanup() {
  [[ ! -f "$LOCK_FILE" ]] || rm -f "$LOCK_FILE"
}
trap cleanup EXIT

# Exit with status 1 and a message on stderr unless the effective UID is 0.
require_root() {
  (( EUID == 0 )) && return 0
  echo "请使用 root 运行:sudo $0" >&2
  exit 1
}

# Validate the environment before a backup run.
# Globals:  LOCK_FILE, LOG_FILE, TARGET_MOUNT, SNAPSHOT_DIR, DATE_TAG (read);
#           RSYNC_BIN (read; written after rsync gets installed)
# Exits 1 when: not root, another run holds the lock, the target is not
# mounted, or the log file / target is not writable.
check_env() {
  # Until this run owns the lock there is nothing to clean up. Disarm the EXIT
  # handler so an early exit cannot delete a lock held by another instance.
  trap - EXIT

  require_root

  # Take the lock atomically: with noclobber the redirection fails when the
  # file already exists, so two concurrent runs cannot both pass this point.
  if ! ( set -o noclobber; : > "$LOCK_FILE" ) 2>/dev/null; then
    log "检测到已有运行锁($LOCK_FILE),退出。"
    exit 1
  fi
  # From here on the lock is ours; release it on any exit path.
  trap cleanup EXIT

  if [[ -z "${RSYNC_BIN:-}" ]]; then
    log "未找到 rsync,正在安装..."
    apt-get update -y && apt-get install -y rsync
    # Re-resolve the path, otherwise do_backup would try to run an empty command.
    RSYNC_BIN="$(command -v rsync)"
  fi

  if ! mountpoint -q "$TARGET_MOUNT"; then
    log "错误:$TARGET_MOUNT 未挂载,请先确认 iSCSI 已挂载。"
    exit 1
  fi

  mkdir -p "$SNAPSHOT_DIR"
  touch "$LOG_FILE" || { echo "无法写入日志 $LOG_FILE,请检查权限" >&2; exit 1; }

  # Write test on the target.
  local testfile="${TARGET_MOUNT}/.backup_write_test_${DATE_TAG}"
  echo "write test $(date)" > "$testfile" || { log "错误:无法写入 $TARGET_MOUNT"; exit 1; }
  rm -f "$testfile"

  # Rough space check: the source size is hard to estimate, so only warn when
  # the target has less than 5 GB (5242880 KB) available.
  local avail_kb
  avail_kb=$(df -Pk "$TARGET_MOUNT" | awk 'NR==2{print $4}')
  if [[ "$avail_kb" -lt 5242880 ]]; then
    log "警告:目标盘可用空间 < 5GB,可能不足。"
  fi
}

# Print the absolute path of the newest snapshot, or an empty line when none exists.
# "Newest" is decided by name: snapshots are called snap_YYYYMMDD-HHMMSS, so
# glob (lexicographic) order is chronological. Directory mtime is not usable,
# because rsync -a sets each snapshot directory's mtime to that of the source root.
# Globals: SNAPSHOT_DIR (read)
latest_snapshot() {
  local candidate newest=""
  for candidate in "${SNAPSHOT_DIR}"/snap_*; do
    # Also skips the literal pattern when the glob matches nothing.
    [[ -d "$candidate" ]] || continue
    newest="$candidate"
  done

  if [[ -n "$newest" ]]; then
    readlink -f "$newest"
  else
    echo ""
  fi
}

# Create a new snapshot of $SOURCE in $NEW_SNAP with rsync.
# When an earlier snapshot exists it is passed as --link-dest, so unchanged
# files become hard links instead of new copies.
# Globals: SOURCE, NEW_SNAP, RSYNC_BIN, RSYNC_EXCLUDES, LOG_FILE, SHA256_BIN,
#          DATE_TAG (all read)
# Returns: 0 on success (rsync exit 0, or 24 = source files vanished during the
#          transfer); otherwise rsync's exit code. POST_HOOK runs in every case,
#          so services stopped by PRE_HOOK are not left down after a failure.
do_backup() {
  local last_snap
  last_snap="$(latest_snapshot)"

  log "开始备份:源=${SOURCE}"
  log "目标快照目录:${NEW_SNAP}"
  mkdir -p "$NEW_SNAP"

  PRE_HOOK || true

  # Assemble the rsync arguments.
  local -a rsync_args=(
    -aAXHvv                         # archive mode, ACLs, xattrs, hard links, very verbose
    --numeric-ids                   # keep numeric UID/GID
    --delete                        # make the snapshot match the source (affects this snapshot dir only)
    --inplace                       # update large files in place to reduce space needs
    --partial                       # keep partially transferred files
    --info=STATS2,PROGRESS2
  )

  # Incremental mode: --link-dest points at the previous snapshot.
  if [[ -n "$last_snap" ]]; then
    rsync_args+=( "--link-dest=${last_snap}" )
    log "检测到上一次快照:${last_snap},启用增量模式(硬链接)。"
  else
    log "未检测到历史快照,本次将执行全量备份。"
  fi

  # Append the exclude options.
  rsync_args+=( "${RSYNC_EXCLUDES[@]}" )

  # Run rsync. The status is captured instead of letting "set -e" abort, so
  # POST_HOOK still runs. This relies on "pipefail" (set at the top of the
  # script) to make the pipeline report an rsync failure.
  log "执行 rsync 同步中..."
  local rsync_rc=0
  "$RSYNC_BIN" "${rsync_args[@]}" "$SOURCE" "$NEW_SNAP" | tee -a "$LOG_FILE" \
    || rsync_rc="${PIPESTATUS[0]}"

  POST_HOOK || true

  case "$rsync_rc" in
    0) ;;
    24)
      # Files disappearing mid-transfer is normal when backing up a live system.
      log "警告:部分源文件在传输过程中消失(rsync 退出码 24),已忽略。"
      ;;
    *)
      log "错误:rsync 失败(退出码 ${rsync_rc}),快照 ${NEW_SNAP} 不完整。"
      return "$rsync_rc"
      ;;
  esac

  # Optional checksum manifest (slow). The manifest itself is left out of the
  # file list, because it is still being written while find runs.
  if [[ -n "$SHA256_BIN" ]]; then
    log "生成快照校验清单(可能较耗时)..."
    (cd "$NEW_SNAP" && find . -type f ! -path "./MANIFEST_${DATE_TAG}.sha256" -print0 | xargs -0 "$SHA256_BIN" > "MANIFEST_${DATE_TAG}.sha256") || true
  fi

  log "备份完成:${NEW_SNAP}"
}

# Delete the oldest snapshots so that at most $RETAIN remain.
# Age is decided by name (snap_YYYYMMDD-HHMMSS sorts chronologically), not by
# directory mtime: rsync -a copies the source root's mtime onto every snapshot
# directory, so mtime order could select the newest snapshot for deletion.
# Globals: RETAIN, SNAPSHOT_DIR (read)
# Returns: 0; a failed removal is ignored (best effort).
prune_old() {
  (( RETAIN > 0 )) || return 0

  local -a snaps=()
  local entry
  for entry in "${SNAPSHOT_DIR}"/snap_*; do
    # Also skips the literal pattern when the glob matches nothing.
    [[ -d "$entry" ]] || continue
    snaps+=( "$entry" )
  done

  local count="${#snaps[@]}"
  (( count > RETAIN )) || return 0

  log "开始清理旧快照,保留最近 ${RETAIN} 个。"
  # snaps is in ascending (oldest first) order; the first count-RETAIN go.
  local i
  for (( i = 0; i < count - RETAIN; i++ )); do
    log "删除旧快照:${snaps[$i]}"
    rm -rf --one-file-system -- "${snaps[$i]:?}" || true
  done
}

# Entry point: validate the environment, take the backup, prune old snapshots,
# then log completion. The script runs under "set -e", so a failing step stops
# the sequence.
main() {
  check_env
  do_backup
  prune_old
  log "全部完成。"
}

main "$@"

设置定时任务

sudo crontab -e
# Run the backup every day at 02:30. Output is discarded here; the script
# appends its own log to /var/log/system_backup.log.
30 2 * * * /usr/local/bin/system_backup.sh >/dev/null 2>&1

恢复

脚本需要在 LiveCD/救援环境 下以 root 运行。
把脚本保存为 /usr/local/bin/system_restore.sh,赋予执行权限:

sudo mkdir -p /usr/local/bin
sudo nano /usr/local/bin/system_restore.sh   # paste the script below
sudo chmod +x /usr/local/bin/system_restore.sh
#!/usr/bin/env bash
# system_restore.sh
# Pick one snapshot from /mnt/iscsi_backup/system_snapshots/ and restore it
# onto this machine's system disk.
# Must be run as root from a LiveCD/rescue environment.

set -euo pipefail

### ===== Configuration (adjust as needed) =====
# Target system disk and partitions (the actual layout of your machine)
DISK="/dev/nvme0n1"
PART_EFI="/dev/nvme0n1p1"
PART_BOOT="/dev/nvme0n1p2"
LV_ROOT="/dev/mapper/ubuntu--vg-ubuntu--lv"   # root LV (an already existing LVM volume)

# Mount point of the backup disk (iSCSI or local) and the snapshot directory
BACKUP_MNT="/mnt/iscsi_backup"
SNAPSHOT_DIR="${BACKUP_MNT}/system_snapshots"

# Mount point the target system is restored into
TARGET="/mnt/restore"

# rsync options (be sure to keep A, X, H, numeric-ids and --delete)
RSYNC_ARGS=(-aAXHvv --numeric-ids --delete --info=STATS2,PROGRESS2)

# To skip certain directories (usually not needed), append them here:
EXCLUDES=( )
### ===== End of configuration =====

# Print a timestamped message to stdout.
# Arguments: $* - message text
log(){
  printf '[%s] %s\n' "$(date '+%F %T')" "$*"
}

# Abort with status 1 and a message on stderr when not running with effective UID 0.
require_root(){
  if (( EUID == 0 )); then
    return 0
  fi
  echo "请使用 root 运行:sudo $0" >&2
  exit 1
}

# Ask for confirmation before the target is overwritten.
# Reads one line from stdin into the global "ans"; any answer other than the
# exact word "yes" prints a cancellation notice and exits with status 1.
confirm(){
  local question="⚠️ 本操作将用所选快照覆盖 ${TARGET}(实际为你的系统盘挂载点)。确认继续?(yes/NO): "
  read -r -p "$question" ans
  if [[ "${ans:-}" != "yes" ]]; then
    echo "已取消。"
    exit 1
  fi
}

# Run "vgchange -ay" to activate volume groups. Its stdout is discarded and a
# failure is deliberately ignored (best effort).
activate_lvm(){
  log "激活 LVM 卷组..."
  if ! vgchange -ay >/dev/null; then
    :
  fi
}

# Helper: make sure directory $2 exists, then mount device $1 on it unless
# something is already mounted there. $3 is the message logged before mounting.
_mount_if_needed(){
  local device=$1 dir=$2 message=$3
  mkdir -p "$dir"
  if ! mountpoint -q "$dir"; then
    log "$message"
    mount "$device" "$dir"
  fi
}

# Mount the target system under $TARGET: root LV first, then /boot, then the
# EFI partition. Each mount point is created right before its own mount, i.e.
# inside the filesystem mounted one level up. Creating boot and boot/efi before
# the root volume is mounted would put them in the directory that the root
# mount then hides.
# Globals: TARGET, LV_ROOT, PART_BOOT, PART_EFI (read)
mount_targets(){
  log "创建挂载点..."
  _mount_if_needed "$LV_ROOT"   "$TARGET"          "挂载根卷 ${LV_ROOT} 到 ${TARGET} ..."
  _mount_if_needed "$PART_BOOT" "$TARGET/boot"     "挂载 /boot 分区 ${PART_BOOT} ..."
  _mount_if_needed "$PART_EFI"  "$TARGET/boot/efi" "挂载 EFI 分区 ${PART_EFI} ..."
}

# Check that the backup location is usable.
# When nothing is mounted on $BACKUP_MNT, only a hint is logged and the
# directory is created; no mount is attempted here. Exits with status 1 when
# $SNAPSHOT_DIR does not exist.
mount_backup(){
  mountpoint -q "$BACKUP_MNT" || {
    log "尝试挂载备份盘到 ${BACKUP_MNT}(如果是本地 /dev/sdX 盘请自行 mount)..."
    mkdir -p "$BACKUP_MNT"
    # To auto-mount a local disk, add here: mount /dev/sdX "$BACKUP_MNT"
  }

  [[ -d "$SNAPSHOT_DIR" ]] && return 0
  echo "未找到快照目录:$SNAPSHOT_DIR" >&2
  exit 1
}

# List the available snapshots (newest first, at most 20) and let the user
# choose one; an empty answer selects the newest.
# Order is decided by name (snap_YYYYMMDD-HHMMSS sorts chronologically), not by
# directory mtime, which rsync -a overwrites with the source root's mtime.
# Globals: SNAPSHOT_DIR (read); SNAP (written: path of the chosen snapshot)
# Exits 1 when no snapshot exists or the chosen path is not a directory.
pick_snapshot(){
  local -a snaps=()
  local entry
  for entry in "${SNAPSHOT_DIR}"/snap_*; do
    # Also skips the literal pattern when the glob matches nothing.
    [[ -d "$entry" ]] || continue
    snaps+=( "$entry" )
  done

  local total="${#snaps[@]}"
  if (( total == 0 )); then
    echo "未发现任何快照:$SNAPSHOT_DIR" >&2
    exit 1
  fi
  local latest="${snaps[total-1]}"

  echo "检测到以下快照(最新在前):"
  local i shown=0
  for (( i = total - 1; i >= 0 && shown < 20; i-- )); do
    printf '%s\n' "${snaps[i]}"
    shown=$(( shown + 1 ))
  done
  echo

  local chosen
  read -r -p "输入要恢复的快照完整路径(直接回车使用最新:${latest}): " chosen
  SNAP="${chosen:-$latest}"

  if [[ ! -d "$SNAP" ]]; then
    echo "快照目录无效:$SNAP" >&2
    exit 1
  fi
  log "使用快照:$SNAP"
}

# Ask for confirmation, then rsync the chosen snapshot onto the mounted target.
# Globals: SNAP, TARGET, RSYNC_ARGS, EXCLUDES (all read)
do_restore(){
  confirm

  log "开始 rsync 同步(这将覆盖目标系统)..."
  local -a args=( "${RSYNC_ARGS[@]}" )

  # MANIFEST_*.sha256 at the snapshot root is a checksum list written by the
  # backup script, not part of the system, so it is not restored into /.
  args+=( "--exclude=/MANIFEST_*.sha256" )

  # Add the user excludes only when there are any; expanding an empty array
  # through "${EXCLUDES[@]:-}" would add a stray empty "--exclude=".
  local pattern
  if (( ${#EXCLUDES[@]} > 0 )); then
    for pattern in "${EXCLUDES[@]}"; do
      args+=( "--exclude=$pattern" )
    done
  fi

  # ${SNAP:?} aborts instead of syncing from "/" if no snapshot was selected.
  rsync "${args[@]}" \
    "${SNAP:?}"/ "${TARGET:?}"/

  log "同步完成。"
}

# Rebuild the initramfs and reinstall GRUB inside the restored system.
# Globals: TARGET, DISK (read)
# Returns: the exit status of the chroot'ed commands. The bind mounts are
#          removed again whether or not those commands succeeded.
chroot_fix_boot(){
  local -a bind_dirs=( dev proc sys run )
  local name

  log "绑定必要的伪文件系统..."
  for name in "${bind_dirs[@]}"; do
    # The mount points may be missing in the restored tree (e.g. when the
    # snapshot excluded these directories), so create them first.
    mkdir -p "$TARGET/$name"
    # --rbind also carries submounts such as /sys/firmware/efi/efivars, which
    # grub-install needs to register the EFI boot entry. --make-rslave keeps
    # the later unmount from propagating back to the live system.
    mount --rbind "/$name" "$TARGET/$name"
    mount --make-rslave "$TARGET/$name"
  done

  log "进入 chroot 修复引导与内核镜像..."
  # Capture the status instead of letting "set -e" abort, so the unmounts
  # below always run.
  local rc=0
  chroot "$TARGET" bash -c "
    set -e
    echo '更新 initramfs...'
    update-initramfs -u

    if [[ -d /sys/firmware/efi ]]; then
      echo '检测到 EFI 引导,安装/修复 grub-efi...'
      grub-install --target=x86_64-efi --efi-directory=/boot/efi --bootloader-id=ubuntu --recheck
    else
      echo 'BIOS/Legacy 引导,安装 grub 到磁盘...'
      grub-install ${DISK}
    fi

    echo '更新 grub 配置...'
    update-grub
  " || rc=$?

  log "解除绑定..."
  local i
  for (( i = ${#bind_dirs[@]} - 1; i >= 0; i-- )); do
    umount -R -l "$TARGET/${bind_dirs[i]}" || true
  done

  if (( rc != 0 )); then
    log "错误:chroot 内修复引导失败(退出码 ${rc})。"
  fi
  return "$rc"
}

# Log completion and print the follow-up commands for the operator.
summary(){
  log "恢复完成!你可以执行:"
  printf '  %s\n' "umount -R $TARGET" "reboot"
}

# Entry point. Steps run in this order: root check, LVM activation, mounting
# the target system, checking the backup location, snapshot selection, rsync
# restore, boot repair in a chroot, final hints. The script runs under
# "set -e", so a failing step stops the sequence.
main(){
  require_root
  activate_lvm
  mount_targets
  mount_backup
  pick_snapshot
  do_restore
  chroot_fix_boot
  summary
}

main "$@"

2.安装NVIDIA Container Toolkit & TensorRT-LLM

NVIDIA Container Toolkit

# 1. Configure the production repository: store the de-armored signing key in
#    /usr/share/keyrings and write the apt source list with a signed-by option.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

# Optional: uncomment the "experimental" lines in the list to enable
# experimental packages. The file was written via "sudo tee" above, so editing
# it in place needs sudo as well.
sudo sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list

# 2. Update the package lists from the repository:
sudo apt-get update

# 3. Install the NVIDIA Container Toolkit packages, all pinned to one version:
export NVIDIA_CONTAINER_TOOLKIT_VERSION=1.17.8-1
sudo apt-get install -y \
    "nvidia-container-toolkit=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
    "nvidia-container-toolkit-base=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
    "libnvidia-container-tools=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
    "libnvidia-container1=${NVIDIA_CONTAINER_TOOLKIT_VERSION}"

TensorRT-LLM

# Pull the TensorRT-LLM release image; available tags are listed at:
#https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags
sudo docker pull nvcr.io/nvidia/tensorrt-llm/release:1.0.0rc6
#!/bin/bash
# start_tensorrt_llm.sh
# Start an interactive TensorRT-LLM container with all GPUs visible and the
# host data directory mounted.

set -euo pipefail

# Container name
CONTAINER_NAME="tensorrt_llm"

# Bind-mount paths
HOST_DATA_DIR="/data"
CONTAINER_DATA_DIR="/data"

# Port published to the host. The service started later inside the container
# listens on 0.0.0.0:8000; without publishing the port it is not reachable
# through the host's address.
SERVICE_PORT="8000"

# Image
IMAGE_NAME="nvcr.io/nvidia/tensorrt-llm/release:1.0.0rc6"

# Start the container
sudo docker run -it \
  --gpus all \
  --name "${CONTAINER_NAME}" \
  --shm-size=256g \
  --ulimit memlock=-1 \
  --ulimit stack=67108864 \
  -e NVIDIA_DRIVER_CAPABILITIES=all \
  -p "${SERVICE_PORT}:${SERVICE_PORT}" \
  -v "${HOST_DATA_DIR}:${CONTAINER_DATA_DIR}:rw" \
  "${IMAGE_NAME}"

3.TensorRT_LLM权重转换

参考链接
使用modelscope下载模型

pip install modelscope
# Download into the directory that the later steps pass as --model_dir,
# --tokenizer_dir and --tokenizer (/data/models/Qwen2.5-72B-Instruct).
mkdir -p /data/models/Qwen2.5-72B-Instruct
modelscope download --model Qwen/Qwen2.5-72B-Instruct --local_dir /data/models/Qwen2.5-72B-Instruct
cd /app/tensorrt_llm/examples/models/core/qwen

INT8 权重

1.将 HF 权重转为 TensorRT-LLM checkpoint(INT8 权重):

# Convert the Hugging Face weights in --model_dir into a TensorRT-LLM
# checkpoint in --output_dir: tensor-parallel size 2, pipeline-parallel size 1,
# bfloat16 dtype, INT8 weight-only quantization, 2 workers.
# The two variables on the first lines are set for this command only.
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
HF_TORCH_LOAD_EAGER=1 \
python3 /app/tensorrt_llm/examples/models/core/qwen/convert_checkpoint.py \
  --model_dir  /data/models/Qwen2.5-72B-Instruct \
  --output_dir /data/tensorrt_llm/ckpt/qwen2_5_72b_int8_tp2_noKV \
  --tp_size 2 --pp_size 1 \
  --dtype bfloat16 \
  --use_weight_only --weight_only_precision int8 \
  --workers 2

2.用 trtllm-build 构建 TensorRT 引擎

# Build the TensorRT engines from the converted checkpoint using GPUs 0 and 1.
# Limits set by the flags: max sequence length 8192, max batch size 1, paged
# KV cache; the attention and GEMM plugins are set to bfloat16.
export CUDA_VISIBLE_DEVICES=0,1
trtllm-build \
  --checkpoint_dir /data/tensorrt_llm/ckpt/qwen2_5_72b_int8_tp2_noKV \
  --output_dir     /data/tensorrt_llm/engines/qwen2_5_72b_INT8_tp2_seqlen8k_b1 \
  --max_seq_len 8192 \
  --max_batch_size 1 \
  --kv_cache_type paged \
  --workers 2 \
  --gpt_attention_plugin bfloat16 \
  --gemm_plugin bfloat16

3.测试

# Smoke test: run the example script on 2 MPI ranks against the built engine
# and generate up to 128 tokens for one prompt.
export CUDA_VISIBLE_DEVICES=0,1
# Allow Open MPI to run as root.
export OMPI_ALLOW_RUN_AS_ROOT=1
export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1

# -x forwards/sets the listed environment variables for the launched ranks.
mpirun --allow-run-as-root -np 2 --bind-to none --map-by slot \
  -x CUDA_VISIBLE_DEVICES -x NCCL_DEBUG=WARN -x NCCL_IB_DISABLE=1 -x NCCL_P2P_DISABLE=0 -x NCCL_SHM_DISABLE=0 \
  python3 /app/tensorrt_llm/examples/run.py \
    --engine_dir /data/tensorrt_llm/engines/qwen2_5_72b_INT8_tp2_seqlen8k_b1 \
    --tokenizer_dir /data/models/Qwen2.5-72B-Instruct \
    --input_text "你好,请简单介绍一下西安" \
    --max_output_len 128

4.启动服务

# Serve the built engine over HTTP on 0.0.0.0:8000 using GPUs 0 and 1.
# The last argument is the engine directory; the tokenizer is loaded from the
# original model directory.
# NOTE(review): flag names and the "trt" backend value are as used with the
# 1.0.0rc6 image — confirm against `trtllm-serve serve --help` for other versions.
export CUDA_VISIBLE_DEVICES=0,1

trtllm-serve serve \
  --host 0.0.0.0 \
  --port 8000 \
  --backend trt \
  --tokenizer /data/models/Qwen2.5-72B-Instruct \
  --tp_size 2 --pp_size 1 --gpus_per_node 2 \
  --max_batch_size 1 \
  --max_num_tokens 8192 \
  --kv_cache_free_gpu_memory_fraction 0.85 \
  /data/tensorrt_llm/engines/qwen2_5_72b_INT8_tp2_seqlen8k_b1

服务启动成功后会绑定并监听 0.0.0.0:8000,TensorRT-LLM 提供了如下请求接口:

def register_routes(self):
    """Register the server's HTTP endpoints on ``self.app`` via ``add_api_route``.

    GET routes: /health, /version, /v1/models, /metrics.
    POST routes: /kv_cache_events, /v1/completions, /v1/chat/completions.
    """
    self.app.add_api_route("/health", self.health, methods=["GET"])
    self.app.add_api_route("/version", self.version, methods=["GET"])
    self.app.add_api_route("/v1/models", self.get_model, methods=["GET"])
    self.app.add_api_route("/metrics", self.get_iteration_stats, methods=["GET"])
    self.app.add_api_route("/kv_cache_events", self.get_kv_cache_events, methods=["POST"])
    self.app.add_api_route("/v1/completions", self.openai_completion, methods=["POST"])
    self.app.add_api_route("/v1/chat/completions", self.openai_chat, methods=["POST"])

文章作者: 你的朋友
版权声明: 本博客所有文章除特别声明外,均采用 CC BY 4.0 许可协议。转载请注明来源 你的朋友 !