尝试一下部署TensorRT-LLM + Qwen2.5-72B
配置:Ubuntu 24.04.3,L20 48GB ×2,Xeon 6530 ×2,256GB 内存
一、前置工作
1.备份还原点
未虑胜先虑败,先做一份系统备份,万一把环境搞坏了可以还原。
登录并挂载 iSCSI 磁盘:
# Query the portal for targets (replace "ip" with the real portal address).
sudo iscsiadm --mode discovery --type sendtargets --portal ip:3260
# Log in to the discovered node records.
sudo iscsiadm --mode node --login
# Set node.startup to "automatic" on the node records.
sudo iscsiadm --mode node --op update --name node.startup --value automatic
# List block devices.
lsblk
配置 CHAP(目标端未启用 CHAP 可忽略;若已启用,需在执行上面的登录命令之前完成此配置)
sudo nano /etc/iscsi/iscsid.conf
#找到并修改
node.session.auth.authmethod = CHAP
node.session.auth.username = 你的CHAP用户名
node.session.auth.password = 你的CHAP密码
sudo systemctl restart open-iscsi
格式化挂载硬盘
# 格式化为 ext4 文件系统
# WARNING: mkfs destroys everything on the device. Check the lsblk output
# first; /dev/sdb is assumed to be the iSCSI disk here.
sudo mkfs.ext4 /dev/sdb
# Create the mount point and mount the new filesystem.
sudo mkdir -p /mnt/iscsi_backup
sudo mount /dev/sdb /mnt/iscsi_backup
# Persist the mount across reboots; otherwise the scheduled backup finds the
# target unmounted after a restart. _netdev defers the mount until the network
# is up, nofail keeps the boot going when the iSCSI target is unreachable.
printf 'UUID=%s /mnt/iscsi_backup ext4 _netdev,nofail 0 2\n' \
  "$(sudo blkid -s UUID -o value /dev/sdb)" | sudo tee -a /etc/fstab
df -Th /mnt/iscsi_backup
备份
#!/usr/bin/env bash
# system_backup.sh
# 全量 + 增量 备份到 /mnt/iscsi_backup,采用 rsync + 硬链接快照
# 需要 root 运行:sudo /usr/local/bin/system_backup.sh
set -euo pipefail

### ===== Configuration =====
SOURCE="/"                                        # backup source (system root)
TARGET_MOUNT="/mnt/iscsi_backup"                  # iSCSI mount point
SNAPSHOT_DIR="${TARGET_MOUNT}/system_snapshots"   # root directory of all snapshots
LOG_FILE="/var/log/system_backup.log"             # log file
LOCK_FILE="/var/run/system_backup.lock"           # lock against concurrent runs
RETAIN=3                                          # keep the newest N snapshots (0 = never prune)
DATE_TAG="$(date +%Y%m%d-%H%M%S)"                 # snapshot name suffix
NEW_SNAP="${SNAPSHOT_DIR}/snap_${DATE_TAG}"       # directory of the snapshot being created

# `|| true`: a missing rsync must not abort the script here under `set -e`;
# check_env tests for an empty value and installs rsync in that case.
RSYNC_BIN="$(command -v rsync || true)"
SHA256_BIN="$(command -v sha256sum || true)"      # optional: used to write a checksum manifest
PIGZ_BIN="$(command -v pigz || true)"             # reserved for a later compression extension

# Exclude list (adjust as needed).
# Pseudo/volatile filesystems are excluded as "dir/*": the contents are skipped
# but the (empty) directory itself is kept in the snapshot, so a restore with
# --delete does not remove mount points such as /proc, /sys or /dev.
RSYNC_EXCLUDES=(
  "--exclude=${TARGET_MOUNT}/*"
  "--exclude=/proc/*"
  "--exclude=/sys/*"
  "--exclude=/dev/*"
  "--exclude=/run/*"
  "--exclude=/tmp/*"
  "--exclude=/lost+found"
  "--exclude=/swapfile"
  "--exclude=/var/tmp/*"
  # Container image caches (Docker / containerd), if present:
  "--exclude=/var/lib/docker/overlay2/*"
  "--exclude=/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/*"
)
# 可选:备份前停止/静默数据库等,避免不一致(按需启用)
# Hook executed before the backup starts; a no-op by default.
# Use it to stop or quiesce databases so the copy is consistent, e.g.:
#   systemctl stop mysql || true
#   docker exec -t pg_container pg_dumpall -U postgres > "${TARGET_MOUNT}/pg_dump_${DATE_TAG}.sql" || true
PRE_HOOK() {
  return 0
}
# 可选:备份后恢复服务(与 PRE_HOOK 对应)
# Hook executed after the backup; a no-op by default.
# Counterpart of PRE_HOOK, e.g.:
#   systemctl start mysql || true
POST_HOOK() {
  return 0
}
### ===== 配置区结束 =====
# Print a timestamped message to stdout and append the same line to LOG_FILE.
# Arguments: the message words (joined into one line).
log() {
  local stamp
  stamp="$(date '+%F %T')"
  printf '[%s] %s\n' "$stamp" "$*" | tee -a "$LOG_FILE"
}
# Remove the lock file if it exists as a regular file.
cleanup() {
  [[ ! -f "$LOCK_FILE" ]] || rm -f "$LOCK_FILE"
}
trap cleanup EXIT
# Exit with status 1 (message on stderr) unless the effective user is root.
require_root() {
  (( EUID != 0 )) || return 0
  echo "请使用 root 运行:sudo $0" >&2
  exit 1
}
# Validate the environment before a backup run and take the run lock.
# Globals:  LOCK_FILE, LOG_FILE, TARGET_MOUNT, SNAPSHOT_DIR, DATE_TAG (read),
#           RSYNC_BIN (read; rewritten after rsync had to be installed)
# Exits 1 when: not root, the log is not writable, another run holds the lock,
#           rsync cannot be provided, the target is not mounted or not writable.
check_env() {
  # Hold the EXIT trap back until this run owns the lock. Otherwise an early
  # exit (not root, lock already taken) would run cleanup and delete the lock
  # that belongs to the instance which is still running.
  trap - EXIT
  require_root

  # Check the log file before the first log() call so an unwritable log gives
  # an explicit message.
  touch "$LOG_FILE" || { echo "无法写入日志 $LOG_FILE,请检查权限" >&2; exit 1; }

  # With noclobber the redirection fails if the file already exists, so the
  # existence test and the creation are a single atomic step.
  if ! ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
    log "检测到已有运行锁($LOCK_FILE),退出。"
    exit 1
  fi
  trap cleanup EXIT

  if [[ -z "$RSYNC_BIN" ]]; then
    log "未找到 rsync,正在安装..."
    # Best effort; the outcome is verified right below by resolving the path.
    apt-get update -y && apt-get install -y rsync || true
    RSYNC_BIN="$(command -v rsync || true)"
    if [[ -z "$RSYNC_BIN" ]]; then
      log "错误:rsync 安装失败。"
      exit 1
    fi
  fi

  if ! mountpoint -q "$TARGET_MOUNT"; then
    log "错误:$TARGET_MOUNT 未挂载,请先确认 iSCSI 已挂载。"
    exit 1
  fi
  mkdir -p "$SNAPSHOT_DIR"

  # Write test on the target.
  local testfile="${TARGET_MOUNT}/.backup_write_test_${DATE_TAG}"
  echo "write test $(date)" > "$testfile" || { log "错误:无法写入 $TARGET_MOUNT"; exit 1; }
  rm -f "$testfile"

  # Coarse space check: only warns when the target has less than 5 GB free
  # (5242880 KB); the size of the source is not estimated.
  local avail_kb
  avail_kb=$(df -Pk "$TARGET_MOUNT" | awk 'NR==2{print $4}')
  if [[ "$avail_kb" -lt 5242880 ]]; then
    log "警告:目标盘可用空间 < 5GB,可能不足。"
  fi
}
# Print the absolute path of the newest snapshot, or an empty line if none.
# "Newest" is decided by the timestamp embedded in the directory name
# (snap_YYYYmmdd-HHMMSS sorts chronologically), not by mtime: rsync -a copies
# the source root's mtime onto the snapshot directory, so mtimes do not
# reflect creation order.
# Globals: SNAPSHOT_DIR (read)
latest_snapshot() {
  local -a snaps=()
  local candidate
  # Pathname expansion returns matches in sorted order; -d also filters out
  # the unexpanded pattern when nothing matches.
  for candidate in "${SNAPSHOT_DIR}"/snap_*; do
    if [[ -d "$candidate" ]]; then
      snaps+=( "$candidate" )
    fi
  done
  if (( ${#snaps[@]} == 0 )); then
    echo ""
    return 0
  fi
  readlink -f "${snaps[${#snaps[@]}-1]}"
}
# Create a new snapshot of SOURCE in NEW_SNAP with rsync, hard-linking
# unchanged files against the previous snapshot when one exists.
# Globals:  SOURCE, NEW_SNAP, RSYNC_BIN, RSYNC_EXCLUDES, SHA256_BIN, DATE_TAG,
#           LOG_FILE (read)
# Returns:  0 on success (including rsync exit 24, "source files vanished");
#           otherwise the failing status of the rsync pipeline.
# Relies on the script-level `set -o pipefail` to see rsync's status through tee.
do_backup() {
  local last_snap
  last_snap="$(latest_snapshot)"
  log "开始备份:源=${SOURCE}"
  log "目标快照目录:${NEW_SNAP}"
  mkdir -p "$NEW_SNAP"
  PRE_HOOK || true

  local -a rsync_args=(
    -aAXHvv                     # archive mode + ACLs, xattrs, hard links; very verbose
    --numeric-ids               # keep numeric UID/GID
    --delete                    # make the snapshot match the source (affects only this snapshot dir)
    --inplace                   # update large files in place
    --partial                   # keep partially transferred files
    --info=STATS2,PROGRESS2
  )
  # Incremental mode: unchanged files become hard links into the last snapshot.
  if [[ -n "$last_snap" ]]; then
    rsync_args+=( "--link-dest=${last_snap}" )
    log "检测到上一次快照:${last_snap},启用增量模式(硬链接)。"
  else
    log "未检测到历史快照,本次将执行全量备份。"
  fi
  rsync_args+=( "${RSYNC_EXCLUDES[@]}" )

  log "执行 rsync 同步中..."
  # Capture the status instead of letting `set -e` abort here: POST_HOOK must
  # run even when rsync fails, or services stopped by PRE_HOOK stay down.
  local rc=0
  "$RSYNC_BIN" "${rsync_args[@]}" "$SOURCE" "$NEW_SNAP" | tee -a "$LOG_FILE" || rc=$?
  POST_HOOK || true

  if (( rc == 24 )); then
    # Exit 24 = some source files vanished during the transfer, which is
    # expected when copying a running system.
    log "警告:部分源文件在传输过程中消失(rsync 退出码 24),已忽略。"
  elif (( rc != 0 )); then
    log "错误:rsync 失败(退出码 ${rc}),快照 ${NEW_SNAP} 不完整。"
    return "$rc"
  fi

  # Optional checksum manifest (slow). The manifest itself is left out of the
  # listing because it is still being written while find runs.
  if [[ -n "$SHA256_BIN" ]]; then
    log "生成快照校验清单(可能较耗时)..."
    local manifest="MANIFEST_${DATE_TAG}.sha256"
    (cd "$NEW_SNAP" && find . -type f ! -name "$manifest" -print0 | xargs -0 "$SHA256_BIN" > "$manifest") || true
  fi
  log "备份完成:${NEW_SNAP}"
}
# Delete all but the newest RETAIN snapshots (RETAIN <= 0 disables pruning).
# Age is taken from the timestamp in the directory name; the directory mtime
# is not a reliable creation time because rsync -a overwrites it with the
# source root's mtime.
# Globals: RETAIN, SNAPSHOT_DIR (read)
prune_old() {
  (( RETAIN > 0 )) || return 0

  local -a snaps=()
  local path
  # Glob results are sorted, i.e. oldest name first.
  for path in "${SNAPSHOT_DIR}"/snap_*; do
    if [[ -d "$path" ]]; then
      snaps+=( "$path" )
    fi
  done

  local count=${#snaps[@]}
  (( count > RETAIN )) || return 0

  log "开始清理旧快照,保留最近 ${RETAIN} 个。"
  local i
  for (( i = 0; i < count - RETAIN; i++ )); do
    log "删除旧快照:${snaps[i]}"
    rm -rf --one-file-system -- "${snaps[i]}" || true
  done
}
# Entry point: environment check, backup, pruning, then a final log line.
main() {
  local step
  for step in check_env do_backup prune_old; do
    "$step"
  done
  log "全部完成。"
}
main "$@"
设置定时任务
sudo crontab -e
30 2 * * * /usr/local/bin/system_backup.sh >/dev/null 2>&1
恢复
脚本需要在 LiveCD/救援环境 下以 root 运行。
把脚本保存为 /usr/local/bin/system_restore.sh,赋予执行权限:
# Create the directory, paste the script into the file, then make it executable.
sudo mkdir -p /usr/local/bin
sudo nano /usr/local/bin/system_restore.sh # paste the script
sudo chmod +x /usr/local/bin/system_restore.sh
#!/usr/bin/env bash
# system_restore.sh
# 从 /mnt/iscsi_backup/system_snapshots/ 选择一次快照,恢复到本机系统盘
# 需在 LiveCD/救援环境中以 root 运行
set -euo pipefail

### ===== Configuration (adjust as needed) =====
# Target system disk and partitions (the actual layout of this machine).
DISK=/dev/nvme0n1
PART_EFI=/dev/nvme0n1p1
PART_BOOT=/dev/nvme0n1p2
LV_ROOT=/dev/mapper/ubuntu--vg-ubuntu--lv   # root LV (an existing LVM volume)

# Mount point of the backup disk (iSCSI or local) and the snapshot directory.
BACKUP_MNT=/mnt/iscsi_backup
SNAPSHOT_DIR="$BACKUP_MNT/system_snapshots"

# Mount point the system is restored into.
TARGET=/mnt/restore

# rsync options (always keep A, X, H, numeric-ids and --delete).
RSYNC_ARGS=(
  -aAXHvv
  --numeric-ids
  --delete
  --info=STATS2,PROGRESS2
)

# Paths to skip during the restore (normally none).
EXCLUDES=()
### ===== End of configuration =====
# Print a timestamped message to stdout.
log() {
  printf '[%s] %s\n' "$(date '+%F %T')" "$*"
}
# Exit with status 1 (message on stderr) when not running as root.
require_root() {
  [[ "$EUID" -eq 0 ]] && return 0
  echo "请使用 root 运行:sudo $0" >&2
  exit 1
}
# Ask for confirmation before overwriting TARGET; anything other than the
# literal answer "yes" prints a cancellation notice and exits with status 1.
confirm() {
  local prompt="⚠️ 本操作将用所选快照覆盖 ${TARGET}(实际为你的系统盘挂载点)。确认继续?(yes/NO): "
  read -r -p "$prompt" ans
  if [[ "${ans:-}" != "yes" ]]; then
    echo "已取消。"
    exit 1
  fi
}
# Run `vgchange -ay`; its output is discarded and a failure is ignored.
activate_lvm() {
  log "激活 LVM 卷组..."
  if ! vgchange -ay >/dev/null; then
    : # best effort, failure is ignored
  fi
}
# Mount the target system below TARGET: root LV, then /boot, then the EFI
# partition. Filesystems that are already mounted are left alone.
# Each mount point is created immediately before it is used: a directory
# created earlier would be hidden once the parent filesystem is mounted on top.
# Globals: TARGET, LV_ROOT, PART_BOOT, PART_EFI (read)
mount_targets() {
  log "创建挂载点..."
  mkdir -p "$TARGET"
  if ! mountpoint -q "$TARGET"; then
    log "挂载根卷 ${LV_ROOT} 到 ${TARGET} ..."
    mount "$LV_ROOT" "$TARGET"
  fi

  mkdir -p "$TARGET/boot"
  if ! mountpoint -q "$TARGET/boot"; then
    log "挂载 /boot 分区 ${PART_BOOT} ..."
    mount "$PART_BOOT" "$TARGET/boot"
  fi

  mkdir -p "$TARGET/boot/efi"
  if ! mountpoint -q "$TARGET/boot/efi"; then
    log "挂载 EFI 分区 ${PART_EFI} ..."
    mount "$PART_EFI" "$TARGET/boot/efi"
  fi
}
# Make sure the backup location is usable: when BACKUP_MNT is not a mount
# point only the directory is created (nothing is mounted here), then the
# snapshot directory must exist or the script exits with status 1.
mount_backup() {
  if mountpoint -q "$BACKUP_MNT"; then
    : # already mounted
  else
    log "尝试挂载备份盘到 ${BACKUP_MNT}(如果是本地 /dev/sdX 盘请自行 mount)..."
    mkdir -p "$BACKUP_MNT"
    # To mount a local disk automatically, add here: mount /dev/sdX "$BACKUP_MNT"
  fi

  if [[ -d "$SNAPSHOT_DIR" ]]; then
    return 0
  fi
  echo "未找到快照目录:$SNAPSHOT_DIR" >&2
  exit 1
}
# List the available snapshots (newest first, at most 20) and let the user
# choose one; an empty answer selects the newest.
# Snapshots are ordered by the timestamp in their name (snap_YYYYmmdd-HHMMSS),
# not by directory mtime, which rsync -a overwrites with the source's mtime.
# Globals: SNAPSHOT_DIR (read), SNAP (written: path of the chosen snapshot)
# Exits 1 when no snapshot exists or the chosen path is not a directory.
pick_snapshot() {
  local -a snaps=()
  local path
  # Glob results are sorted, so the newest name ends up last.
  for path in "${SNAPSHOT_DIR}"/snap_*; do
    if [[ -d "$path" ]]; then
      snaps+=( "$path" )
    fi
  done

  local count=${#snaps[@]}
  if (( count == 0 )); then
    echo "未发现任何快照:$SNAPSHOT_DIR" >&2
    exit 1
  fi
  local latest="${snaps[count-1]}"

  echo "检测到以下快照(最新在前):"
  local i
  for (( i = count - 1; i >= 0 && i >= count - 20; i-- )); do
    printf '%s\n' "${snaps[i]}"
  done
  echo

  local chosen
  read -r -p "输入要恢复的快照完整路径(直接回车使用最新:${latest}): " chosen
  SNAP="${chosen:-$latest}"
  if [[ ! -d "$SNAP" ]]; then
    echo "快照目录无效:$SNAP" >&2
    exit 1
  fi
  log "使用快照:$SNAP"
}
# Ask for confirmation, then rsync the chosen snapshot onto TARGET.
# Globals: RSYNC_ARGS, EXCLUDES, SNAP, TARGET (read)
do_restore() {
  confirm
  log "开始 rsync 同步(这将覆盖目标系统)..."

  local -a args=( "${RSYNC_ARGS[@]}" )
  local pattern
  # Guard the expansion: with an empty EXCLUDES no --exclude option at all
  # must be generated (an empty array must not turn into `--exclude=`).
  if (( ${#EXCLUDES[@]} > 0 )); then
    for pattern in "${EXCLUDES[@]}"; do
      if [[ -n "$pattern" ]]; then
        args+=( "--exclude=$pattern" )
      fi
    done
  fi

  rsync "${args[@]}" "$SNAP"/ "$TARGET"/
  log "同步完成。"
}
# Regenerate the initramfs and reinstall GRUB inside the restored system.
# Binds the pseudo filesystems into TARGET, runs the repair in a chroot and
# always removes the binds again, also when the repair fails.
# Globals: TARGET, DISK (read)
# Returns: 0 on success, otherwise the status of the failed bind/chroot step.
chroot_fix_boot() {
  local -a binds=( /dev /proc /sys /run )
  # efivarfs is a separate mount below /sys and is not carried along by a
  # plain (non-recursive) bind of /sys, so it is bound on its own when present.
  if mountpoint -q /sys/firmware/efi/efivars; then
    binds+=( /sys/firmware/efi/efivars )
  fi

  log "绑定必要的伪文件系统..."
  local -a mounted=()
  local fs rc=0
  for fs in "${binds[@]}"; do
    # The mount points can be missing when the snapshot did not contain them.
    if mkdir -p "${TARGET}${fs}" && mount --bind "$fs" "${TARGET}${fs}"; then
      mounted+=( "${TARGET}${fs}" )
    else
      rc=1
      log "错误:无法绑定 ${fs} 到 ${TARGET}${fs}"
      break
    fi
  done

  if (( rc == 0 )); then
    log "进入 chroot 修复引导与内核镜像..."
    # DISK is handed over as a positional parameter instead of being pasted
    # into the script text.
    chroot "$TARGET" bash -c '
      set -e
      echo "更新 initramfs..."
      update-initramfs -u
      if [[ -d /sys/firmware/efi ]]; then
        echo "检测到 EFI 引导,安装/修复 grub-efi..."
        grub-install --target=x86_64-efi --efi-directory=/boot/efi --bootloader-id=ubuntu --recheck
      else
        echo "BIOS/Legacy 引导,安装 grub 到磁盘..."
        grub-install "$1"
      fi
      echo "更新 grub 配置..."
      update-grub
    ' chroot_fix_boot "$DISK" || rc=$?
  fi

  log "解除绑定..."
  local i
  # Unmount in reverse order of mounting.
  for (( i = ${#mounted[@]} - 1; i >= 0; i-- )); do
    umount -lf "${mounted[i]}" || true
  done

  if (( rc != 0 )); then
    log "错误:引导修复失败(退出码 ${rc})。"
    return "$rc"
  fi
}
# Print the closing message with the commands to run next.
summary() {
  log "恢复完成!你可以执行:"
  printf '%s\n' " umount -R $TARGET" " reboot"
}
# Entry point: run the restore steps in order.
main() {
  local step
  for step in \
    require_root activate_lvm mount_targets mount_backup \
    pick_snapshot do_restore chroot_fix_boot summary; do
    "$step"
  done
}
main "$@"
2.安装NVIDIA Container Toolkit & TensorRT-LLM
NVIDIA Container Toolkit
# 1. Configure the production repository:
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Optionally, configure the repository to use experimental packages.
# The list file was written via `sudo tee`, so editing it needs sudo as well.
sudo sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
# 2. Update the packages list from the repository:
sudo apt-get update
# 3. Install the NVIDIA Container Toolkit packages (all pinned to one version):
export NVIDIA_CONTAINER_TOOLKIT_VERSION=1.17.8-1
sudo apt-get install -y \
  nvidia-container-toolkit="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
  nvidia-container-toolkit-base="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
  libnvidia-container-tools="${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
  libnvidia-container1="${NVIDIA_CONTAINER_TOOLKIT_VERSION}"
# NOTE(review): the official guide follows the installation with
# `sudo nvidia-ctk runtime configure --runtime=docker` and a Docker restart;
# confirm whether that step is needed on this host.
TensorRT-LLM
# Pull the TensorRT-LLM release image; the tag list is at:
#https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags
sudo docker pull nvcr.io/nvidia/tensorrt-llm/release:1.0.0rc6
#!/bin/bash
# start_tensorrt_llm.sh
# Start the TensorRT-LLM container with all GPUs and the host data directory
# mounted; when the container already exists it is started and attached
# instead of failing on the name conflict.

# Container name
CONTAINER_NAME="tensorrt_llm"
# Bind mount: host path -> container path
HOST_DATA_DIR="/data"
CONTAINER_DATA_DIR="/data"
# Image
IMAGE_NAME="nvcr.io/nvidia/tensorrt-llm/release:1.0.0rc6"
# Host port mapped to container port 8000, the port trtllm-serve is started on
# later in this guide. Override with SERVE_PORT=... if 8000 is taken on the host.
SERVE_PORT="${SERVE_PORT:-8000}"

if sudo docker container inspect "$CONTAINER_NAME" >/dev/null 2>&1; then
  # The container was created by an earlier run: reuse it.
  sudo docker start -ai "$CONTAINER_NAME"
else
  sudo docker run -it \
    --gpus all \
    --name "$CONTAINER_NAME" \
    --shm-size=256g \
    --ulimit memlock=-1 \
    --ulimit stack=67108864 \
    -e NVIDIA_DRIVER_CAPABILITIES=all \
    -p "${SERVE_PORT}:8000" \
    -v "${HOST_DATA_DIR}:${CONTAINER_DATA_DIR}:rw" \
    "$IMAGE_NAME"
fi
3.TensorRT_LLM权重转换
参考链接
使用modelscope下载模型
pip install modelscope
# Download into the directory the later steps read the model from
# (--model_dir / --tokenizer_dir use /data/models/Qwen2.5-72B-Instruct).
modelscope download --model Qwen/Qwen2.5-72B-Instruct --local_dir /data/models/Qwen2.5-72B-Instruct
cd /app/tensorrt_llm/examples/models/core/qwen
INT8 权重
1.将 HF 权重转换为 TensorRT-LLM checkpoint(INT8 权重):
# Convert the HF weights into a TensorRT-LLM checkpoint with INT8 weight-only
# precision, tp_size 2 / pp_size 1.
hf_model_dir=/data/models/Qwen2.5-72B-Instruct
ckpt_dir=/data/tensorrt_llm/ckpt/qwen2_5_72b_int8_tp2_noKV
env PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True HF_TORCH_LOAD_EAGER=1 \
  python3 /app/tensorrt_llm/examples/models/core/qwen/convert_checkpoint.py \
  --model_dir "$hf_model_dir" \
  --output_dir "$ckpt_dir" \
  --tp_size 2 \
  --pp_size 1 \
  --dtype bfloat16 \
  --use_weight_only \
  --weight_only_precision int8 \
  --workers 2
2.用 trtllm-build 构建 TensorRT 引擎
# Build the TensorRT engine from the converted checkpoint.
export CUDA_VISIBLE_DEVICES=0,1
ckpt_dir=/data/tensorrt_llm/ckpt/qwen2_5_72b_int8_tp2_noKV
engine_dir=/data/tensorrt_llm/engines/qwen2_5_72b_INT8_tp2_seqlen8k_b1
build_args=(
  --checkpoint_dir "$ckpt_dir"
  --output_dir "$engine_dir"
  --max_seq_len 8192
  --max_batch_size 1
  --kv_cache_type paged
  --workers 2
  --gpt_attention_plugin bfloat16
  --gemm_plugin bfloat16
)
trtllm-build "${build_args[@]}"
3.测试
# Smoke-test the engine: run the example script on two MPI ranks.
export CUDA_VISIBLE_DEVICES=0,1 OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
mpi_args=(
  --allow-run-as-root
  -np 2
  --bind-to none
  --map-by slot
  -x CUDA_VISIBLE_DEVICES
  -x NCCL_DEBUG=WARN
  -x NCCL_IB_DISABLE=1
  -x NCCL_P2P_DISABLE=0
  -x NCCL_SHM_DISABLE=0
)
run_args=(
  --engine_dir /data/tensorrt_llm/engines/qwen2_5_72b_INT8_tp2_seqlen8k_b1
  --tokenizer_dir /data/models/Qwen2.5-72B-Instruct
  --input_text "你好,请简单介绍一下西安"
  --max_output_len 128
)
mpirun "${mpi_args[@]}" python3 /app/tensorrt_llm/examples/run.py "${run_args[@]}"
4.启动服务
# Serve the built engine over HTTP on 0.0.0.0:8000.
export CUDA_VISIBLE_DEVICES=0,1
engine_dir=/data/tensorrt_llm/engines/qwen2_5_72b_INT8_tp2_seqlen8k_b1
serve_args=(
  --host 0.0.0.0
  --port 8000
  --backend trt
  --tokenizer /data/models/Qwen2.5-72B-Instruct
  --tp_size 2
  --pp_size 1
  --gpus_per_node 2
  --max_batch_size 1
  --max_num_tokens 8192
  --kv_cache_free_gpu_memory_fraction 0.85
)
trtllm-serve serve "${serve_args[@]}" "$engine_dir"
服务启动成功后会绑定并监听 0.0.0.0:8000,TensorRT-LLM 提供了如下请求接口:
def register_routes(self):
self.app.add_api_route("/health", self.health, methods=["GET"])
self.app.add_api_route("/version", self.version, methods=["GET"])
self.app.add_api_route("/v1/models", self.get_model, methods=["GET"])
self.app.add_api_route("/metrics", self.get_iteration_stats, methods=["GET"])
self.app.add_api_route("/kv_cache_events", self.get_kv_cache_events, methods=["POST"])
self.app.add_api_route("/v1/completions", self.openai_completion, methods=["POST"])
self.app.add_api_route("/v1/chat/completions", self.openai_chat, methods=["POST"])