#!/bin/bash
#
# Batch install/uninstall Prometheus Node Exporter on remote hosts over SSH
# and keep the local Prometheus scrape configuration in sync.
#
# Every setting below may be overridden from the environment; the value after
# ":-" is the built-in default.
set -euo pipefail

# ====================== Core settings ======================
# Local directory holding the already-extracted node_exporter release.
LOCAL_EXTRACT_DIR="${LOCAL_EXTRACT_DIR:-/root/node_exporter-1.8.2.linux-amd64}"
# Port Node Exporter listens on.
NODE_EXPORTER_PORT="${NODE_EXPORTER_PORT:-9100}"
# Host list file: one host per line, "IP" or "IP:port"; "#" starts a comment.
HOSTS_FILE="${HOSTS_FILE:-./node_exporter_hosts.txt}"
# Default remote login user (batch mode).
REMOTE_USER="${REMOTE_USER:-root}"
# Default remote login password (batch mode); empty selects SSH key auth.
# NOTE(security): a password is hard-coded as the default here. Prefer
# exporting REMOTE_PASSWORD (or using key auth) over editing this file.
# "-" instead of ":-" so that an explicitly empty value is honoured.
REMOTE_PASSWORD="${REMOTE_PASSWORD-hp93000}"
# Staging directory on the remote host.
REMOTE_TMP_DIR="${REMOTE_TMP_DIR:-/tmp/node_exporter_install}"
# SSH connect timeout, seconds.
SSH_CONNECT_TIMEOUT="${SSH_CONNECT_TIMEOUT:-20}"
# SSH ServerAliveInterval, seconds.
SSH_SERVER_ALIVE_INTERVAL="${SSH_SERVER_ALIVE_INTERVAL:-15}"
# Upper bound for one remote command, seconds.
REMOTE_CMD_TIMEOUT="${REMOTE_CMD_TIMEOUT:-180}"
# Maximum number of file-transfer attempts.
MAX_TRANSFER_RETRIES="${MAX_TRANSFER_RETRIES:-3}"

# ====================== Prometheus settings ======================
# Full path of the Prometheus configuration file.
PROMETHEUS_CONFIG_FILE="${PROMETHEUS_CONFIG_FILE:-/root/promethesu/conf/prometheus.yml}"
# Full path of the docker-compose.yml that runs Prometheus.
DOCKER_COMPOSE_PATH="${DOCKER_COMPOSE_PATH:-/root/promethesu/docker-compose.yml}"
# Restart Prometheus automatically after a successful install/uninstall.
AUTO_RESTART_PROMETHEUS="${AUTO_RESTART_PROMETHEUS:-true}"
# Back up the Prometheus config before modifying it.
AUTO_BACKUP_CONFIG="${AUTO_BACKUP_CONFIG:-true}"
# Validate the Prometheus config after modifying it.
AUTO_CHECK_CONFIG="${AUTO_CHECK_CONFIG:-true}"
# =====================================================================

# ANSI colour escapes used by the logging helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Result counters.
SUCCESS_COUNT=0
FAILURE_COUNT=0
FAILURE_HOSTS=()
# Set to true once the Prometheus config has been changed.
NEED_RESTART_PROMETHEUS=false
# Path of the most recent config backup ("" when none exists).
LAST_BACKUP_FILE=""
# Whether promtool is available; defaulted here so readers are safe under
# "set -u" even before the dependency check has run.
PROMTOOL_AVAILABLE=false
|
||
|
||
# 打印带颜色的信息
|
||
# Logging helpers: print "$1" prefixed with a coloured level tag.
# info/blue write to stdout. warn/error write to stderr so that diagnostics
# never end up inside values captured with $(...).
# "${1-}" keeps a call without arguments from tripping "set -u".
info()  { echo -e "${GREEN}[INFO]${NC} ${1-}"; }
warn()  { echo -e "${YELLOW}[WARN]${NC} ${1-}" >&2; }
error() { echo -e "${RED}[ERROR]${NC} ${1-}" >&2; }
blue()  { echo -e "${BLUE}[INFO]${NC} ${1-}"; }
|
||
|
||
# 生成示例IP配置文件
|
||
# Write a commented sample host list to $HOSTS_FILE (overwriting it), then
# terminate the script so the user can edit the file.
# Globals: HOSTS_FILE (read)
# Exits:   0 after writing the file, 1 if the file cannot be written.
generate_sample_hosts_file() {
    info "生成示例IP配置文件: $HOSTS_FILE"

    # One quoted here-doc instead of many appends: a single write that is
    # either complete or reported as failed.
    if ! cat > "$HOSTS_FILE" << 'SAMPLE_EOF'
# Node Exporter批量安装主机列表
# 格式:每行一个主机,支持以下格式:
# 1. 仅IP地址(默认端口22)
# 2. IP:端口(指定SSH端口)
# 3. #开头的行是注释,会被忽略
# 4. 空行会被忽略

# 示例:
# 192.168.1.10
# 192.168.1.11:2222
# 192.168.1.12

# 请在下方添加您的主机:
10.150.10.83
10.150.10.86
10.150.10.87
SAMPLE_EOF
    then
        error "无法写入IP配置文件: $HOSTS_FILE"
        exit 1
    fi

    info "示例IP配置文件已生成"
    info "请编辑 $HOSTS_FILE 添加您的主机,然后重新运行脚本"
    exit 0
}
|
||
|
||
# 读取IP配置文件
|
||
# Load the host list from $HOSTS_FILE into the global array REMOTE_HOSTS.
# Blank lines and lines whose first non-blank character is "#" are skipped;
# surrounding whitespace and a trailing CR are stripped from each entry.
# If the file is missing, the user is offered a generated sample file.
# Globals: HOSTS_FILE (read), REMOTE_HOSTS (written)
# Exits:   1 when the file is missing (and no sample is wanted) or holds no
#          usable host.
read_hosts_file() {
    info "读取IP配置文件: $HOSTS_FILE"

    if [ ! -f "$HOSTS_FILE" ]; then
        warn "IP配置文件不存在: $HOSTS_FILE"
        local generate_answer=""
        read -r -p "是否生成示例配置文件? (y/n): " generate_answer || true
        if [[ "$generate_answer" == "y" || "$generate_answer" == "Y" ]]; then
            generate_sample_hosts_file
        else
            error "请创建IP配置文件或修改脚本中的HOSTS_FILE配置项"
            exit 1
        fi
    fi

    REMOTE_HOSTS=()
    local line
    # "|| [ -n "$line" ]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        line="${line%$'\r'}"                        # tolerate CRLF files
        line="${line#"${line%%[![:space:]]*}"}"     # trim leading whitespace
        line="${line%"${line##*[![:space:]]}"}"     # trim trailing whitespace
        # Filter after trimming so indented comments are recognised as well.
        if [[ -z "$line" || "$line" == \#* ]]; then
            continue
        fi
        REMOTE_HOSTS+=("$line")
    done < "$HOSTS_FILE"

    if [ "${#REMOTE_HOSTS[@]}" -eq 0 ]; then
        error "IP配置文件中没有有效的主机"
        info "请编辑 $HOSTS_FILE 添加您的主机"
        exit 1
    fi

    info "成功读取 ${#REMOTE_HOSTS[@]} 个主机"
}
|
||
|
||
# 检查本地依赖
|
||
# Verify that the local tools this script relies on are installed.
# - scp/ssh/tar/timeout/stat/md5sum are mandatory (exit 1 when missing).
# - docker-compose is optional; without it Prometheus cannot be restarted.
# - promtool is optional; its presence is recorded in PROMTOOL_AVAILABLE.
# - sshpass is only needed for password auth; when it is missing the user is
#   asked whether to continue (exit 0 on refusal).
# Globals: PROMTOOL_AVAILABLE (written)
check_dependencies() {
    info "检查本地依赖..."

    local -a required=("scp" "ssh" "tar" "timeout" "stat" "md5sum" "docker-compose")
    local dep
    for dep in "${required[@]}"; do
        if command -v "$dep" &> /dev/null; then
            continue
        fi
        if [ "$dep" = "docker-compose" ]; then
            warn "未找到docker-compose命令,将无法自动重启Prometheus"
        else
            error "未找到命令: $dep"
            exit 1
        fi
    done

    # promtool is used for the configuration syntax check.
    if command -v promtool &> /dev/null; then
        PROMTOOL_AVAILABLE=true
        info "检测到promtool,将自动检查Prometheus配置语法"
    else
        PROMTOOL_AVAILABLE=false
        warn "未找到promtool,将跳过配置语法检查"
        info "安装promtool: yum install -y prometheus2"
    fi

    # sshpass: required for password auth, not for key auth.
    if ! command -v sshpass &> /dev/null; then
        warn "未找到sshpass命令"
        info "如果您使用密码认证,请先安装sshpass:"
        info " CentOS/RHEL 7/8/9: yum install -y epel-release && yum install -y sshpass"
        info " CentOS/RHEL 6: yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-6.noarch.rpm && yum install -y sshpass"
        info ""
        info "如果您使用SSH密钥认证,则无需安装sshpass"
        local continue_answer=""
        read -r -p "是否继续执行? (y/n): " continue_answer || true
        if [[ "$continue_answer" != "y" && "$continue_answer" != "Y" ]]; then
            info "退出脚本"
            exit 0
        fi
    else
        info "sshpass已安装 (版本: $(sshpass -V 2>&1 | head -1 | awk '{print $2}'))"
    fi

    info "所有依赖检查通过"
}
|
||
|
||
# 验证本地已解压目录
|
||
# Check that $LOCAL_EXTRACT_DIR exists and contains the node_exporter binary,
# and record the binary's MD5 for later comparison with the remote copy.
# Globals: LOCAL_EXTRACT_DIR (read), LOCAL_BINARY_MD5 (written)
# Exits:   1 when the directory or the binary is missing.
verify_local_directory() {
    info "验证本地已解压目录: $LOCAL_EXTRACT_DIR"

    if [ ! -d "$LOCAL_EXTRACT_DIR" ]; then
        error "本地目录不存在: $LOCAL_EXTRACT_DIR"
        info "请将解压好的node_exporter目录放在上述路径,或修改脚本中的LOCAL_EXTRACT_DIR配置项"
        exit 1
    fi

    if [ ! -f "$LOCAL_EXTRACT_DIR/node_exporter" ]; then
        error "本地目录中缺少node_exporter二进制文件"
        info "请确保您的目录结构正确,包含node_exporter可执行文件"
        exit 1
    fi

    # md5sum prints "<hash>  <file>"; keep the hash only.
    LOCAL_BINARY_MD5=$(md5sum "$LOCAL_EXTRACT_DIR/node_exporter" | awk '{print $1}')
    info "本地二进制文件MD5: $LOCAL_BINARY_MD5"
    info "本地目录验证通过"
}
|
||
|
||
# 验证Prometheus配置文件
|
||
# Sanity-check the Prometheus setup before any modification is attempted:
# the config file and the docker-compose file must exist, the config must
# have a top-level "scrape_configs:" key and be writable. A missing final
# newline in the config is repaired because new jobs are appended to it.
# Globals: PROMETHEUS_CONFIG_FILE, DOCKER_COMPOSE_PATH (read)
# Exits:   1 on the first failed check.
verify_prometheus_config() {
    info "验证Prometheus配置文件..."

    if [ ! -f "$PROMETHEUS_CONFIG_FILE" ]; then
        error "Prometheus配置文件不存在: $PROMETHEUS_CONFIG_FILE"
        info "请修改脚本中的PROMETHEUS_CONFIG_FILE配置项为正确路径"
        exit 1
    fi

    if [ ! -f "$DOCKER_COMPOSE_PATH" ]; then
        error "docker-compose文件不存在: $DOCKER_COMPOSE_PATH"
        info "请修改脚本中的DOCKER_COMPOSE_PATH配置项为正确路径"
        exit 1
    fi

    if ! grep -q "^scrape_configs:" "$PROMETHEUS_CONFIG_FILE"; then
        error "Prometheus配置文件中未找到scrape_configs部分"
        exit 1
    fi

    if [ ! -w "$PROMETHEUS_CONFIG_FILE" ]; then
        error "Prometheus配置文件不可写: $PROMETHEUS_CONFIG_FILE"
        info "请检查文件权限"
        exit 1
    fi

    # $(...) strips a trailing newline, so non-empty output means the last
    # byte of the file is not a newline.
    if [ -n "$(tail -c1 "$PROMETHEUS_CONFIG_FILE")" ]; then
        warn "检测到配置文件末尾没有换行符,将自动修复"
        echo "" >> "$PROMETHEUS_CONFIG_FILE"
        info "已添加换行符到配置文件末尾"
    fi

    info "Prometheus配置文件验证通过"
}
|
||
|
||
# 备份Prometheus配置文件
|
||
# Copy the Prometheus config to "<config>.bak.<YYYYmmdd_HHMMSS>" and remember
# the copy in LAST_BACKUP_FILE. Does nothing unless AUTO_BACKUP_CONFIG is
# "true". A failed copy is only warned about and clears LAST_BACKUP_FILE.
# Globals: AUTO_BACKUP_CONFIG, PROMETHEUS_CONFIG_FILE (read),
#          LAST_BACKUP_FILE (written)
# Returns: always 0.
backup_prometheus_config() {
    if [ "$AUTO_BACKUP_CONFIG" != true ]; then
        return 0
    fi

    LAST_BACKUP_FILE="${PROMETHEUS_CONFIG_FILE}.bak.$(date +%Y%m%d_%H%M%S)"
    info "备份Prometheus配置文件到: $LAST_BACKUP_FILE"

    if cp "$PROMETHEUS_CONFIG_FILE" "$LAST_BACKUP_FILE"; then
        info "配置文件备份成功"
    else
        warn "配置文件备份失败,继续执行..."
        LAST_BACKUP_FILE=""
    fi
    return 0
}
|
||
|
||
# 回滚到上一次备份的配置文件
|
||
# Restore the Prometheus config from the backup recorded in LAST_BACKUP_FILE.
# Globals: LAST_BACKUP_FILE, PROMETHEUS_CONFIG_FILE (read)
# Returns: 0 when the config was restored, 1 when no backup is available or
#          the copy failed.
rollback_prometheus_config() {
    if [ -z "$LAST_BACKUP_FILE" ] || [ ! -f "$LAST_BACKUP_FILE" ]; then
        error "没有可用的备份文件,无法回滚"
        return 1
    fi

    warn "检测到配置错误,正在回滚到备份文件: $LAST_BACKUP_FILE"

    if ! cp "$LAST_BACKUP_FILE" "$PROMETHEUS_CONFIG_FILE"; then
        error "回滚失败"
        return 1
    fi

    info "配置文件已回滚"
    return 0
}
|
||
|
||
# 检查Prometheus配置语法
|
||
# Validate the Prometheus config with "promtool check config".
# The check is skipped (and counts as passed) when AUTO_CHECK_CONFIG is not
# "true" or promtool is unavailable.
# Globals: AUTO_CHECK_CONFIG, PROMTOOL_AVAILABLE, PROMETHEUS_CONFIG_FILE (read)
# Returns: 0 when the config is valid or the check was skipped, 1 otherwise
#          (promtool's report is printed to stderr).
check_prometheus_config_syntax() {
    # PROMTOOL_AVAILABLE is only assigned by the dependency check; default it
    # so that "set -u" does not abort when that check has not run.
    if [ "$AUTO_CHECK_CONFIG" != true ] || [ "${PROMTOOL_AVAILABLE:-false}" != true ]; then
        return 0
    fi

    info "检查Prometheus配置语法..."

    # Run promtool once and keep its report for the failure case.
    local report
    if report=$(promtool check config "$PROMETHEUS_CONFIG_FILE" 2>&1); then
        info "Prometheus配置语法正确"
        return 0
    fi

    error "Prometheus配置语法错误"
    printf '%s\n' "$report" >&2
    return 1
}
|
||
|
||
# ==============================================
|
||
# 纯echo追加版配置添加函数(核心修改)
|
||
# ==============================================
|
||
# Append a scrape job for one node to the end of the Prometheus config:
#
#     - job_name: "<hostname>"
#       static_configs:
#         - targets: ["<ip>:<NODE_EXPORTER_PORT>"]
#
# The job is appended at the end of the file, so "scrape_configs:" is
# presumably expected to be the last top-level section — verify for your
# config. An existing job_name or target is left alone (returns 0).
#
# Arguments: $1 - job name (normally the remote short hostname)
#            $2 - IP address or host name of the node
# Globals:   PROMETHEUS_CONFIG_FILE, NODE_EXPORTER_PORT (read),
#            NEED_RESTART_PROMETHEUS (set to true on success)
# Returns:   0 when the job was added or already present; 1 when the write or
#            the syntax check failed (the file is restored in that case).
update_prometheus_config() {
    local hostname="$1"
    local ip="$2"
    local target="${ip}:${NODE_EXPORTER_PORT}"
    local job_line="  - job_name: \"$hostname\""
    local static_line="    static_configs:"
    local target_line="      - targets: [\"$target\"]"
    local expected_block snapshot

    info "更新Prometheus配置文件(纯echo追加方式)..."

    # Fixed-string matching: host names and IPs contain ".", which must not
    # act as a regex wildcard.
    if grep -qF -- "job_name: \"$hostname\"" "$PROMETHEUS_CONFIG_FILE"; then
        warn "Prometheus配置中已存在job_name: $hostname,跳过添加"
        return 0
    fi
    if grep -qF -- "targets: [\"$target\"]" "$PROMETHEUS_CONFIG_FILE"; then
        warn "Prometheus配置中已存在target: $target,跳过添加"
        return 0
    fi

    # User-visible backup (honours AUTO_BACKUP_CONFIG) ...
    backup_prometheus_config || true
    # ... plus a private snapshot, so a bad edit can always be undone even
    # when backups are disabled.
    if ! snapshot=$(mktemp); then
        error "无法创建临时文件"
        return 1
    fi
    if ! cp "$PROMETHEUS_CONFIG_FILE" "$snapshot"; then
        error "无法读取配置文件: $PROMETHEUS_CONFIG_FILE"
        rm -f "$snapshot"
        return 1
    fi

    # The new block must start on its own line.
    if [ -n "$(tail -c1 "$PROMETHEUS_CONFIG_FILE")" ]; then
        warn "检测到配置文件末尾没有换行符,自动添加"
        echo "" >> "$PROMETHEUS_CONFIG_FILE"
    fi

    info "开始逐行添加配置..."
    expected_block=$(printf '%s\n%s\n%s' "$job_line" "$static_line" "$target_line")
    if ! printf '%s\n' "$expected_block" >> "$PROMETHEUS_CONFIG_FILE" \
        || [ "$(tail -n 3 "$PROMETHEUS_CONFIG_FILE")" != "$expected_block" ]; then
        error " ❌ 配置写入失败"
        cp "$snapshot" "$PROMETHEUS_CONFIG_FILE" || error "回滚失败"
        rm -f "$snapshot"
        return 1
    fi

    info "✅ 所有配置行写入成功!"
    info "已添加的完整配置块:"
    info "$job_line"
    info "$static_line"
    info "$target_line"

    if check_prometheus_config_syntax; then
        NEED_RESTART_PROMETHEUS=true
        rm -f "$snapshot"
        return 0
    fi

    # Syntax error: put the pre-change content back.
    warn "检测到配置错误,正在回滚配置文件"
    if cp "$snapshot" "$PROMETHEUS_CONFIG_FILE"; then
        info "配置文件已回滚"
    else
        error "回滚失败"
    fi
    rm -f "$snapshot"
    return 1
}
|
||
|
||
# 从Prometheus配置中移除监控节点(也使用纯echo方式重建)
|
||
# Remove one node's scrape job from the Prometheus config.
#
# Only the three-line layout written by update_prometheus_config is handled:
#
#     - job_name: "<name>"
#       static_configs:
#         - targets: ["<ip:port>"]
#
# Arguments: $1 - either the job name or the "ip:port" target of the job
# Globals:   PROMETHEUS_CONFIG_FILE (read), NEED_RESTART_PROMETHEUS (set to
#            true on success)
# Returns:   0 when the job was removed and the config is still valid;
#            1 when nothing matched, the layout was unexpected, or the syntax
#            check failed (a rollback is attempted in the last case).
remove_from_prometheus_config() {
    local identifier="$1"   # job name or ip:port
    local job_pattern="job_name: \"$identifier\""
    local target_pattern="targets: [\"$identifier\"]"
    local start_line="" matched_by="" match_line second third

    info "从Prometheus配置中移除监控节点: $identifier"

    # Locate the block with fixed-string matching; "|| true" because a miss is
    # an expected outcome, not an error.
    match_line=$(grep -n -F -m1 -- "$job_pattern" "$PROMETHEUS_CONFIG_FILE" | cut -d: -f1 || true)
    if [ -n "$match_line" ]; then
        info "找到job_name: $identifier,正在移除..."
        matched_by="job"
        start_line="$match_line"
    else
        match_line=$(grep -n -F -m1 -- "$target_pattern" "$PROMETHEUS_CONFIG_FILE" | cut -d: -f1 || true)
        if [ -n "$match_line" ] && [ "$match_line" -gt 2 ]; then
            info "找到target: $identifier,正在移除..."
            matched_by="target"
            # The targets line is the LAST line of the block, so the block
            # starts two lines above it.
            start_line=$((match_line - 2))
        fi
    fi

    if [ -z "$start_line" ]; then
        warn "在Prometheus配置中未找到节点: $identifier"
        return 1
    fi

    # Refuse to delete anything that does not look like the expected block.
    match_line=$(sed -n "${start_line}p" "$PROMETHEUS_CONFIG_FILE")
    second=$(sed -n "$((start_line + 1))p" "$PROMETHEUS_CONFIG_FILE")
    third=$(sed -n "$((start_line + 2))p" "$PROMETHEUS_CONFIG_FILE")
    if [[ "$match_line" != *job_name:* || "$second" != *static_configs:* || "$third" != *targets:* ]]; then
        warn "节点 $identifier 的配置块格式不符合预期,请手动移除"
        return 1
    fi

    # Back up only now that a change is certain.
    backup_prometheus_config || true

    if ! sed -i "${start_line},$((start_line + 2))d" "$PROMETHEUS_CONFIG_FILE"; then
        error "移除节点失败: $identifier"
        return 1
    fi

    if [ "$matched_by" = "job" ]; then
        info "✅ 已从Prometheus配置中移除job: $identifier"
    else
        info "✅ 已从Prometheus配置中移除target: $identifier"
    fi

    if check_prometheus_config_syntax; then
        NEED_RESTART_PROMETHEUS=true
        return 0
    fi

    # A failed rollback is already reported by the helper; do not let its
    # status abort the script under "set -e".
    rollback_prometheus_config || true
    return 1
}
|
||
|
||
# 重启Prometheus服务
|
||
# Restart the "prometheus" docker-compose service so a changed config takes
# effect. Does nothing unless both AUTO_RESTART_PROMETHEUS and
# NEED_RESTART_PROMETHEUS are "true".
# Globals: AUTO_RESTART_PROMETHEUS, NEED_RESTART_PROMETHEUS,
#          DOCKER_COMPOSE_PATH (read)
# Returns: always 0; a failed restart is reported, not propagated.
restart_prometheus() {
    if [ "$AUTO_RESTART_PROMETHEUS" != true ] || [ "$NEED_RESTART_PROMETHEUS" != true ]; then
        return 0
    fi

    info "重启Prometheus服务使配置生效..."

    local compose_dir ps_output
    compose_dir=$(dirname "$DOCKER_COMPOSE_PATH")

    # docker-compose is run from the compose directory inside a subshell so
    # the caller's working directory is untouched; relative paths such as
    # HOSTS_FILE keep working afterwards.
    if (cd "$compose_dir" && docker-compose restart prometheus); then
        info "✅ Prometheus重启成功,新配置已生效"
        # Give Prometheus time to come up.
        sleep 10
        # Capture first, then grep: "grep -q" may close a pipe early, which
        # would make the pipeline fail under "set -o pipefail".
        ps_output=$(cd "$compose_dir" && docker-compose ps) || ps_output=""
        if grep -q "prometheus.*Up" <<< "$ps_output"; then
            info "Prometheus服务运行正常"
        else
            warn "Prometheus服务可能未正常启动,请手动检查"
        fi
    else
        error "Prometheus重启失败,请手动执行以下命令检查:"
        error " cd $compose_dir"
        error " docker-compose logs -f prometheus"
    fi
    return 0
}
|
||
|
||
# 远程执行命令
|
||
remote_exec() {
|
||
local host="$1"
|
||
local port="$2"
|
||
local user="$3"
|
||
local password="$4"
|
||
local cmd="$5"
|
||
|
||
# SSH通用选项:完全静默警告、禁用主机密钥检查、设置超时
|
||
local ssh_options="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/nu ll"
|
||
ssh_options="$ssh_options -o LogLevel=ERROR"
|
||
ssh_options="$ssh_options -o ConnectTimeout=$SSH_CONNECT_TIMEOUT"
|
||
ssh_options="$ssh_options -o ServerAliveInterval=$SSH_SERVER_ALIVE_INTERVAL"
|
||
ssh_options="$ssh_options -o ServerAliveCountMax=3"
|
||
ssh_options="$ssh_options -o TCPKeepAlive=yes"
|
||
|
||
# 使用timeout命令防止远程执行无限卡住
|
||
local exit_code=0
|
||
if [ -n "$password" ]; then
|
||
# 使用环境变量传递密码,避免特殊字符问题
|
||
SSHPASS="$password" sshpass -e ssh $ssh_options -p "$port" "${user}@${ho st}" "$cmd"
|
||
exit_code=$?
|
||
else
|
||
# 使用密钥认证
|
||
ssh $ssh_options -p "$port" "${user}@${host}" "$cmd"
|
||
exit_code=$?
|
||
fi
|
||
|
||
# 正确的错误代码解释
|
||
if [ $exit_code -ne 0 ]; then
|
||
if [ $exit_code -eq 124 ]; then
|
||
error "远程命令执行超时(超过${REMOTE_CMD_TIMEOUT}秒)"
|
||
elif [ $exit_code -eq 255 ]; then
|
||
error "SSH连接失败:网络问题或远程主机不可达"
|
||
else
|
||
# 1-254都是远程命令本身的退出代码
|
||
error "远程命令执行失败,退出代码: $exit_code"
|
||
fi
|
||
fi
|
||
|
||
return $exit_code
|
||
}
|
||
|
||
# 远程复制目录(增强版,带完整性校验和重试机制)
|
||
# Copy the contents of a local directory to a remote directory with scp and
# verify the transfer by comparing the MD5 of the remote "node_exporter"
# file with an expected hash. Failed or corrupt transfers are retried.
#
# Arguments: $1 host, $2 SSH port, $3 user,
#            $4 password (empty = key authentication),
#            $5 local source directory, $6 remote destination directory,
#            $7 expected MD5 of <dest>/node_exporter
# Globals:   MAX_TRANSFER_RETRIES, SSH_CONNECT_TIMEOUT,
#            SSH_SERVER_ALIVE_INTERVAL (read)
# Returns:   0 once a transfer verified successfully, 1 after all attempts
#            failed.
remote_copy_dir_with_verify() {
    local host="$1"
    local port="$2"
    local user="$3"
    local password="$4"
    local src_dir="$5"
    local dest_dir="$6"
    local expected_md5="$7"

    local -a scp_options=(
        -o StrictHostKeyChecking=no
        -o UserKnownHostsFile=/dev/null
        -o LogLevel=ERROR
        -o "ConnectTimeout=$SSH_CONNECT_TIMEOUT"
        -o "ServerAliveInterval=$SSH_SERVER_ALIVE_INTERVAL"
        -o ServerAliveCountMax=3
        -o TCPKeepAlive=yes
    )

    local attempt=0
    local copied remote_md5

    while [ "$attempt" -lt "$MAX_TRANSFER_RETRIES" ]; do
        if [ "$attempt" -gt 0 ]; then
            warn "目录传输重试 $attempt/$MAX_TRANSFER_RETRIES..."
            # Drop a possibly partial copy and recreate the directory; scp
            # cannot copy several files into a directory that does not exist.
            remote_exec "$host" "$port" "$user" "$password" \
                "rm -rf '$dest_dir' && mkdir -p '$dest_dir'" &> /dev/null || true
            sleep 2
        fi

        info "正在传输目录: $src_dir -> $dest_dir"

        # Record the outcome with "||" so a failed scp neither aborts the
        # script under "set -e" nor gets its status overwritten.
        copied=true
        if [ -n "$password" ]; then
            SSHPASS="$password" sshpass -e scp -r "${scp_options[@]}" -P "$port" \
                "$src_dir"/* "${user}@${host}:${dest_dir}/" || copied=false
        else
            scp -r "${scp_options[@]}" -P "$port" \
                "$src_dir"/* "${user}@${host}:${dest_dir}/" || copied=false
        fi

        if [ "$copied" = true ]; then
            info "验证远程文件完整性..."
            remote_md5=$(remote_exec "$host" "$port" "$user" "$password" \
                "md5sum '$dest_dir/node_exporter' 2>/dev/null | cut -d' ' -f1" 2>/dev/null) \
                || remote_md5=""

            if [ "$remote_md5" = "$expected_md5" ]; then
                info "✅ 目录传输成功,MD5校验通过: $remote_md5"
                return 0
            fi
            error "MD5校验不匹配!本地: $expected_md5,远程: $remote_md5"
        else
            error "目录传输失败"
        fi

        # Plain assignment: "((attempt++))" returns status 1 when the old
        # value is 0, which "set -e" treats as a failure.
        attempt=$((attempt + 1))
    done

    error "目录传输失败,已重试 $MAX_TRANSFER_RETRIES 次"
    return 1
}
|
||
|
||
# SSH连接诊断函数
|
||
# Run a series of checks to help explain why an SSH connection fails:
# ping, TCP port probe (nc, if installed), SSH banner probe (telnet),
# an authentication attempt, and free space in the remote /tmp.
# Each step only reports; no step aborts the diagnosis.
#
# Arguments: $1 host, $2 SSH port, $3 user,
#            $4 password (empty = key authentication)
# Returns:   always 0.
diagnose_ssh_connection() {
    local host="$1"
    local port="$2"
    local user="$3"
    local password="$4"
    local -a probe_options=(-o StrictHostKeyChecking=no -o LogLevel=ERROR -o ConnectTimeout=10)
    local auth_rc=0
    local df_output disk_space

    info "开始SSH连接诊断..."

    info "1. 测试网络连通性..."
    if ping -c 5 -W 2 "$host" &> /dev/null; then
        info " ✓ 网络连通性正常"
    else
        error " ✗ 网络连通性失败,无法ping通主机"
    fi

    info "2. 测试SSH端口 $port 是否开放..."
    if command -v nc &> /dev/null; then
        if nc -z -w 10 "$host" "$port"; then
            info " ✓ SSH端口 $port 开放"
        else
            error " ✗ SSH端口 $port 关闭或被防火墙阻止"
        fi
    else
        warn " 未找到nc命令,跳过端口测试"
    fi

    info "3. 测试SSH服务响应..."
    if timeout 10 bash -c "echo '' | telnet $host $port 2>&1 | grep -i 'ssh'" &> /dev/null; then
        info " ✓ SSH服务正在运行"
    else
        error " ✗ SSH服务未响应"
    fi

    info "4. 测试身份验证..."
    # The status is recorded right at the command; testing "$?" after the
    # if/fi would see the status of the compound statement, and a failing
    # login would abort the script under "set -e".
    if [ -n "$password" ]; then
        info " 使用密码认证方式"
        SSHPASS="$password" sshpass -e ssh "${probe_options[@]}" -p "$port" \
            "${user}@${host}" "echo '认证成功'" 2>&1 || auth_rc=$?
    else
        info " 使用密钥认证方式"
        ssh "${probe_options[@]}" -p "$port" \
            "${user}@${host}" "echo '认证成功'" 2>&1 || auth_rc=$?
    fi

    if [ "$auth_rc" -eq 0 ]; then
        info " ✓ 身份验证成功"
    else
        error " ✗ 身份验证失败"
        info ""
        info "常见解决方案:"
        info " 1. 检查用户名和密码是否正确"
        info " 2. 确认远程主机允许密码认证(PasswordAuthentication yes)"
        info " 3. 确认远程主机允许root登录(PermitRootLogin yes)"
        info " 4. 检查远程主机的防火墙和SELinux设置"
        info " 5. 查看远程主机的SSH日志:tail -f /var/log/secure"
    fi

    info "5. 测试远程磁盘空间..."
    # df pads its columns with runs of spaces, so the output is parsed
    # locally with awk (whitespace-run fields); field 4 of "df -P" is the
    # available space in 1K blocks.
    df_output=$(remote_exec "$host" "$port" "$user" "$password" "df -P /tmp" 2>/dev/null) || df_output=""
    disk_space=$(awk 'NF >= 6 { avail = $4 } END { print avail }' <<< "$df_output")
    if [[ "$disk_space" =~ ^[0-9]+$ ]] && [ "$disk_space" -gt 10240 ]; then
        info " ✓ 远程/tmp目录可用空间: $((disk_space / 1024)) MB"
    else
        error " ✗ 远程/tmp目录空间不足或无法访问"
    fi

    info "诊断完成"
    return 0
}
|
||
|
||
# 在单个主机上安装Node Exporter
|
||
# Map the text of /etc/redhat-release (or /etc/issue) to
# "<system_type> <service_manager>", or "unknown unknown".
# The MAJOR version after "release" decides, so "CentOS Linux release 7.6"
# is classified as 7, not 6.
_classify_os_release() {
    local release="$1"
    local major=""
    if [[ "$release" =~ [Rr]elease[[:space:]]+([0-9]+) ]]; then
        major="${BASH_REMATCH[1]}"
    fi
    if grep -qiE 'centos|red.*hat|rhel' <<< "$release"; then
        case "$major" in
            5 | 6)
                echo "rhel${major} sysvinit"
                return 0
                ;;
            7)
                echo "rhel7 systemd"
                return 0
                ;;
            9)
                # Only Red Hat / RHEL 9 gets the rhel9-specific handling.
                if grep -qiE 'red.*hat|rhel' <<< "$release"; then
                    echo "rhel9 systemd"
                    return 0
                fi
                ;;
        esac
    fi
    echo "unknown unknown"
}

# Count one failed host. Plain arithmetic assignment is used because
# "((FAILURE_COUNT++))" returns status 1 when the counter is 0, which aborts
# the script under "set -e".
_install_mark_failed() {
    FAILURE_COUNT=$((FAILURE_COUNT + 1))
    FAILURE_HOSTS+=("$1")
}

# Install Node Exporter on one remote host and register it in Prometheus.
#
# Steps: test SSH, detect the OS / service manager, stage the local release
# directory on the host (MD5-verified), run a generated install script there,
# then add a scrape job named after the remote short hostname.
#
# Arguments: $1 - "host" or "host:ssh_port" (port defaults to 22)
#            $2 - remote user
#            $3 - password (empty = key authentication)
# Globals:   REMOTE_TMP_DIR, LOCAL_EXTRACT_DIR, LOCAL_BINARY_MD5,
#            NODE_EXPORTER_PORT (read); SUCCESS_COUNT, FAILURE_COUNT,
#            FAILURE_HOSTS (updated)
# Returns:   1 when the host could not be reached or prepared; 0 once the
#            remote install script has run (its outcome is recorded in the
#            counters).
install_on_host() {
    local host_port="$1"
    local user="$2"
    local password="$3"
    local host="${host_port%:*}"
    local port="22"

    if [[ "$host_port" == *:* ]]; then
        port="${host_port#*:}"
    fi

    info "============================================="
    info "开始处理主机: ${host}:${port}"

    info "测试SSH连接..."
    if ! remote_exec "$host" "$port" "$user" "$password" "echo 'SSH连接成功'" &> /dev/null; then
        error "无法连接到主机 ${host}:${port}"

        local diagnose_answer=""
        read -r -p "是否进行SSH连接诊断? (y/n): " diagnose_answer || true
        if [[ "$diagnose_answer" == "y" || "$diagnose_answer" == "Y" ]]; then
            diagnose_ssh_connection "$host" "$port" "$user" "$password" || true
        fi

        _install_mark_failed "$host_port"
        return 1
    fi
    info "✅ SSH连接成功"

    info "检测系统版本..."
    local os_release
    if ! os_release=$(remote_exec "$host" "$port" "$user" "$password" \
        "cat /etc/redhat-release 2>/dev/null || cat /etc/issue 2>/dev/null | head -1"); then
        error "无法获取系统版本信息"
        _install_mark_failed "$host_port"
        return 1
    fi
    info "检测到系统: $os_release"

    local system_type service_manager
    read -r system_type service_manager <<< "$(_classify_os_release "$os_release")"
    if [ "$system_type" = "unknown" ]; then
        warn "不支持的系统类型,尝试使用通用方式安装"
        # Probe for systemd; a failed probe is the expected "no" answer, so
        # remote_exec's diagnostics are discarded.
        if remote_exec "$host" "$port" "$user" "$password" \
            "command -v systemctl > /dev/null 2>&1" &> /dev/null; then
            service_manager="systemd"
        else
            service_manager="sysvinit"
        fi
    fi
    info "系统类型: $system_type, 服务管理器: $service_manager"

    info "清理并创建远程临时目录..."
    if ! remote_exec "$host" "$port" "$user" "$password" \
        "rm -rf '$REMOTE_TMP_DIR' && mkdir -p '$REMOTE_TMP_DIR'"; then
        error "无法创建远程临时目录: $REMOTE_TMP_DIR"
        _install_mark_failed "$host_port"
        return 1
    fi

    info "传输安装文件到远程主机..."
    if ! remote_copy_dir_with_verify "$host" "$port" "$user" "$password" \
        "$LOCAL_EXTRACT_DIR" "$REMOTE_TMP_DIR" "$LOCAL_BINARY_MD5"; then
        error "传输安装文件失败"
        _install_mark_failed "$host_port"
        return 1
    fi

    info "开始远程安装..."
    # The install script is generated into a private temp file (mktemp)
    # rather than a fixed, predictable path under /tmp.
    local local_script
    if ! local_script=$(mktemp); then
        error "无法创建临时文件"
        _install_mark_failed "$host_port"
        return 1
    fi

    # Unquoted here-doc: $REMOTE_TMP_DIR, $LOCAL_BINARY_MD5, $system_type,
    # $service_manager and ${NODE_EXPORTER_PORT} are expanded HERE; anything
    # written as \$ is left for the remote shell. A trailing "\" joins lines,
    # so ExecStart ends up as one long line in the unit file.
    cat > "$local_script" << EOF
#!/bin/bash
set -euo pipefail

cd "$REMOTE_TMP_DIR"

# 验证文件完整性
echo "验证远程文件完整性..."
if ! md5sum "node_exporter" | cut -d' ' -f1 | grep -q "$LOCAL_BINARY_MD5"; then
    echo "ERROR: 远程文件MD5校验失败!"
    echo "本地MD5: $LOCAL_BINARY_MD5"
    echo "远程MD5: \$(md5sum "node_exporter" | cut -d' ' -f1)"
    exit 1
fi
echo "文件完整性验证通过"

# 创建node_exporter用户(如果不存在)
if ! id node_exporter &> /dev/null; then
    useradd -M -s /sbin/nologin node_exporter 2>/dev/null || adduser -M -s /sbin/nologin node_exporter
fi

# 安装二进制文件
cp node_exporter /usr/local/bin/
chmod +x /usr/local/bin/node_exporter
chown node_exporter:node_exporter /usr/local/bin/node_exporter

# 创建数据目录
mkdir -p /var/lib/node_exporter
chown node_exporter:node_exporter /var/lib/node_exporter

# RHEL9专属优化:配置SELinux允许node_exporter运行
if [ "$system_type" = "rhel9" ]; then
    echo "配置RHEL9 SELinux规则..."
    # 允许node_exporter绑定到任何端口
    semanage port -a -t http_port_t -p tcp ${NODE_EXPORTER_PORT} 2>/dev/null || true
    # 允许node_exporter读取系统信息
    setsebool -P domain_can_mmap_files 1 2>/dev/null || true
fi

# 安装服务文件
if [ "$service_manager" = "systemd" ]; then
    # Systemd服务文件
    cat > /etc/systemd/system/node_exporter.service << 'SERVICE_EOF'
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter \
    --collector.systemd \
    --collector.processes \
    --collector.filesystem.ignored-mount-points="^/(sys|proc|dev|host|etc|run|var/lib/docker)($|/)" \
    --collector.cpu.info \
    --collector.meminfo \
    --collector.loadavg \
    --collector.diskstats \
    --collector.netdev \
    --web.listen-address=:${NODE_EXPORTER_PORT}
Restart=always
RestartSec=5
Delegate=yes
ProtectSystem=strict
ProtectHome=yes
PrivateTmp=yes
ProtectKernelTunables=no
ProtectControlGroups=no

[Install]
WantedBy=multi-user.target
SERVICE_EOF

    systemctl daemon-reload
    systemctl enable node_exporter
    systemctl start node_exporter
else
    # SysVinit服务文件
    cat > /etc/init.d/node_exporter << 'SERVICE_EOF'
#!/bin/bash
# chkconfig: 2345 90 10
# description: Prometheus Node Exporter

NAME="node_exporter"
DAEMON="/usr/local/bin/\${NAME}"
PIDFILE="/var/run/\${NAME}.pid"
USER="node_exporter"
OPTIONS="--collector.systemd --collector.processes --collector.filesystem.ignored-mount-points=\"^/(sys|proc|dev|host|etc)(\\\$|/)\" --web.listen-address=:${NODE_EXPORTER_PORT}"

start() {
    if [ -f "\$PIDFILE" ]; then
        echo "\$NAME is already running"
        exit 1
    fi
    echo "Starting \$NAME..."
    su -s /bin/sh \$USER -c "\$DAEMON \$OPTIONS >/dev/null 2>&1 & echo \\\$!" > "\$PIDFILE"
    echo "\$NAME started"
}

stop() {
    if [ ! -f "\$PIDFILE" ]; then
        echo "\$NAME is not running"
        exit 1
    fi
    echo "Stopping \$NAME..."
    kill \$(cat "\$PIDFILE")
    rm -f "\$PIDFILE"
    echo "\$NAME stopped"
}

status() {
    if [ -f "\$PIDFILE" ]; then
        echo "\$NAME is running (PID: \$(cat "\$PIDFILE"))"
    else
        echo "\$NAME is not running"
    fi
}

restart() {
    stop
    sleep 2
    start
}

case "\$1" in
    start) start ;;
    stop) stop ;;
    status) status ;;
    restart) restart ;;
    *) echo "Usage: \$0 {start|stop|status|restart}"; exit 1 ;;
esac
SERVICE_EOF

    chmod +x /etc/init.d/node_exporter
    chkconfig --add node_exporter 2>/dev/null || true
    chkconfig node_exporter on 2>/dev/null || true
    service node_exporter start
fi

# 配置防火墙
if [ "$system_type" = "rhel5" ] || [ "$system_type" = "rhel6" ]; then
    if command -v iptables &> /dev/null; then
        iptables -I INPUT -p tcp --dport ${NODE_EXPORTER_PORT} -j ACCEPT
        if [ -f /etc/sysconfig/iptables ]; then
            service iptables save 2>/dev/null || true
        fi
    fi
elif [ "$system_type" = "rhel7" ] || [ "$system_type" = "rhel9" ]; then
    if command -v firewall-cmd &> /dev/null; then
        firewall-cmd --permanent --add-port=${NODE_EXPORTER_PORT}/tcp 2>/dev/null || true
        firewall-cmd --reload 2>/dev/null || true
    fi
fi

# 验证安装
echo "等待服务启动..."
sleep 5

for i in 1 2 3; do
    echo "验证尝试 \$i/3..."
    if command -v curl &> /dev/null; then
        if curl -s --connect-timeout 5 http://localhost:${NODE_EXPORTER_PORT}/metrics &> /dev/null; then
            echo "SUCCESS: Node Exporter安装成功并正在运行"
            exit 0
        fi
    elif command -v wget &> /dev/null; then
        if wget -q -T 5 -O /dev/null http://localhost:${NODE_EXPORTER_PORT}/metrics; then
            echo "SUCCESS: Node Exporter安装成功并正在运行"
            exit 0
        fi
    else
        if ps aux | grep -v grep | grep node_exporter &> /dev/null; then
            echo "SUCCESS: Node Exporter进程已启动"
            exit 0
        fi
    fi
    sleep 3
done

echo "ERROR: Node Exporter安装失败"
systemctl status node_exporter 2>/dev/null || service node_exporter status 2>/dev/null
exit 1
EOF

    # Ship the script through the SSH session's stdin.
    if ! remote_exec "$host" "$port" "$user" "$password" \
        "cat > '$REMOTE_TMP_DIR/install_remote.sh'" < "$local_script"; then
        error "传输安装脚本失败"
        rm -f "$local_script"
        _install_mark_failed "$host_port"
        return 1
    fi
    rm -f "$local_script"

    if ! remote_exec "$host" "$port" "$user" "$password" \
        "chmod +x '$REMOTE_TMP_DIR/install_remote.sh'"; then
        error "传输安装脚本失败"
        _install_mark_failed "$host_port"
        return 1
    fi

    info "执行远程安装脚本..."
    if remote_exec "$host" "$port" "$user" "$password" "$REMOTE_TMP_DIR/install_remote.sh"; then
        info "✅ 主机 ${host}:${port} 安装成功"
        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))

        info "获取远程主机名..."
        local remote_hostname
        remote_hostname=$(remote_exec "$host" "$port" "$user" "$password" \
            "hostname -s 2>/dev/null || hostname" 2>/dev/null) || remote_hostname=""
        # Anything that is not a plain host name (empty, multi-line, error
        # text) is unusable as a job name.
        if ! [[ "$remote_hostname" =~ ^[A-Za-z0-9._-]+$ ]]; then
            warn "无法获取远程主机名,使用IP作为job_name"
            remote_hostname="$host"
        fi
        info "远程主机名: $remote_hostname"

        # The node itself is installed; a config problem is only warned about.
        if ! update_prometheus_config "$remote_hostname" "$host"; then
            warn "主机 ${host} 已安装,但更新Prometheus配置失败,请手动添加"
        fi
    else
        error "主机 ${host}:${port} 安装失败"
        _install_mark_failed "$host_port"
    fi

    info "清理远程临时文件..."
    remote_exec "$host" "$port" "$user" "$password" "rm -rf '$REMOTE_TMP_DIR'" &> /dev/null || true

    return 0
}
|
||
|
||
# 在单个主机上卸载Node Exporter
|
||
# Count one failed host. Plain arithmetic assignment is used because
# "((FAILURE_COUNT++))" returns status 1 when the counter is 0, which aborts
# the script under "set -e".
_uninstall_mark_failed() {
    FAILURE_COUNT=$((FAILURE_COUNT + 1))
    FAILURE_HOSTS+=("$1")
}

# Uninstall Node Exporter from one remote host and drop its scrape job from
# the Prometheus config (looked up by remote short hostname first, then by
# "<host>:<NODE_EXPORTER_PORT>").
#
# Arguments: $1 - "host" or "host:ssh_port" (port defaults to 22)
#            $2 - remote user
#            $3 - password (empty = key authentication)
# Globals:   NODE_EXPORTER_PORT (read); SUCCESS_COUNT, FAILURE_COUNT,
#            FAILURE_HOSTS (updated)
# Returns:   1 when the host could not be reached; 0 once the remote
#            uninstall script has run (its outcome is recorded in the
#            counters).
uninstall_on_host() {
    local host_port="$1"
    local user="$2"
    local password="$3"
    local host="${host_port%:*}"
    local port="22"
    local target_id="${host}:${NODE_EXPORTER_PORT}"

    if [[ "$host_port" == *:* ]]; then
        port="${host_port#*:}"
    fi

    info "============================================="
    info "开始卸载主机: ${host}:${port}"

    info "测试SSH连接..."
    if ! remote_exec "$host" "$port" "$user" "$password" "echo 'SSH连接成功'" &> /dev/null; then
        error "无法连接到主机 ${host}:${port}"
        _uninstall_mark_failed "$host_port"
        return 1
    fi
    info "✅ SSH连接成功"

    info "获取远程主机名..."
    local remote_hostname
    remote_hostname=$(remote_exec "$host" "$port" "$user" "$password" \
        "hostname -s 2>/dev/null || hostname" 2>/dev/null) || remote_hostname=""
    # Anything that is not a plain host name (empty, multi-line, error text)
    # cannot be a job name; fall back to the target.
    if ! [[ "$remote_hostname" =~ ^[A-Za-z0-9._-]+$ ]]; then
        warn "无法获取远程主机名,将使用IP:PORT从Prometheus配置中移除"
        remote_hostname="$target_id"
    fi
    info "远程主机名: $remote_hostname"

    info "开始远程卸载..."
    # Generated into a private temp file (mktemp) instead of a fixed path.
    local local_script
    if ! local_script=$(mktemp); then
        error "无法创建临时文件"
        _uninstall_mark_failed "$host_port"
        return 1
    fi

    # Unquoted here-doc: ${NODE_EXPORTER_PORT} is expanded HERE.
    cat > "$local_script" << EOF
#!/bin/bash
set -euo pipefail

echo "停止node_exporter服务..."
if command -v systemctl &> /dev/null; then
    systemctl stop node_exporter 2>/dev/null || true
    systemctl disable node_exporter 2>/dev/null || true
    rm -f /etc/systemd/system/node_exporter.service
    systemctl daemon-reload
else
    service node_exporter stop 2>/dev/null || true
    chkconfig node_exporter off 2>/dev/null || true
    chkconfig --del node_exporter 2>/dev/null || true
    rm -f /etc/init.d/node_exporter
fi

echo "删除node_exporter二进制文件..."
rm -f /usr/local/bin/node_exporter

echo "删除node_exporter用户和数据目录..."
userdel node_exporter 2>/dev/null || true
rm -rf /var/lib/node_exporter

echo "删除防火墙规则..."
if command -v firewall-cmd &> /dev/null; then
    firewall-cmd --permanent --remove-port=${NODE_EXPORTER_PORT}/tcp 2>/dev/null || true
    firewall-cmd --reload 2>/dev/null || true
elif command -v iptables &> /dev/null; then
    iptables -D INPUT -p tcp --dport ${NODE_EXPORTER_PORT} -j ACCEPT 2>/dev/null || true
    if [ -f /etc/sysconfig/iptables ]; then
        service iptables save 2>/dev/null || true
    fi
fi

echo "SUCCESS: Node Exporter卸载成功"
exit 0
EOF

    info "执行远程卸载脚本..."
    # The script is streamed to a remote "bash -s" over stdin, so no file with
    # a predictable name is left in the remote /tmp.
    if remote_exec "$host" "$port" "$user" "$password" "bash -s" < "$local_script"; then
        info "✅ 主机 ${host}:${port} 卸载成功"
        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))

        # "if !" keeps a failed lookup from aborting the script under
        # "set -e" and makes the fallback reachable.
        if ! remove_from_prometheus_config "$remote_hostname"; then
            if [ "$remote_hostname" != "$target_id" ]; then
                remove_from_prometheus_config "$target_id" \
                    || warn "未能从Prometheus配置中移除主机 ${host},请手动检查"
            fi
        fi
    else
        error "主机 ${host}:${port} 卸载失败"
        _uninstall_mark_failed "$host_port"
    fi

    rm -f "$local_script"
    return 0
}
|
||
|
||
# 批量安装模式
|
||
# Batch mode: install Node Exporter on every host listed in $HOSTS_FILE,
# after showing the list and asking for confirmation.
# Globals: REMOTE_HOSTS (filled by read_hosts_file), REMOTE_USER,
#          REMOTE_PASSWORD (read)
# Returns: 0 (also when the user declines).
batch_install() {
    read_hosts_file

    info "进入批量安装模式"
    info "将安装到以下 ${#REMOTE_HOSTS[@]} 个主机:"
    local entry
    for entry in "${REMOTE_HOSTS[@]}"; do
        blue " - $entry"
    done

    local confirm=""
    read -r -p "确认开始批量安装? (y/n): " confirm || true
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
        info "取消批量安装"
        return 0
    fi

    for entry in "${REMOTE_HOSTS[@]}"; do
        # A failing host is already counted by install_on_host; "|| true"
        # keeps "set -e" from ending the batch at the first bad host.
        install_on_host "$entry" "$REMOTE_USER" "$REMOTE_PASSWORD" || true
    done
    return 0
}
|
||
|
||
# 批量卸载模式
|
||
# Batch mode: uninstall Node Exporter from every host listed in $HOSTS_FILE,
# after showing the list and asking for confirmation.
# Globals: REMOTE_HOSTS (filled by read_hosts_file), REMOTE_USER,
#          REMOTE_PASSWORD (read)
# Returns: 0 (also when the user declines).
batch_uninstall() {
    read_hosts_file

    info "进入批量卸载模式"
    info "将从以下 ${#REMOTE_HOSTS[@]} 个主机卸载:"
    local entry
    for entry in "${REMOTE_HOSTS[@]}"; do
        blue " - $entry"
    done

    local confirm=""
    read -r -p "确认开始批量卸载? (y/n): " confirm || true
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
        info "取消批量卸载"
        return 0
    fi

    for entry in "${REMOTE_HOSTS[@]}"; do
        # A failing host is already counted by uninstall_on_host; "|| true"
        # keeps "set -e" from ending the batch at the first bad host.
        uninstall_on_host "$entry" "$REMOTE_USER" "$REMOTE_PASSWORD" || true
    done
    return 0
}
|
||
|
||
# 指定单主机安装模式
|
||
# Interactive mode: ask for one host's connection details and install Node
# Exporter on it.
# Prompts: host, SSH port (default 22), user (default root), password
#          (empty = key authentication), confirmation.
# Returns: 0 when cancelled; 1 on invalid input; otherwise the status of
#          install_on_host.
single_install() {
    info "进入指定单主机安装模式"

    local target_host="" target_port="" target_user="" target_password="" confirm=""

    read -r -p "请输入目标主机IP地址: " target_host || true
    read -r -p "请输入SSH端口号(默认22): " target_port || true
    target_port=${target_port:-22}
    read -r -p "请输入登录用户名(默认root): " target_user || true
    target_user=${target_user:-root}
    read -r -s -p "请输入登录密码(留空使用密钥认证): " target_password || true
    echo ""

    # Reject input that cannot form a valid "host:port" before connecting.
    if [ -z "$target_host" ]; then
        error "主机地址不能为空"
        return 1
    fi
    if ! [[ "$target_port" =~ ^[0-9]+$ ]]; then
        error "无效的SSH端口号: $target_port"
        return 1
    fi

    info "您输入的信息:"
    blue " 主机: ${target_host}:${target_port}"
    blue " 用户: ${target_user}"
    if [ -z "$target_password" ]; then
        blue " 认证方式: SSH密钥认证"
    else
        blue " 认证方式: 密码认证"
    fi

    read -r -p "确认信息正确并开始安装? (y/n): " confirm || true
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
        info "取消安装"
        return 0
    fi

    install_on_host "${target_host}:${target_port}" "$target_user" "$target_password"
}
|
||
|
||
# Single-host uninstall mode.
#
# Prompts for host, SSH port (default 22), user (default root) and password
# (empty means key authentication), echoes the choices back, asks for a y/Y
# confirmation and then calls
# uninstall_on_host "<host>:<port>" <user> <password>.
#
# Arguments: none (everything is read from stdin)
# Returns:   0 after the uninstall attempt, or when the user declines.
single_uninstall() {
    local target_host target_port target_user target_password confirm

    info "进入指定单主机卸载模式"

    # -r everywhere: without it `read` strips backslashes from the input.
    read -r -p "请输入目标主机IP地址: " target_host
    read -r -p "请输入SSH端口号(默认22): " target_port
    target_port=${target_port:-22}
    read -r -p "请输入登录用户名(默认root): " target_user
    target_user=${target_user:-root}
    # IFS= additionally keeps leading/trailing blanks of the password intact.
    IFS= read -r -s -p "请输入登录密码(留空使用密钥认证): " target_password
    # -s suppresses the echo of the typed newline, so emit one explicitly.
    echo ""

    info "您输入的信息:"
    blue " 主机: ${target_host}:${target_port}"
    blue " 用户: ${target_user}"
    if [[ -z "$target_password" ]]; then
        blue " 认证方式: SSH密钥认证"
    else
        blue " 认证方式: 密码认证"
    fi

    read -r -p "确认信息正确并开始卸载? (y/n): " confirm
    if [[ "$confirm" != "y" && "$confirm" != "Y" ]]; then
        info "取消卸载"
        return 0
    fi

    # Checked call: uninstall_on_host returns non-zero on at least one
    # failure path, and with `set -e` an unchecked failure would end the
    # script here, before the caller can print its result summary.
    uninstall_on_host "${target_host}:${target_port}" "$target_user" "$target_password" \
        || warn "主机 ${target_host}:${target_port} 卸载未成功"

    return 0
}
|
||
|
||
# Manually add a Prometheus monitoring node.
#
# Prompts for a job_name and a target in "IP:port" form, passes the job name
# and the host part of the target to update_prometheus_config, then calls
# restart_prometheus.
#
# Arguments: none (everything is read from stdin)
manual_add_prometheus() {
    local job_name target

    info "进入手动添加Prometheus监控节点模式"

    # -r keeps backslashes in the typed values literal.
    read -r -p "请输入job_name: " job_name
    read -r -p "请输入target (格式: IP:端口): " target

    # ${target%:*} removes the shortest trailing ":..." suffix, so only the
    # host part is handed over; a target without a colon is passed unchanged.
    # NOTE(review): the port typed by the user is discarded here -- presumably
    # update_prometheus_config appends its own port; confirm.
    update_prometheus_config "$job_name" "${target%:*}"

    # Restart Prometheus.
    restart_prometheus

    info "✅ 手动添加完成"
}
|
||
|
||
# Print the summary of the operation results.
#
# Reports the total, the success count and the failure count, lists every
# entry of FAILURE_HOSTS when there were failures, and adds a note when
# NEED_RESTART_PROMETHEUS is "true".
#
# Globals:   SUCCESS_COUNT, FAILURE_COUNT, FAILURE_HOSTS,
#            NEED_RESTART_PROMETHEUS (read)
# Arguments: none
show_results() {
    local total=$(( SUCCESS_COUNT + FAILURE_COUNT ))
    local failed_host

    info "============================================="
    info "操作结果汇总:"
    info "总主机数: ${total}"
    info "成功: ${SUCCESS_COUNT}"
    info "失败: ${FAILURE_COUNT}"

    if (( FAILURE_COUNT > 0 )); then
        error "失败的主机:"
        for failed_host in "${FAILURE_HOSTS[@]}"; do
            error " - $failed_host"
        done
    elif (( total == 0 )); then
        # Nothing was attempted (e.g. the user cancelled); claiming that
        # "all hosts succeeded" would be misleading in that case.
        warn "未执行任何主机操作"
    else
        info "✅ 所有主机操作成功!"
    fi

    if [[ "$NEED_RESTART_PROMETHEUS" == true ]]; then
        info ""
        info "Prometheus配置已更新"
    fi
}
|
||
|
||
# Main function: interactive entry point of the script.
#
# Prints a banner, runs verify_prometheus_config, shows the operation menu and
# dispatches on the chosen option. Options 5, 7, 8 and 9 exit the script
# directly (status 0) and an invalid option exits with status 1. Options 1-4
# and 6 fall through to the common tail: show_results, restart_prometheus and
# removal of the local temporary scripts.
#
# Globals:   LOCAL_EXTRACT_DIR, HOSTS_FILE (read, for the banner)
# Arguments: accepted but not used
main() {
    local operation_mode
    local diag_host diag_port diag_user diag_password

    info "Node Exporter远程管理脚本(纯echo追加版)"
    info "使用本地已解压目录: $LOCAL_EXTRACT_DIR"
    info "IP配置文件: $HOSTS_FILE"
    info "Prometheus配置添加方式: 纯echo逐行追加"
    info "============================================="

    # Validate the Prometheus configuration.
    verify_prometheus_config

    # Show the operation menu.
    echo ""
    blue "请选择操作:"
    echo " 1. 批量安装(从IP配置文件读取主机列表)"
    echo " 2. 指定安装(手动输入单台主机信息)"
    echo " 3. 批量卸载(从IP配置文件读取主机列表)"
    echo " 4. 指定卸载(手动输入单台主机信息)"
    echo " 5. 手动添加Prometheus监控节点"
    echo " 6. 生成示例IP配置文件"
    echo " 7. SSH连接诊断工具"
    echo " 8. 重新验证本地目录和Prometheus配置"
    echo " 9. 退出"
    echo ""

    # -r keeps backslashes in the typed value literal.
    read -r -p "请输入选项(1-9): " operation_mode

    case "$operation_mode" in
        1)
            verify_local_directory
            batch_install
            ;;
        2)
            verify_local_directory
            single_install
            ;;
        3)
            batch_uninstall
            ;;
        4)
            single_uninstall
            ;;
        5)
            manual_add_prometheus
            exit 0
            ;;
        6)
            generate_sample_hosts_file
            ;;
        7)
            info "进入SSH连接诊断工具"
            read -r -p "请输入目标主机IP地址: " diag_host
            read -r -p "请输入SSH端口号(默认22): " diag_port
            diag_port=${diag_port:-22}
            read -r -p "请输入登录用户名(默认root): " diag_user
            diag_user=${diag_user:-root}
            # IFS= and -r keep the password exactly as typed
            # (blanks and backslashes included).
            IFS= read -r -s -p "请输入登录密码(留空使用密钥认证): " diag_password
            # -s suppresses the echo of the typed newline, so emit one.
            echo ""
            diagnose_ssh_connection "$diag_host" "$diag_port" "$diag_user" "$diag_password"
            exit 0
            ;;
        8)
            info "重新验证本地目录和Prometheus配置..."
            verify_local_directory
            verify_prometheus_config
            info "✅ 所有验证完成,请重新运行脚本"
            exit 0
            ;;
        9)
            info "退出脚本"
            exit 0
            ;;
        *)
            error "无效的选项"
            exit 1
            ;;
    esac

    # Show the results.
    show_results

    # Restart Prometheus (if needed -- presumably restart_prometheus decides
    # that itself; confirm).
    restart_prometheus

    # Remove the local temporary scripts.
    info "清理本地临时文件..."
    rm -f "/tmp/install_remote.sh" "/tmp/uninstall_remote.sh"

    info "✅ 脚本执行完成"
}
|
||
|
||
# Run the main function: call check_dependencies first, then hand all
# command-line arguments to main.
check_dependencies
main "$@"