您好,登录后才能下订单哦!
密码登录
登录注册
点击 登录注册 即表示同意《亿速云用户服务条款》
# 如何编写一键统计数据库临时表空间和阻塞lock信息的Shell脚本
## 前言
在数据库运维工作中,临时表空间使用情况和阻塞锁信息是DBA需要重点关注的两个关键指标。临时表空间的异常增长可能导致数据库性能下降甚至服务中断,而阻塞锁则是引发应用超时和死锁的常见原因。本文将详细介绍如何编写一个全功能的Shell脚本,实现一键统计Oracle数据库的临时表空间使用情况和阻塞锁信息。
本脚本将包含以下核心功能:
1. 临时表空间使用率统计
2. 阻塞锁链分析
3. 结果格式化输出
4. 邮件报警功能
5. 日志记录机制
6. 参数化配置
## 一、环境准备
### 1.1 基础环境要求
- 操作系统:Linux/Unix (本文以CentOS 7为例)
- 数据库:Oracle 11g/12c/19c
- 必要工具:
- sqlplus命令行工具
- mailx邮件客户端
- awk/sed文本处理工具
### 1.2 目录结构设计
建议采用以下目录结构:
~/db_monitor/
├── bin/ # 脚本目录
│ └── check_db_space_lock.sh
├── conf/ # 配置文件
│ └── dbmonitor.conf
├── logs/ # 日志目录
│ ├── space\((date +%Y%m%d).log
│ └── lock_\)(date +%Y%m%d).log
└── tmp/ # 临时文件
## 二、脚本框架设计
### 2.1 基础框架
```bash
#!/bin/bash
# ----------------------------------------------------------------------
# 名称:check_db_space_lock.sh
# 功能:Oracle数据库临时表空间和阻塞锁检查脚本
# 作者:DBA运维团队
# 版本:1.2.0
# ----------------------------------------------------------------------
# 加载配置文件
SCRIPT_DIR=$(cd $(dirname $0) && pwd)
source ${SCRIPT_DIR}/../conf/db_monitor.conf
# 初始化变量
TIMESTAMP=$(date "+%Y-%m-%d %H:%M:%S")
LOG_DIR="${SCRIPT_DIR}/../logs"
TMP_DIR="${SCRIPT_DIR}/../tmp"
# 日志记录函数
function log() {
local level=$1
local msg=$2
local log_file="${LOG_DIR}/db_monitor_$(date +%Y%m%d).log"
echo "[${TIMESTAMP}] [${level}] ${msg}" | tee -a ${log_file}
# 错误日志额外处理
if [ "${level}" == "ERROR" ]; then
echo "[${TIMESTAMP}] [${level}] ${msg}" >> "${LOG_DIR}/error_$(date +%Y%m%d).log"
fi
}
# 获取临时表空间使用情况
function check_temp_space() {
local sqlplus_script=$(cat <<EOF
set heading off
set feedback off
set pagesize 0
select
d.tablespace_name "Tablespace",
d.status "Status",
d.contents "Type",
d.extent_management "Extent Mgmt",
round(NVL(a.bytes/1024/1024,0),2) "Total(MB)",
round(NVL(t.bytes/1024/1024,0),2) "Used(MB)",
round(NVL(a.bytes-NVL(t.bytes,0)/1024/1024,0),2) "Free(MB)",
round(NVL(t.bytes/a.bytes*100,0),2) "Pct Used"
from
dba_tablespaces d,
(select tablespace_name, sum(bytes) bytes from dba_temp_files group by tablespace_name) a,
(select tablespace_name, sum(bytes_used) bytes from v\$temp_space_header group by tablespace_name) t
where
d.tablespace_name = a.tablespace_name(+)
and d.tablespace_name = t.tablespace_name(+)
and d.contents = 'TEMPORARY'
order by 8 desc;
EOF
)
sqlplus -S "${DB_USER}/${DB_PASS}@${DB_SID}" <<< "${sqlplus_script}" > "${TMP_DIR}/temp_space.tmp"
}
# 分析临时表空间结果
function analyze_temp_space() {
local warning_threshold=85
local critical_threshold=95
while read line; do
tablespace=$(echo $line | awk '{print $1}')
pct_used=$(echo $line | awk '{print $8}')
if (( $(echo "${pct_used} > ${critical_threshold}" | bc -l) )); then
log "CRITICAL" "临时表空间 ${tablespace} 使用率 ${pct_used}% 超过临界值 ${critical_threshold}%"
send_alert "CRITICAL: 临时表空间告警" "${tablespace} 使用率过高: ${pct_used}%"
elif (( $(echo "${pct_used} > ${warning_threshold}" | bc -l) )); then
log "WARNING" "临时表空间 ${tablespace} 使用率 ${pct_used}% 超过警告值 ${warning_threshold}%"
fi
done < "${TMP_DIR}/temp_space.tmp"
# 生成报告
generate_temp_report
}
# 检查阻塞锁
function check_blocking_locks() {
local sqlplus_script=$(cat <<EOF
set linesize 200
set pagesize 100
col blocker_sid for a10
col waiter_sid for a10
col blocker_user for a15
col waiter_user for a15
col blocker_sql for a30
col waiter_sql for a30
col object_name for a30
col lock_type for a20
select
lb.sid blocker_sid,
lw.sid waiter_sid,
sb.username blocker_user,
sw.username waiter_user,
sb.sql_id blocker_sql,
sw.sql_id waiter_sql,
o.object_name,
lw.lock_type,
lw.mode_held,
lw.mode_requested,
round(lw.seconds_in_wait/60,2) wait_mins
from
dba_blockers db,
v\$session sb,
v\$session sw,
v\$lock lb,
v\$lock lw,
dba_objects o
where
db.holding_session = sb.sid
and sb.sid = lb.sid
and sw.sid = lw.sid
and lb.id1 = lw.id1
and lb.id2 = lw.id2
and lw.type = lb.type
and lw.id1 = o.object_id(+)
order by
wait_mins desc;
EOF
)
sqlplus -S "${DB_USER}/${DB_PASS}@${DB_SID}" <<< "${sqlplus_script}" > "${TMP_DIR}/blocking_locks.tmp"
}
# 分析锁链关系
function analyze_lock_chains() {
local lock_file="${TMP_DIR}/blocking_locks.tmp"
local lock_chains=()
local chain_count=0
# 识别锁链头
while read line; do
blocker_sid=$(echo $line | awk '{print $1}')
waiter_sid=$(echo $line | awk '{print $2}')
# 检查是否是锁链头
if ! grep -q "${blocker_sid}" "${lock_file}"; then
chain_count=$((chain_count+1))
lock_chains[$chain_count]="${blocker_sid}"
fi
done < "${lock_file}"
# 绘制锁链关系图
for ((i=1; i<=$chain_count; i++)); do
log "INFO" "锁链 $i 头节点: SID ${lock_chains[$i]}"
draw_lock_chain "${lock_chains[$i]}"
done
}
# 递归绘制锁链
function draw_lock_chain() {
local sid=$1
local level=$2
local indent=""
[ -z "$level" ] && level=0
for ((i=0; i<$level; i++)); do
indent+=" "
done
grep "${sid}" "${TMP_DIR}/blocking_locks.tmp" | while read line; do
waiter_sid=$(echo $line | awk '{print $2}')
wait_mins=$(echo $line | awk '{print $11}')
log "INFO" "${indent}├─ SID ${sid} 阻塞 SID ${waiter_sid} (等待 ${wait_mins} 分钟)"
draw_lock_chain "${waiter_sid}" $((level+1))
done
}
# 生成HTML报告
function generate_html_report() {
local report_file="${TMP_DIR}/db_monitor_report_$(date +%Y%m%d).html"
cat > "${report_file}" <<EOF
<!DOCTYPE html>
<html>
<head>
<title>数据库监控报告 - $(date "+%Y-%m-%d")</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
h1 { color: #333; }
table { border-collapse: collapse; width: 100%; margin-bottom: 20px; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #f2f2f2; }
tr:nth-child(even) { background-color: #f9f9f9; }
.critical { background-color: #ffcccc; }
.warning { background-color: #ffffcc; }
</style>
</head>
<body>
<h1>数据库监控报告 - $(date "+%Y-%m-%d %H:%M:%S")</h1>
<h2>临时表空间使用情况</h2>
$(generate_temp_html)
<h2>阻塞锁分析</h2>
$(generate_lock_html)
</body>
</html>
EOF
log "INFO" "HTML报告已生成: ${report_file}"
}
# 生成临时表空间HTML表格
function generate_temp_html() {
echo "<table>"
echo "<tr><th>表空间名</th><th>状态</th><th>类型</th><th>总大小(MB)</th><th>已用(MB)</th><th>剩余(MB)</th><th>使用率%</th></tr>"
while read line; do
tablespace=$(echo $line | awk '{print $1}')
status=$(echo $line | awk '{print $2}')
type=$(echo $line | awk '{print $3}')
total=$(echo $line | awk '{print $5}')
used=$(echo $line | awk '{print $6}')
free=$(echo $line | awk '{print $7}')
pct_used=$(echo $line | awk '{print $8}')
# 根据阈值添加样式类
if (( $(echo "${pct_used} > 95" | bc -l) )); then
echo "<tr class='critical'>"
elif (( $(echo "${pct_used} > 85" | bc -l) )); then
echo "<tr class='warning'>"
else
echo "<tr>"
fi
echo "<td>${tablespace}</td><td>${status}</td><td>${type}</td>"
echo "<td>${total}</td><td>${used}</td><td>${free}</td><td>${pct_used}</td>"
echo "</tr>"
done < "${TMP_DIR}/temp_space.tmp"
echo "</table>"
}
# 发送邮件通知
function send_email() {
local subject=$1
local content_file=$2
local attachment=$3
if [ -z "${attachment}" ]; then
mailx -s "${subject}" -a "${content_file}" "${ML_TO}" < /dev/null
else
(cat "${content_file}"; uuencode "${attachment}" $(basename "${attachment}")) | mailx -s "${subject}" "${ML_TO}"
fi
if [ $? -eq 0 ]; then
log "INFO" "邮件发送成功: ${subject}"
else
log "ERROR" "邮件发送失败: ${subject}"
fi
}
# 发送报警
function send_alert() {
local alert_type=$1
local alert_msg=$2
# 检查是否启用邮件报警
if [ "${ML_ALERT_ENABLED}" != "true" ]; then
return
fi
# 检查是否在静默时段
local current_hour=$(date +%H)
if [ "${current_hour}" -ge "${SILENT_HOUR_START}" ] && [ "${current_hour}" -lt "${SILENT_HOUR_END}" ]; then
log "INFO" "当前处于静默时段 (${SILENT_HOUR_START}:00-${SILENT_HOUR_END}:00),跳过邮件报警"
return
fi
# 发送报警邮件
local subject="[DB-ALERT] ${alert_type} - ${DB_SID}@$(hostname)"
local temp_file="${TMP_DIR}/alert_$(date +%Y%m%d%H%M%S).txt"
echo "报警时间: $(date)" > "${temp_file}"
echo "数据库: ${DB_SID}" >> "${temp_file}"
echo "主机: $(hostname)" >> "${temp_file}"
echo "报警内容: ${alert_msg}" >> "${temp_file}"
echo "" >> "${temp_file}"
echo "-- 此邮件由数据库监控系统自动发送" >> "${temp_file}"
send_email "${subject}" "${temp_file}"
}
# 主函数
function main() {
# 初始化目录
mkdir -p "${LOG_DIR}" "${TMP_DIR}"
# 检查数据库连接
check_db_connection
# 执行检查任务
log "INFO" "开始检查临时表空间..."
check_temp_space
analyze_temp_space
log "INFO" "开始检查阻塞锁..."
check_blocking_locks
analyze_lock_chains
# 生成报告
generate_html_report
# 发送报告邮件
if [ "${DLY_REPORT_ENABLED}" == "true" ]; then
send_email "数据库每日监控报告 - $(date +%Y-%m-%d)" "${TMP_DIR}/db_monitor_report_$(date +%Y%m%d).html"
fi
# 清理临时文件
cleanup
}
# 数据库连接检查
function check_db_connection() {
local check_sql="select sysdate from dual;"
local result=$(sqlplus -S "${DB_USER}/${DB_PASS}@${DB_SID}" <<< "${check_sql}" | grep -v "^$" | head -1)
if [[ "${result}" =~ [0-9]{2}-[A-Za-z]{3}-[0-9]{2} ]]; then
log "INFO" "数据库连接测试成功"
else
log "ERROR" "数据库连接失败,请检查配置"
exit 1
fi
}
创建 db_monitor.conf
配置文件:
# 数据库连接配置
DB_USER="monitor_user"
DB_PASS="secure_password"
DB_SID="ORCL"
# 邮件配置
ML_TO="dba-team@example.com"
ML_ALERT_ENABLED="true"
DLY_REPORT_ENABLED="true"
# 报警阈值配置
TEMP_SPACE_WARNING=85
TEMP_SPACE_CRITICAL=95
LOCK_WARNING_MINUTES=5
LOCK_CRITICAL_MINUTES=15
# 静默时段配置 (24小时制)
SILENT_HOUR_START=22
SILENT_HOUR_END=08
建议将脚本添加到crontab中定期执行:
# 每天8点至22点,每小时检查一次
0 8-22 * * * /home/oracle/db_monitor/bin/check_db_space_lock.sh >> /home/oracle/db_monitor/logs/cron.log 2>&1
# 每天9点发送汇总报告
0 9 * * * /home/oracle/db_monitor/bin/check_db_space_lock.sh --report-only >> /home/oracle/db_monitor/logs/cron_report.log 2>&1
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。