Nginx 服务器监控实践指南
一 监控目标与关键指标
二 数据采集与可视化方案
# Virtual host that publishes the stub_status page for metrics scraping.
server {
    listen      80;
    server_name localhost;

    # Status endpoint read by the monitoring side.
    location /nginx_status {
        # Keep scrape traffic out of the access log.
        access_log off;
        stub_status on;

        # Access rules are evaluated in order: loopback and the monitoring
        # host are let through, every other client is rejected.
        allow 127.0.0.1;
        allow <监控服务器IP>;
        deny all;
    }
}
# Validate the configuration first; reload only if the syntax check passes,
# so a typo in the new status block cannot break the running server config.
sudo nginx -t && sudo nginx -s reload
访问 http://<服务器IP>/nginx_status 确认状态页可以正常返回，然后启动 nginx-prometheus-exporter：
./nginx-prometheus-exporter -nginx.scrape-uri=http://localhost/nginx_status
# 默认 :9113/metrics
# Prometheus scrape job that pulls metrics from the nginx exporter.
scrape_configs:
  - job_name: 'nginx'
    # static_configs must be nested under the job entry; at the top level it
    # would be parsed as an unrelated key and the job would have no targets.
    static_configs:
      # Replace <exporter-ip> with the exporter host; 9113 is the exporter port.
      - targets: ['<exporter-ip>:9113']
三 告警规则示例
# Fires when more than 5% of requests over the last 5 minutes returned 5xx,
# sustained for 2 minutes.
# Both sides are wrapped in sum(): without aggregation the division matches
# series label-for-label, so every 5xx series is divided by itself and the
# ratio is always 1.
# NOTE(review): this needs a `status` label on nginx_http_requests_total; the
# stub_status-based exporter presumably does not provide one - confirm which
# exporter supplies this metric.
- alert: HighNginxErrorRate
  expr: >-
    sum(rate(nginx_http_requests_total{status=~"5.."}[5m]))
    / sum(rate(nginx_http_requests_total[5m])) > 0.05
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: "High error rate on Nginx"
    description: "More than 5% of requests are returning 5xx errors."
# Fires when the active connection gauge stays above 1000 for 1 minute.
- alert: TooManyActiveConnections
  expr: nginx_connections_active > 1000
  for: 1m
  # labels is a mapping; the severity entry has to sit on its own nested line.
  labels:
    severity: critical
  annotations:
    summary: "Too many active connections"
    description: "Active connections exceed 1000!"
# Fires when, over a 5-minute window, more connections were accepted than
# handled, sustained for 5 minutes. The gap between the two counters is the
# number of dropped connections, which is what the description reports.
# Request counts are not used here: requests and connections are different
# units and their ratio varies with keep-alive.
- alert: NginxConnectionsDropped
  expr: >-
    increase(nginx_connections_accepted[5m])
    - increase(nginx_connections_handled[5m]) > 0
  for: 5m
  labels:
    severity: critical
  annotations:
    summary: "Nginx dropped connections detected"
    description: "Accepts grew faster than handled; possible worker_connections limit."
# Fires when the 99th-percentile request duration, estimated from the
# histogram buckets over 5 minutes, stays above 1 second for 5 minutes.
# NOTE(review): nginx_http_request_duration_seconds_bucket is presumably not
# produced by the stub_status page configured in this guide - confirm which
# exporter supplies this histogram.
- alert: HighRequestLatency
  expr: >-
    histogram_quantile(0.99,
    sum(rate(nginx_http_request_duration_seconds_bucket[5m])) by (le)) > 1
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "High 99th percentile latency"
    description: "99th percentile request latency is above 1s."
建议将告警接入 Alertmanager，通过邮件、钉钉、企业微信等渠道通知，并配置分组、抑制、静默以降低噪音。
四 进阶与排错建议
# ApacheBench load test: -n is the total number of requests (1000) and -c is
# the number sent concurrently (100). Replace the <...> placeholders with the
# real host and path before running.
ab -n 1000 -c 100 http://<域名>/<路径>
观察 Requests per second 与 Time per request，并与线上指标对比，评估优化效果。