整体思路与准备
# Plain-text access-log layout: the fields of nginx's predefined "combined"
# format followed by the quoted X-Forwarded-For request header.
log_format main
    '$remote_addr - $remote_user [$time_local] '
    '"$request" $status $body_bytes_sent '
    '"$http_referer" "$http_user_agent" "$http_x_forwarded_for"';

# Record every request using the `main` layout defined above.
access_log /var/log/nginx/access.log main;

# Error log destination; no severity level is specified here.
error_log /var/log/nginx/error.log;
# JSON access-log layout, one object per request. `escape=json` makes nginx
# JSON-escape variable values. `status` and `body_bytes_sent` are emitted as
# bare numbers; every other value is a quoted string.
log_format json_analytics escape=json
    '{'
        '"msec":"$msec",'
        '"connection":"$connection",'
        '"request_id":"$request_id",'
        '"request":"$request",'
        '"status":$status,'
        '"body_bytes_sent":$body_bytes_sent,'
        '"http_referer":"$http_referer",'
        '"http_user_agent":"$http_user_agent",'
        '"remote_addr":"$remote_addr",'
        '"http_x_forwarded_for":"$http_x_forwarded_for"'
    '}';

# Second access log, written with the JSON layout defined above.
access_log /var/log/nginx/access_json.log json_analytics;
说明:访问日志用于行为与性能分析,错误日志用于定位 4xx/5xx 等问题。JSON 格式便于在 ELK/Loki 等系统中解析与聚合。

解析与字段提取
# Request count per HTTP status code (whitespace-separated field 9),
# most frequent first.
awk '{print $9}' access.log \
  | sort \
  | uniq -c \
  | sort -rn
# Top 10 client IPs (field 1).
awk '{print $1}' access.log \
  | sort \
  | uniq -c \
  | sort -rn \
  | head -n 10
# Top 10 URLs (field 7 of the request line in the common combined layout).
awk '{print $7}' access.log \
  | sort \
  | uniq -c \
  | sort -rn \
  | head -n 10
# Number of requests in a time window (example: 07/Apr/2017, hours 04 and 05).
# The pattern is anchored on the "[" that opens the timestamp and on the ":"
# after the hour, so the same text elsewhere in a line (e.g. in a referer)
# is not counted. `grep -c` prints the number of matching lines directly.
grep -c '\[07/Apr/2017:0[45]:' access.log
# Split the raw access-log line stored in the "content" field into named
# fields: ip, datetime, verb, request, protocol, status, body_bytes_sent,
# referer and user_agent.
# The third token ($remote_user in the `main` log_format) is matched with \S+
# instead of a literal "-", so lines from authenticated requests still parse.
# It is deliberately left uncaptured so the set of extracted fields is unchanged.
e_regex("content",
        r'(?P<ip>\S+) - \S+ \[(?P<datetime>[^\]]+)\] '
        r'"(?P<verb>\S+) (?P<request>\S+) (?P<protocol>\S+)" '
        r'(?P<status>\d+) (?P<body_bytes_sent>\d+) '
        r'"(?P<referer>[^"]*)" "(?P<user_agent>[^"]*)"')
说明:GROK 模式库丰富、学习成本低,适合快速解析多变日志;必要时可与正则组合使用(上面的示例即为正则写法)。

常见分析场景与命令示例
# First few requests answered with 404.
# Compare the status field (field 9) rather than grepping for " 404 ", which
# would also match lines whose $body_bytes_sent happens to be 404.
awk '$9 == 404' access.log | head
# Request count per status code (field 9), most frequent first.
awk '{print $9}' access.log \
  | sort \
  | uniq -c \
  | sort -rn
# Ten most requested paths (field 7) among responses with status 404.
awk '$9==404 {print $7}' access.log \
  | sort \
  | uniq -c \
  | sort -rn \
  | head -n 10
# Number of 5xx responses in the last 10 minutes.
# Log timestamps ("[dd/Mon/yyyy:HH:MM:SS +zzzz") do not sort lexically, so
# build one fixed-string prefix per minute for the current minute and the 10
# before it, then keep only lines that carry one of those prefixes. Matching a
# single minute (as a lone `date -d '10 minutes ago'` pattern would) covers
# just one minute of traffic instead of the whole window.
# LC_ALL=C forces English month abbreviations regardless of the shell locale.
# Requires GNU date (-d). Only the last 100000 lines are scanned.
minute_prefixes=$(
  for offset in 0 1 2 3 4 5 6 7 8 9 10; do
    LC_ALL=C date -d "${offset} minutes ago" '+[%d/%b/%Y:%H:%M:'
  done
)
tail -n 100000 access.log \
  | grep -F -e "$minute_prefixes" \
  | awk '$9 >= 500 {count++} END {print count+0}'
# Top client IPs (field 1).
awk '{print $1}' access.log | sort | uniq -c | sort -nr | head
# Top request paths (field 7).
awk '{print $7}' access.log | sort | uniq -c | sort -nr | head
# Top Referer / User-Agent.
# These values are quoted and may contain spaces, so split on double quotes
# instead of whitespace: with the combined layout the 4th quote-delimited
# piece is the referer and the 6th is the user agent.
awk -F'"' '{print $4}' access.log | sort | uniq -c | sort -nr | head
awk -F'"' '{print $6}' access.log | sort | uniq -c | sort -nr | head
# Slow requests: print the path and request time of every request slower than
# `min` seconds, slowest first.
# ASSUMPTION: $request_time has been added to log_format and lands in
# whitespace-separated field `col` (10 here). With the stock combined layout
# field 10 is $body_bytes_sent, so set `col` to wherever $request_time
# actually sits in your format before relying on the output.
awk -v col=10 -v min=1.0 '$col > min {print $7, $col}' access.log \
  | sort -k2,2nr \
  | head
可视化与长期化
# Interactive real-time dashboard in the terminal.
# The log is passed once (giving it both positionally and via -f names the
# same file twice), and the HTML-only options are dropped because no -o
# output file is requested here.
goaccess /var/log/nginx/access.log --log-format=COMBINED
# Export an HTML report.
# To keep the page updating live, append --real-time-html; GoAccess then
# serves updates over its own WebSocket server (port 7890 by default). Do not
# set --ws-url to 0.0.0.0: that option is the address browsers connect to,
# not the bind address.
goaccess /var/log/nginx/access.log -o /var/www/html/report.html --log-format=COMBINED