CentOS 上用 Python 做日志分析的实用方案
一 准备与环境
常用的日志查看命令:tail -f /var/log/messages、grep "ERROR" /var/log/httpd/error_log、journalctl -u your.service -f。
logrotate 常用的轮转配置项:daily、rotate 7、compress、missingok、notifempty。
二 读取与解析日志的 Python 方法
def parse_log(file_path, keywords=("ERROR", "WARNING")):
    """Print every line of a log file that contains at least one keyword.

    Args:
        file_path: Path of the log file to scan.
        keywords: Substrings to look for. Either an iterable of strings or a
            single string; a single string is treated as one keyword, not as
            a sequence of characters.

    Matching lines are written to stdout with trailing whitespace removed.
    If the file cannot be opened or read, a one-line error message is
    printed to stdout instead of raising.
    """
    if isinstance(keywords, str):
        # A bare string would otherwise be iterated character by character,
        # so "ERROR" would match any line containing "E", "R" or "O".
        keywords = (keywords,)
    else:
        # Materialize once so a one-shot iterator is not used up by the
        # first line of the file.
        keywords = tuple(keywords)

    try:
        # errors="ignore" drops bytes that cannot be decoded.
        with open(file_path, "r", errors="ignore") as f:
            for line in f:
                if any(k in line for k in keywords):
                    print(line.rstrip())
    except OSError as e:
        # Only I/O failures are reported as read errors; programming errors
        # such as a non-string keyword are allowed to surface.
        print(f"Error reading {file_path}: {e}")
# Script entry point: scan the system log using the default keywords.
if __name__ == "__main__":
    system_log = "/var/log/messages"
    parse_log(system_log)
import re
from collections import Counter
# Compiled pattern for one access-log line of the form
#   ip field field [time] "METHOD url HTTP/d.d" status size
# (this looks like the Apache/NCSA common log format; confirm against the
# server's LogFormat). Named groups: ip, time, method, url, status, size.
log_re = re.compile(
# Client address, two skipped fields, then the bracketed timestamp.
r'(?P<ip>\S+) \S+ \S+ \[(?P<time>[^\]]+)\] '
# Quoted request line; only single-digit "HTTP/<d>.<d>" versions match.
r'"(?P<method>\S+) (?P<url>\S+) HTTP/\d\.\d" '
# Three-digit status, then the size as any non-space token.
r'(?P<status>\d{3}) (?P<size>\S+)'
)
def analyze_access_log(path, topn=10):
    """Tally status codes and URLs in an access log and print the leaders.

    Args:
        path: Path of the access log to read.
        topn: How many of the most frequent entries to print per tally.

    Each line is searched with the module-level ``log_re`` pattern; lines
    that do not match are skipped. Undecodable bytes are ignored while
    reading. Nothing is returned; the two rankings are printed to stdout.
    """
    by_status = Counter()
    by_url = Counter()

    with open(path, "r", errors="ignore") as handle:
        hits = (log_re.search(entry) for entry in handle)
        for hit in hits:
            if not hit:
                continue
            fields = hit.groupdict()
            by_status[fields["status"]] += 1
            by_url[fields["url"]] += 1

    print("Top status:", by_status.most_common(topn))
    print("Top URLs:", by_url.most_common(topn))
# Script entry point: summarize the httpd access log with the default top-N.
if __name__ == "__main__":
    access_log = "/var/log/httpd/access_log"
    analyze_access_log(access_log)
处理大文件时应逐行迭代读取(for line in f),避免使用 readlines() 一次性载入整个文件而导致内存暴涨。
三 进阶分析与可视化
四 生产实践与运维建议
import logging
# Root logging setup: write records at INFO level and above to the
# application log file, formatted as "time logger-name level message".
logging.basicConfig(
    filename="/var/log/myapp/app.log",
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    level=logging.INFO,
)

# Module-level logger named after the current module.
logger = logging.getLogger(__name__)

# Emit one record immediately after configuration.
logger.info("startup")