Node.js 集群日志监控实战指南
一 核心原则与落地要点
二 采集与架构选型
三 最小落地示例
// logger.js
const { createLogger, format, transports } = require('winston');
const { combine, timestamp, json } = format;
const cluster = require('cluster');
// Resolve the cluster role first: workers report their numeric cluster id,
// any other process is labelled 'master'.
let workerId = 'master';
if (cluster.isWorker) {
  workerId = cluster.worker.id;
}

// Metadata merged into every log entry so each line can be traced back to the
// service, host, process and cluster worker that emitted it.
const defaultMeta = {
  service: 'order-service',
  instance: process.env.HOSTNAME || 'unknown',
  pid: process.pid,
  workerId,
};

// Output targets: the console, a file whose transport is restricted to the
// 'error' level, and a file with no transport-level restriction.
const sinks = [
  new transports.Console(),
  new transports.File({ filename: 'logs/error.log', level: 'error' }),
  new transports.File({ filename: 'logs/combined.log' }),
];

// Shared application logger: timestamped JSON entries, minimum level taken
// from LOG_LEVEL (falls back to 'info' when unset or empty).
const logger = createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: combine(timestamp(), json()),
  defaultMeta,
  transports: sinks,
});

// Optional: rotate log files daily (requires installing winston-daily-rotate-file).
// NOTE(review): the package presumably also has to be require()'d so that it
// registers transports.DailyRotateFile — confirm against its README.
// new transports.DailyRotateFile({ filename: 'logs/app-%DATE%.log', datePattern: 'YYYY-MM-DD', zippedArchive: true })

module.exports = logger;
// app.js
const logger = require('./logger');
const cluster = require('cluster');
// `require('express')` yields the application factory; it must be invoked
// exactly once to obtain the app. Invoking the factory at require time and
// then calling the result again would treat an application instance as a
// request handler (with no req/res) instead of creating an app.
const express = require('express');
const app = express();
/**
 * Health-check handler: writes an info log entry carrying the status and the
 * current epoch-millisecond timestamp, then replies with `{ status: 'UP' }`.
 *
 * @param {object} req - Incoming request (unused).
 * @param {object} res - Response used to send the JSON body.
 */
function handleHealthCheck(req, res) {
  const status = 'UP';
  logger.info('health check ok', { status, ts: Date.now() });
  res.json({ status });
}

app.get('/health', handleHealthCheck);
/**
 * Demo handler that always fails: it writes an error log entry and then throws.
 * No response is sent here; the thrown error propagates to the framework
 * (no custom error middleware is registered in the visible code).
 *
 * @throws {Error} Always, with the message 'boom'.
 */
function handleForcedError() {
  const details = { path: '/error', code: 500 };
  logger.error('something went wrong', details);
  throw new Error('boom');
}

app.get('/error', handleForcedError);
// `cluster.isMaster` is a deprecated alias of `cluster.isPrimary` (Node.js 16+).
// Prefer the current name and fall back to the old one on runtimes that lack it.
const isPrimaryProcess =
  typeof cluster.isPrimary === 'boolean' ? cluster.isPrimary : cluster.isMaster;

if (isPrimaryProcess) {
  // Primary process: fork one worker per CPU entry reported by the OS.
  const numCPUs = require('os').cpus().length;
  for (let i = 0; i < numCPUs; i += 1) {
    cluster.fork();
  }

  // Log every worker exit together with its exit code and terminating signal,
  // so crashes and kills can be told apart from clean shutdowns in the logs.
  cluster.on('exit', (worker, code, signal) => {
    logger.warn('worker exited', {
      workerId: worker.id,
      pid: worker.process.pid,
      exitCode: code,
      signal,
    });
  });
} else {
  // Worker process: serve HTTP on port 3000 and log once listening.
  app.listen(3000, () => logger.info('worker started', { port: 3000 }));
}
四 集中式监控与告警配置
# Input: accept events over HTTP on port 5044, decoded with the json codec.
input {
  http {
    port => 5044
    codec => json
  }
}

# Filter: remove the @version field from each event.
filter {
  mutate {
    remove_field => ["@version"]
  }
}

# Output: send events to Elasticsearch at http://es:9200; the index name
# carries a YYYY.MM.dd date suffix.
output {
  elasticsearch {
    hosts => ["http://es:9200"]
    index => "nodejs-cluster-%{+YYYY.MM.dd}"
  }
}
五 关键指标与优化建议