linux

如何通过Golang日志监控系统健康

小樊
38
2025-12-15 06:36:49
栏目: 编程语言

用结构化日志与集中平台联动,配合指标与告警,形成从发现到定位的闭环。

一、整体架构与关键原则

二、落地步骤

三、最小可用代码示例

package main

import (
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"os"
	"time"
)

// main builds a JSON zap logger that writes to stdout at Info level and above,
// logs a startup event, then runs process once and logs the outcome together
// with the elapsed time.
func main() {
	encoderCfg := zap.NewProductionEncoderConfig()
	encoderCfg.EncodeTime = zapcore.ISO8601TimeEncoder

	encoder := zapcore.NewJSONEncoder(encoderCfg)
	sink := zapcore.AddSync(os.Stdout)
	logger := zap.New(
		zapcore.NewCore(encoder, sink, zapcore.InfoLevel),
		zap.AddCaller(),
		zap.AddStacktrace(zapcore.ErrorLevel),
	)
	// Flush on exit; the Sync error is intentionally discarded.
	defer func() { _ = logger.Sync() }()

	logger.Info("service started",
		zap.String("service", "order"),
		zap.String("version", "v1.2.3"),
		zap.String("env", "prod"),
	)

	// Simulate one unit of business work and time it.
	begin := time.Now()
	procErr := process()
	elapsed := time.Since(begin)

	if procErr == nil {
		logger.Info("process succeeded",
			zap.Duration("duration", elapsed),
			zap.Int("count", 42),
		)
		return
	}

	logger.Error("process failed",
		zap.Error(procErr),
		zap.Duration("duration", elapsed),
		zap.String("trace_id", "abc-123-def"),
	)
}

// process is a placeholder for the real business logic. It currently does no
// work and always returns a nil error.
func process() error {
	// TODO: implement the business logic.
	var err error
	return err
}
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

// requestsTotal is the counter exported as "http_requests_total".
var requestsTotal = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "http_requests_total",
	Help: "Total number of HTTP requests.",
})

// requestDuration is the summary exported as "http_request_duration_seconds".
// No Objectives are configured on it here.
var requestDuration = prometheus.NewSummary(prometheus.SummaryOpts{
	Name: "http_request_duration_seconds",
	Help: "Duration of HTTP requests.",
})

// init registers the package-level collectors via prometheus.MustRegister,
// counter first, then summary.
func init() {
	prometheus.MustRegister(requestsTotal)
	prometheus.MustRegister(requestDuration)
}

// main exposes Prometheus metrics on /metrics and a JSON liveness probe on
// /healthz, then serves both on :8080 using the default mux. It exits with a
// logged error if the server cannot start or stops serving.
func main() {
	http.Handle("/metrics", promhttp.Handler())
	http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
		// Headers must be set before WriteHeader. Without an explicit type the
		// JSON body would be sniffed and served as text/plain.
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		// A failed write means the client went away; there is nothing to recover.
		_, _ = w.Write([]byte(`{"status":"ok"}`))
	})

	// ListenAndServe always returns a non-nil error. Serve in the foreground and
	// surface that error (e.g. port already in use) instead of starting the
	// server in a goroutine and blocking forever with the failure discarded.
	log.Fatal(http.ListenAndServe(":8080", nil))
}
import (
	"gopkg.in/natefinch/lumberjack.v2"
	"github.com/sirupsen/logrus"
)

// Send logrus output to a lumberjack.Logger that writes to /var/log/myapp.log
// with the size, backup-count, and age limits below.
// NOTE(review): this is a bare statement; it presumably belongs inside a
// setup function such as main or init — confirm where it is meant to live.
logrus.SetOutput(&lumberjack.Logger{
	Filename:   "/var/log/myapp.log",
	MaxSize:    10,    // MB
	MaxBackups: 3,     // number of files to keep
	MaxAge:     28,    // days
	Compress:   true,  // compress
})

四、告警规则与SLO示例

五、运维与治理要点

0
看了该问题的人还看了