Ubuntu 下用 Go 做数据分析的实战路线
一 环境准备与工具选型
二 数据处理与统计分析的最小示例
package main
import (
"encoding/csv"
"fmt"
"log"
"os"
"sort"
"strconv"
"gonum.org/v1/gonum/stat"
)
// Record is one parsed data row: a region name paired with its numeric score.
type Record struct {
	Region string  // region label taken from the first CSV column
	Score  float64 // score parsed from the second CSV column
}
// main loads data.csv, turns every data row into a Record and prints the
// mean, variance, standard deviation, per-region counts and median of the
// scores.
//
// The first CSV row is treated as a header and skipped. Rows with fewer than
// two columns or with a non-numeric score are skipped as well. The program
// exits with an error when no valid row is left, because the statistics
// (and in particular the median lookup) are undefined for an empty sample.
func main() {
	f, err := os.Open("data.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	rows, err := csv.NewReader(f).ReadAll()
	if err != nil {
		log.Fatal(err)
	}

	var data []Record
	for i, row := range rows {
		if i == 0 {
			continue // skip the header row
		}
		if len(row) < 2 {
			continue // not enough columns for region + score
		}
		score, err := strconv.ParseFloat(row[1], 64)
		if err != nil {
			continue // tolerate bad input: skip rows whose score is not a number
		}
		data = append(data, Record{Region: row[0], Score: score})
	}
	if len(data) == 0 {
		// Without this guard the median code below would index an empty slice.
		log.Fatal("data.csv 中没有有效的数据行")
	}

	// Basic statistics.
	scores := make([]float64, len(data))
	for i, r := range data {
		scores[i] = r.Score
	}
	fmt.Printf("均值: %.2f 方差: %.2f 标准差: %.2f\n",
		stat.Mean(scores, nil), stat.Variance(scores, nil), stat.StdDev(scores, nil))

	// Count records per region.
	regionCnt := make(map[string]int)
	for _, r := range data {
		regionCnt[r.Region]++
	}
	fmt.Println("分组计数:", regionCnt)

	// Median: middle element of the sorted scores, or the average of the two
	// middle elements when the count is even.
	sort.Float64s(scores)
	n := len(scores)
	med := scores[n/2]
	if n%2 == 0 {
		med = (scores[n/2-1] + scores[n/2]) / 2
	}
	fmt.Printf("中位数: %.2f\n", med)
}
三 可视化与报表输出
package main
import (
"log"
"math/rand"
"time"
"gonum.org/v1/plot"
"gonum.org/v1/plot/plotter"
"gonum.org/v1/plot/vg"
)
// main draws 1000 samples from a standard normal distribution, bins them
// into a 20-bin histogram and saves the chart to hist.png (8in x 4in).
func main() {
	// Use a locally seeded generator; the package-level rand.Seed is
	// deprecated since Go 1.20.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	vals := make(plotter.Values, 1000)
	for i := range vals {
		vals[i] = rng.NormFloat64()
	}

	hist, err := plotter.NewHist(vals, 20)
	if err != nil {
		log.Fatal(err)
	}

	// plot.New returns only *plot.Plot (no error) since gonum/plot v0.9.0.
	p := plot.New()
	p.Add(hist)
	p.Title.Text = "Normal distribution"
	p.X.Label.Text = "X"
	p.Y.Label.Text = "Frequency"

	if err := p.Save(8*vg.Inch, 4*vg.Inch, "hist.png"); err != nil {
		log.Fatal(err)
	}
}
四 建模与机器学习入门
package main
import (
"fmt"
"gonum.org/v1/gonum/mat"
"gonum.org/v1/gonum/stat/regression"
)
func main() {
// X: [1, x1; 1, x2; ...], y: 目标
X := mat.NewDense(4, 2, []float64{1, 1, 1, 2, 1, 3, 1, 4})
y := mat.NewVecDense(4, []float64{3, 4, 5, 6})
model := new(regression.Linear)
if err := model.Fit(X, y); err != nil { panic(err) }
xNew := mat.NewVecDense(2, []float64{1, 5})
pred, err := model.Predict(xNew)
if err != nil { panic(err) }
fmt.Printf("预测值: %.2f\n", pred.AtVec(0))
}
package main
import (
"fmt"
"github.com/sjwhitworth/golearn/base"
"github.com/sjwhitworth/golearn/trees"
)
func main() {
// X: [f1, f2], y: 标签
X := [][]float64{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}}
y := []string{"A", "A", "B", "B", "B"}
// 注意:GoLearn 需要 [][]string 特征,演示用 base.ParseCSV 简化
// 实际请按 GoLearn 文档构造 Instances
fmt.Println("GoLearn 决策树示例(演示)")
clf := trees.NewClassifier(trees.NewID3(0.8, 100))
// clf.Fit(XInstances, yInstances) // 按官方 API 构造 Instances 后调用
// preds := clf.Predict(testInstances)
// fmt.Println("预测:", preds)
}
五 性能分析与工程化建议