# How to Implement a Simple Three-Layer Neural Network in Python

## 1. Neural Network Fundamentals

### 1.1 What Is a Neural Network

A neural network is a computational model that mimics the structure and function of biological neural networks. It consists of a large number of interconnected artificial neurons, and it processes information by adjusting the strength of the connections between these nodes.

The basic characteristics of neural networks include:
- Parallel, distributed processing
- The ability to model nonlinear mappings
- Strong self-learning capability
- Good fault tolerance

### 1.2 Structure of a Three-Layer Neural Network

A simple three-layer neural network typically consists of the following layers:

1. **Input layer**: receives the raw input data
2. **Hidden layer**: performs feature extraction and transformation
3. **Output layer**: produces the final prediction

This structure is also known as a single-hidden-layer feedforward network and is one of the most basic neural network architectures.
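To make the data flow concrete, here is a minimal shape sketch using the 2 → 4 → 1 layer sizes adopted later in this article (the variable names here are illustrative only, and activations are omitted):

```python
import numpy as np

# A hypothetical batch of 5 samples with 2 features each
X = np.random.randn(5, 2)           # input:  (5, 2)

W1 = np.random.randn(2, 4)          # input -> hidden weights
W2 = np.random.randn(4, 1)          # hidden -> output weights

hidden = X @ W1                     # hidden layer output: (5, 4)
output = hidden @ W2                # final prediction:    (5, 1)
print(hidden.shape, output.shape)   # (5, 4) (5, 1)
```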

## 2. Preparation

### 2.1 Required Python Libraries

We need the following Python libraries to implement the network:

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
```

### 2.2 Data Preparation

We use `make_moons` to generate a dataset that is not linearly separable:

```python
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
```
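Before training, it can help to look at the data. This is an optional sketch using the matplotlib import from Section 2.1:

```python
# Visualize the two interleaving half-moons, colored by class label
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
plt.title("make_moons dataset")
plt.show()
```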

## 3. Implementing the Neural Network

### 3.1 Initializing the Network

The weights are initialized with small random values (the 0.01 scale keeps the sigmoid activations away from saturation early in training) and the biases with zeros:

```python
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize weights with small random values and biases with zeros
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))
```

### 3.2 Forward Propagation

The following methods are added to the `NeuralNetwork` class. The forward pass applies a linear transform followed by a sigmoid activation at each layer:

```python
def forward(self, X):
    # First layer: input -> hidden
    self.z1 = np.dot(X, self.W1) + self.b1
    self.a1 = self.sigmoid(self.z1)

    # Second layer: hidden -> output
    self.z2 = np.dot(self.a1, self.W2) + self.b2
    self.a2 = self.sigmoid(self.z2)

    return self.a2

def sigmoid(self, z):
    return 1 / (1 + np.exp(-z))
```
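One practical caveat: `np.exp(-z)` can overflow for large negative `z` and emit runtime warnings. It is unlikely to matter with the small-weight initialization used here, but if it does, a numerically stable variant is a common replacement. This is an optional sketch, not part of the original implementation:

```python
def stable_sigmoid(z):
    # Split by sign so the exponent is always <= 0 and never overflows:
    # for z >= 0 use 1/(1+e^-z); for z < 0 use e^z/(1+e^z).
    out = np.empty_like(z, dtype=float)
    pos = z >= 0
    out[pos] = 1 / (1 + np.exp(-z[pos]))
    ez = np.exp(z[~pos])
    out[~pos] = ez / (1 + ez)
    return out
```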

### 3.3 Computing the Loss

For binary classification we use the binary cross-entropy loss, averaged over the `m` training samples:

```python
def compute_loss(self, y, y_hat):
    m = y.shape[0]
    loss = (-1/m) * np.sum(y * np.log(y_hat) + (1-y) * np.log(1-y_hat))
    return loss
```
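Note that `np.log` returns `-inf` if `y_hat` ever reaches exactly 0 or 1. With this initialization and learning rate that rarely happens, but a common defensive tweak (my addition, not in the original code) is to clip the predictions first:

```python
def compute_loss(self, y, y_hat, eps=1e-12):
    m = y.shape[0]
    y_hat = np.clip(y_hat, eps, 1 - eps)  # keep log() finite
    return (-1/m) * np.sum(y * np.log(y_hat) + (1-y) * np.log(1-y_hat))
```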

### 3.4 Backpropagation

Because the sigmoid output is paired with the cross-entropy loss, the output-layer error simplifies to `a2 - y`, which keeps the backward pass compact:

```python
def backward(self, X, y, learning_rate):
    m = X.shape[0]

    # Output-layer gradients
    dz2 = self.a2 - y
    dW2 = (1/m) * np.dot(self.a1.T, dz2)
    db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)

    # Hidden-layer gradients
    dz1 = np.dot(dz2, self.W2.T) * self.sigmoid_derivative(self.a1)
    dW1 = (1/m) * np.dot(X.T, dz1)
    db1 = (1/m) * np.sum(dz1, axis=0, keepdims=True)

    # Gradient-descent parameter update
    self.W2 -= learning_rate * dW2
    self.b2 -= learning_rate * db2
    self.W1 -= learning_rate * dW1
    self.b1 -= learning_rate * db1

def sigmoid_derivative(self, a):
    # Derivative of the sigmoid expressed via its output: s'(z) = s(z) * (1 - s(z))
    return a * (1 - a)
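```

If you modify the backward pass, a numerical gradient check is a handy way to verify it. This is an optional sketch (the helper `numerical_grad_W2` is my own, not part of the article's class) that compares the analytic gradient of `W2` against a central finite-difference estimate, assuming the methods above have been added to the class:

```python
def numerical_grad_W2(nn, X, y, eps=1e-5):
    # Finite-difference estimate of dLoss/dW2, element by element
    grad = np.zeros_like(nn.W2)
    for i in range(nn.W2.shape[0]):
        for j in range(nn.W2.shape[1]):
            nn.W2[i, j] += eps
            loss_plus = nn.compute_loss(y, nn.forward(X))
            nn.W2[i, j] -= 2 * eps
            loss_minus = nn.compute_loss(y, nn.forward(X))
            nn.W2[i, j] += eps  # restore the original weight
            grad[i, j] = (loss_plus - loss_minus) / (2 * eps)
    return grad

# Usage: compare against the analytic gradient dW2 = (1/m) * a1.T @ (a2 - y)
nn_check = NeuralNetwork(2, 4, 1)
Xb, yb = X_train[:32], y_train[:32].reshape(-1, 1)
numeric = numerical_grad_W2(nn_check, Xb, yb)
nn_check.forward(Xb)  # refresh cached activations
analytic = (1/32) * np.dot(nn_check.a1.T, nn_check.a2 - yb)
print(np.max(np.abs(numeric - analytic)))  # should be tiny, around 1e-8 or less
```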

### 3.5 The Training Loop

Each epoch runs a forward pass over the full training set, records the loss, and applies one gradient-descent update:

```python
def train(self, X, y, epochs, learning_rate):
    losses = []
    for i in range(epochs):
        # Forward pass
        y_hat = self.forward(X)

        # Record the loss
        loss = self.compute_loss(y, y_hat)
        losses.append(loss)

        # Backward pass and parameter update
        self.backward(X, y, learning_rate)

        if i % 1000 == 0:
            print(f"Epoch {i}, Loss: {loss}")

    return losses
```
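The loop above is full-batch gradient descent: every update uses all training samples. For larger datasets, a mini-batch variant is common. This is a hedged sketch of how the loop could be adapted (my variation, not part of the original article):

```python
def train_minibatch(self, X, y, epochs, learning_rate, batch_size=64):
    losses = []
    for i in range(epochs):
        perm = np.random.permutation(X.shape[0])  # reshuffle each epoch
        for start in range(0, X.shape[0], batch_size):
            idx = perm[start:start + batch_size]
            self.forward(X[idx])
            self.backward(X[idx], y[idx], learning_rate)
        # Track the full-dataset loss once per epoch
        losses.append(self.compute_loss(y, self.forward(X)))
    return losses
```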

## 4. Training and Evaluating the Model

### 4.1 Training the Model

```python
# Initialize the network
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)

# Train it; the labels are reshaped into a column vector to match the output shape
losses = nn.train(X_train, y_train.reshape(-1,1), epochs=10000, learning_rate=0.1)
```

### 4.2 Visualizing the Training Process

```python
plt.plot(losses)
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()
```

### 4.3 Evaluating the Model

Add the following two methods to the `NeuralNetwork` class; predictions are thresholded at 0.5:

```python
def predict(self, X):
    y_hat = self.forward(X)
    return (y_hat > 0.5).astype(int)

def accuracy(self, y_true, y_pred):
    return np.mean(y_true == y_pred)

# Predict on the test set
y_pred = nn.predict(X_test)
acc = nn.accuracy(y_test.reshape(-1,1), y_pred)
print(f"Test Accuracy: {acc*100:.2f}%")
```

## 5. Visualizing the Decision Boundary

The helper below evaluates the model on a dense grid and colors the plane by predicted class:

```python
def plot_decision_boundary(model, X, y):
    # Grid bounds with a small margin around the data
    x_min, x_max = X[:,0].min()-0.5, X[:,0].max()+0.5
    y_min, y_max = X[:,1].min()-0.5, X[:,1].max()+0.5
    h = 0.01

    # Generate the grid points
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict the class of every grid point
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # Draw the filled contours and overlay the data points
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:,0], X[:,1], c=y, cmap=plt.cm.Spectral)
    plt.title("Decision Boundary")
    plt.show()

plot_decision_boundary(nn, X_test, y_test)
```

## 6. Hyperparameter Tuning

### 6.1 Effect of the Learning Rate

A learning rate that is too small converges slowly, while one that is too large can oscillate or diverge. The loop below compares several values:

```python
learning_rates = [0.001, 0.01, 0.1, 1]
for lr in learning_rates:
    nn = NeuralNetwork(2, 4, 1)
    losses = nn.train(X_train, y_train.reshape(-1,1), 5000, lr)
    plt.plot(losses, label=f"LR={lr}")
plt.legend()
plt.show()
```

### 6.2 Effect of the Hidden Layer Size

A larger hidden layer gives the network more capacity, at the cost of more computation and a higher risk of overfitting:

```python
hidden_sizes = [2, 4, 8, 16]
for size in hidden_sizes:
    nn = NeuralNetwork(2, size, 1)
    losses = nn.train(X_train, y_train.reshape(-1,1), 5000, 0.1)
    plt.plot(losses, label=f"Hidden Size={size}")
plt.legend()
plt.show()
```

## 7. Complete Code

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoid_derivative(self, a):
        return a * (1 - a)

    def forward(self, X):
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def compute_loss(self, y, y_hat):
        m = y.shape[0]
        loss = (-1/m) * np.sum(y * np.log(y_hat) + (1-y) * np.log(1-y_hat))
        return loss

    def backward(self, X, y, learning_rate):
        m = X.shape[0]
        dz2 = self.a2 - y
        dW2 = (1/m) * np.dot(self.a1.T, dz2)
        db2 = (1/m) * np.sum(dz2, axis=0, keepdims=True)
        dz1 = np.dot(dz2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = (1/m) * np.dot(X.T, dz1)
        db1 = (1/m) * np.sum(dz1, axis=0, keepdims=True)
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def train(self, X, y, epochs, learning_rate):
        losses = []
        for i in range(epochs):
            y_hat = self.forward(X)
            loss = self.compute_loss(y, y_hat)
            losses.append(loss)
            self.backward(X, y, learning_rate)
            if i % 1000 == 0:
                print(f"Epoch {i}, Loss: {loss}")
        return losses

    def predict(self, X):
        y_hat = self.forward(X)
        return (y_hat > 0.5).astype(int)

    def accuracy(self, y_true, y_pred):
        return np.mean(y_true == y_pred)

# Prepare the data
X, y = make_moons(n_samples=1000, noise=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the model
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
losses = nn.train(X_train, y_train.reshape(-1,1), epochs=10000, learning_rate=0.1)

# Evaluate on the test set
y_pred = nn.predict(X_test)
acc = nn.accuracy(y_test.reshape(-1,1), y_pred)
print(f"Test Accuracy: {acc*100:.2f}%")

# Plot the training loss
plt.plot(losses)
plt.title("Training Loss")
plt.show()

def plot_decision_boundary(model, X, y):
    x_min, x_max = X[:,0].min()-0.5, X[:,0].max()+0.5
    y_min, y_max = X[:,1].min()-0.5, X[:,1].max()+0.5
    h = 0.01
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:,0], X[:,1], c=y, cmap=plt.cm.Spectral)
    plt.title("Decision Boundary")
    plt.show()

plot_decision_boundary(nn, X_test, y_test)
```

## 8. Extensions and Improvements

### 8.1 Adding L2 Regularization

To reduce overfitting, an L2 penalty on the weights can be added to the loss:

```python
def compute_loss(self, y, y_hat, lambd=0.1):
    m = y.shape[0]
    cross_entropy = (-1/m) * np.sum(y * np.log(y_hat) + (1-y) * np.log(1-y_hat))
    l2_reg = (lambd/(2*m)) * (np.sum(np.square(self.W1)) + np.sum(np.square(self.W2)))
    return cross_entropy + l2_reg
```
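Note that changing the loss alone is not enough: for the gradients to stay consistent, the weight gradients in `backward` must also include the derivative of the L2 term, which adds `(lambd/m) * W` to each weight gradient. A sketch of the adjustment, assuming the same `lambd` is available inside `backward`:

```python
# Inside backward(), after computing dz2 and dz1 as before:
dW2 = (1/m) * np.dot(self.a1.T, dz2) + (lambd/m) * self.W2
dW1 = (1/m) * np.dot(X.T, dz1) + (lambd/m) * self.W1
# Bias gradients are typically left unregularized.
```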

### 8.2 Adding Momentum

Momentum can speed up training by smoothing the updates with an exponential moving average of past gradients:

```python
def __init__(self, input_size, hidden_size, output_size):
    # ... weight and bias initialization as before ...
    # Velocity terms for the momentum update
    self.vW1 = np.zeros_like(self.W1)
    self.vb1 = np.zeros_like(self.b1)
    self.vW2 = np.zeros_like(self.W2)
    self.vb2 = np.zeros_like(self.b2)
    self.beta = 0.9  # momentum coefficient

def backward(self, X, y, learning_rate):
    # ... compute dW1, db1, dW2, db2 as in Section 3.4 ...
    # Update the velocities (exponential moving averages of the gradients)
    self.vW2 = self.beta * self.vW2 + (1-self.beta) * dW2
    self.vb2 = self.beta * self.vb2 + (1-self.beta) * db2
    self.vW1 = self.beta * self.vW1 + (1-self.beta) * dW1
    self.vb1 = self.beta * self.vb1 + (1-self.beta) * db1
    # Apply the smoothed updates
    self.W2 -= learning_rate * self.vW2
    self.b2 -= learning_rate * self.vb2
    self.W1 -= learning_rate * self.vW1
    self.b1 -= learning_rate * self.vb1
```

## 9. Summary

This article walked through how to implement a simple three-layer neural network in Python, covering:

1. The basic structure and principles of neural networks
2. Implementing forward and backward propagation
3. Computing the loss function
4. Updating the parameters
5. Training and evaluating the model
6. Visualizing the decision boundary
7. Hyperparameter tuning tips
8. Improvements such as regularization and momentum

Although this network is structurally simple, it captures the core concepts and implementation techniques of neural networks. Starting from this basic implementation, you can go on to explore more complex architectures and deep learning techniques.
