| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- import matplotlib.pyplot as plt
- import tensorflow as tf
- import tensorflow.keras.datasets as datasets
# Global matplotlib styling applied once at import time.
# 'STKaiti' is presumably chosen so the Chinese legend label used in train()
# renders correctly -- TODO confirm the font is installed on the target machine.
# 'axes.unicode_minus' = False makes matplotlib draw minus signs with the
# ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.size': 16,
    'font.family': ['STKaiti'],
    'axes.unicode_minus': False,
})
def load_data(batch_size=200):
    """Load the MNIST training split as a batched ``tf.data.Dataset``.

    Images are converted to float32 and divided by 255, so pixel values lie
    in [0, 1], then flattened from [b, 28, 28] to [b, 784]. Labels are
    converted to int32 and one-hot encoded with depth 10.

    Args:
        batch_size: Number of samples per batch. Defaults to 200, the value
            that was previously hard-coded.

    Returns:
        A ``tf.data.Dataset`` yielding ``(x, y)`` batches, where ``x`` has
        shape [batch, 784] and ``y`` has shape [batch, 10].
    """
    # Only the training split is used; the validation split is discarded.
    (x, y), _ = datasets.mnist.load_data()

    # Float tensor scaled to the 0~1 range.
    x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
    # Flatten each image: [b, 28, 28] => [b, 28*28].
    x = tf.reshape(x, (-1, 28 * 28))

    # Integer labels -> one-hot vectors of length 10.
    y = tf.convert_to_tensor(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)

    # Wrap the tensors in a dataset object and split it into mini-batches.
    train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
    return train_dataset.batch(batch_size)
def init_paramaters():
    """Create the trainable parameters of the three-layer network.

    Layer shapes are 784 -> 256 -> 128 -> 10. Each weight matrix is drawn
    from a truncated normal distribution with stddev 0.1 and each bias
    vector starts at zero. Everything is wrapped in ``tf.Variable`` because
    the training loop updates the tensors in place.

    Returns:
        The tuple ``(w1, b1, w2, b2, w3, b3)``.
    """
    layer_shapes = ((784, 256), (256, 128), (128, 10))

    params = []
    # Layers are created in order so the random draws happen in the same
    # sequence: w1, then w2, then w3.
    for fan_in, fan_out in layer_shapes:
        weight = tf.Variable(
            tf.random.truncated_normal([fan_in, fan_out], stddev=0.1))
        bias = tf.Variable(tf.zeros([fan_out]))
        params.extend((weight, bias))

    return tuple(params)
def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):
    """Run one pass of plain gradient descent over ``train_dataset``.

    The network is a three-layer perceptron: two ReLU hidden layers followed
    by a linear output layer. The loss is the mean squared error between the
    output and the label batch. After every batch the six parameters are
    updated in place with ``assign_sub(lr * grad)``.

    Args:
        epoch: Epoch index; only used in the progress message.
        train_dataset: Iterable yielding ``(x, y)`` batches.
        w1, b1, w2, b2, w3, b3: Weight/bias variables of the three layers;
            they are modified in place.
        lr: Learning rate. Defaults to 0.001.

    Returns:
        The loss of the LAST batch of the epoch, as returned by
        ``loss.numpy()`` (not the mean over the epoch).

    Raises:
        ValueError: If ``train_dataset`` yields no batches, in which case
            there is no loss to report.
    """
    params = [w1, b1, w2, b2, w3, b3]
    loss = None

    for step, (x, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            # Layer 1: [b, 784] @ [784, h1] + [h1] => [b, h1].
            # The bias is broadcast by `+`, same as the other layers, so no
            # hidden width needs to be hard-coded here.
            h1 = tf.nn.relu(x @ w1 + b1)
            # Layer 2: [b, h1] => [b, h2].
            h2 = tf.nn.relu(h1 @ w2 + b2)
            # Output layer: [b, h2] => [b, 10], no activation.
            out = h2 @ w3 + b3
            # Mean squared error over all elements of the batch -> scalar.
            loss = tf.reduce_mean(tf.square(y - out))

        # Gradients of the loss w.r.t. every trainable tensor.
        grads = tape.gradient(loss, params)

        # In-place update: param <- param - lr * grad.
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)

        if step % 100 == 0:
            print(epoch, step, 'loss:', loss.numpy())

    if loss is None:
        # Previously this surfaced as an opaque UnboundLocalError.
        raise ValueError('train_dataset yielded no batches')

    return loss.numpy()
def train(epochs):
    """Train the network for ``epochs`` epochs and save the loss curve.

    Loads the dataset, initialises the parameters, calls ``train_epoch`` once
    per epoch with a learning rate of 0.001, and collects the value each call
    returns. The collected losses are then plotted against the epoch index
    and the figure is written to a PNG file in the current directory.

    Args:
        epochs: Number of epochs to run.
    """
    dataset = load_data()
    params = init_paramaters()  # (w1, b1, w2, b2, w3, b3)

    losses = []
    for epoch in range(epochs):
        losses.append(train_epoch(epoch, dataset, *params, lr=0.001))

    # Plot the per-epoch loss curve.
    epoch_axis = list(range(epochs))
    plt.plot(epoch_axis, losses, color='blue', marker='s', label='训练')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    plt.legend()
    plt.savefig('MNIST数据集的前向传播训练误差曲线.png')
    plt.close()
# Script entry point: train for 20 epochs when run directly.
if __name__ == "__main__":
    train(epochs=20)
|