# 4.10-forward-prop.py
  1. import matplotlib.pyplot as plt
  2. import tensorflow as tf
  3. import tensorflow.keras.datasets as datasets
  4. plt.rcParams['font.size'] = 16
  5. plt.rcParams['font.family'] = ['STKaiti']
  6. plt.rcParams['axes.unicode_minus'] = False
  7. def load_data():
  8. # 加载 MNIST 数据集
  9. (x, y), (x_val, y_val) = datasets.mnist.load_data()
  10. # 转换为浮点张量, 并缩放到-1~1
  11. x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
  12. # 转换为整形张量
  13. y = tf.convert_to_tensor(y, dtype=tf.int32)
  14. # one-hot 编码
  15. y = tf.one_hot(y, depth=10)
  16. # 改变视图, [b, 28, 28] => [b, 28*28]
  17. x = tf.reshape(x, (-1, 28 * 28))
  18. # 构建数据集对象
  19. train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
  20. # 批量训练
  21. train_dataset = train_dataset.batch(200)
  22. return train_dataset
  23. def init_paramaters():
  24. # 每层的张量都需要被优化,故使用 Variable 类型,并使用截断的正太分布初始化权值张量
  25. # 偏置向量初始化为 0 即可
  26. # 第一层的参数
  27. w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
  28. b1 = tf.Variable(tf.zeros([256]))
  29. # 第二层的参数
  30. w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
  31. b2 = tf.Variable(tf.zeros([128]))
  32. # 第三层的参数
  33. w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
  34. b3 = tf.Variable(tf.zeros([10]))
  35. return w1, b1, w2, b2, w3, b3
  36. def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):
  37. for step, (x, y) in enumerate(train_dataset):
  38. with tf.GradientTape() as tape:
  39. # 第一层计算, [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b,256] + [b, 256]
  40. h1 = x @ w1 + tf.broadcast_to(b1, (x.shape[0], 256))
  41. h1 = tf.nn.relu(h1) # 通过激活函数
  42. # 第二层计算, [b, 256] => [b, 128]
  43. h2 = h1 @ w2 + b2
  44. h2 = tf.nn.relu(h2)
  45. # 输出层计算, [b, 128] => [b, 10]
  46. out = h2 @ w3 + b3
  47. # 计算网络输出与标签之间的均方差, mse = mean(sum(y-out)^2)
  48. # [b, 10]
  49. loss = tf.square(y - out)
  50. # 误差标量, mean: scalar
  51. loss = tf.reduce_mean(loss)
  52. # 自动梯度,需要求梯度的张量有[w1, b1, w2, b2, w3, b3]
  53. grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
  54. # 梯度更新, assign_sub 将当前值减去参数值,原地更新
  55. w1.assign_sub(lr * grads[0])
  56. b1.assign_sub(lr * grads[1])
  57. w2.assign_sub(lr * grads[2])
  58. b2.assign_sub(lr * grads[3])
  59. w3.assign_sub(lr * grads[4])
  60. b3.assign_sub(lr * grads[5])
  61. if step % 100 == 0:
  62. print(epoch, step, 'loss:', loss.numpy())
  63. return loss.numpy()
  64. def train(epochs):
  65. losses = []
  66. train_dataset = load_data()
  67. w1, b1, w2, b2, w3, b3 = init_paramaters()
  68. for epoch in range(epochs):
  69. loss = train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001)
  70. losses.append(loss)
  71. x = [i for i in range(0, epochs)]
  72. # 绘制曲线
  73. plt.plot(x, losses, color='blue', marker='s', label='训练')
  74. plt.xlabel('Epoch')
  75. plt.ylabel('MSE')
  76. plt.legend()
  77. plt.savefig('MNIST数据集的前向传播训练误差曲线.png')
  78. plt.close()
  79. if __name__ == '__main__':
  80. train(epochs=20)