#!/usr/bin/env python
# encoding: utf-8
"""
@author: HuRuiFeng
@file: 4.10-forward-prop.py
@time: 2020/2/14 23:47
@desc: Example code for Section 4.10, forward propagation in practice
"""
  9. import matplotlib.pyplot as plt
  10. import tensorflow as tf
  11. import tensorflow.keras.datasets as datasets
  12. plt.rcParams['font.size'] = 16
  13. plt.rcParams['font.family'] = ['STKaiti']
  14. plt.rcParams['axes.unicode_minus'] = False
  15. def load_data():
  16. # 加载 MNIST 数据集
  17. (x, y), (x_val, y_val) = datasets.mnist.load_data()
  18. # 转换为浮点张量, 并缩放到-1~1
  19. x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
  20. # 转换为整形张量
  21. y = tf.convert_to_tensor(y, dtype=tf.int32)
  22. # one-hot 编码
  23. y = tf.one_hot(y, depth=10)
  24. # 改变视图, [b, 28, 28] => [b, 28*28]
  25. x = tf.reshape(x, (-1, 28 * 28))
  26. # 构建数据集对象
  27. train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
  28. # 批量训练
  29. train_dataset = train_dataset.batch(200)
  30. return train_dataset
  31. def init_paramaters():
  32. # 每层的张量都需要被优化,故使用 Variable 类型,并使用截断的正太分布初始化权值张量
  33. # 偏置向量初始化为 0 即可
  34. # 第一层的参数
  35. w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
  36. b1 = tf.Variable(tf.zeros([256]))
  37. # 第二层的参数
  38. w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
  39. b2 = tf.Variable(tf.zeros([128]))
  40. # 第三层的参数
  41. w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
  42. b3 = tf.Variable(tf.zeros([10]))
  43. return w1, b1, w2, b2, w3, b3
  44. def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):
  45. for step, (x, y) in enumerate(train_dataset):
  46. with tf.GradientTape() as tape:
  47. # 第一层计算, [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b,256] + [b, 256]
  48. h1 = x @ w1 + tf.broadcast_to(b1, (x.shape[0], 256))
  49. h1 = tf.nn.relu(h1) # 通过激活函数
  50. # 第二层计算, [b, 256] => [b, 128]
  51. h2 = h1 @ w2 + b2
  52. h2 = tf.nn.relu(h2)
  53. # 输出层计算, [b, 128] => [b, 10]
  54. out = h2 @ w3 + b3
  55. # 计算网络输出与标签之间的均方差, mse = mean(sum(y-out)^2)
  56. # [b, 10]
  57. loss = tf.square(y - out)
  58. # 误差标量, mean: scalar
  59. loss = tf.reduce_mean(loss)
  60. # 自动梯度,需要求梯度的张量有[w1, b1, w2, b2, w3, b3]
  61. grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
  62. # 梯度更新, assign_sub 将当前值减去参数值,原地更新
  63. w1.assign_sub(lr * grads[0])
  64. b1.assign_sub(lr * grads[1])
  65. w2.assign_sub(lr * grads[2])
  66. b2.assign_sub(lr * grads[3])
  67. w3.assign_sub(lr * grads[4])
  68. b3.assign_sub(lr * grads[5])
  69. if step % 100 == 0:
  70. print(epoch, step, 'loss:', loss.numpy())
  71. return loss.numpy()
  72. def train(epochs):
  73. losses = []
  74. train_dataset = load_data()
  75. w1, b1, w2, b2, w3, b3 = init_paramaters()
  76. for epoch in range(epochs):
  77. loss = train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001)
  78. losses.append(loss)
  79. x = [i for i in range(0, epochs)]
  80. # 绘制曲线
  81. plt.plot(x, losses, color='blue', marker='s', label='训练')
  82. plt.xlabel('Epoch')
  83. plt.ylabel('MSE')
  84. plt.legend()
  85. plt.savefig('MNIST数据集的前向传播训练误差曲线.png')
  86. plt.close()
  87. if __name__ == '__main__':
  88. train(epochs=20)