# (line-number gutter from the original extracted listing removed)
- #%%
- import matplotlib
- from matplotlib import pyplot as plt
- # Default parameters for plots
- matplotlib.rcParams['font.size'] = 20
- matplotlib.rcParams['figure.titlesize'] = 20
- matplotlib.rcParams['figure.figsize'] = [9, 7]
- matplotlib.rcParams['font.family'] = ['STKaiTi']
- matplotlib.rcParams['axes.unicode_minus']=False
- import tensorflow as tf
- from tensorflow import keras
- from tensorflow.keras import datasets, layers, optimizers
- import os
- os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
- print(tf.__version__)
def preprocess(x, y):
    """Normalise one MNIST batch for the MLP.

    Scales the uint8 images to [0, 1] floats, flattens each 28x28 image
    into a 784-dim vector, and one-hot encodes the integer labels.

    Args:
        x: image batch, shape [b, 28, 28].
        y: integer label batch, shape [b].

    Returns:
        (images [b, 784] float32, labels [b, 10] one-hot).
    """
    print(x.shape, y.shape)  # trace the incoming batch shapes
    images = tf.cast(x, dtype=tf.float32) / 255.
    images = tf.reshape(images, [-1, 28 * 28])
    labels = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
    return images, labels
- #%%
- (x, y), (x_test, y_test) = datasets.mnist.load_data()
- print('x:', x.shape, 'y:', y.shape, 'x test:', x_test.shape, 'y test:', y_test)
- #%%
- batchsz = 512
- train_db = tf.data.Dataset.from_tensor_slices((x, y))
- train_db = train_db.shuffle(1000)
- train_db = train_db.batch(batchsz)
- train_db = train_db.map(preprocess)
- train_db = train_db.repeat(20)
- #%%
- test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
- test_db = test_db.shuffle(1000).batch(batchsz).map(preprocess)
- x,y = next(iter(train_db))
- print('train sample:', x.shape, y.shape)
- # print(x[0], y[0])
- #%%
def main(lr=1e-2):
    """Train a 3-layer MLP (784-256-128-10) on MNIST with manual SGD.

    Evaluates test accuracy every 80 steps and saves the loss curve to
    'train.svg' and the accuracy curve to 'test.svg'.

    Args:
        lr: learning rate for the manual gradient-descent updates
            (default 1e-2, matching the original hard-coded value).
    """
    accs, losses = [], []  # metric history, sampled every 80 steps

    # Parameters: small Gaussian init for weights, zeros for biases.
    # FIX: the original comments said "784 => 512" / "512 => 256" but the
    # actual layer widths are 256 and 128; comments corrected below.
    # 784 => 256
    w1, b1 = tf.Variable(tf.random.normal([784, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
    # 256 => 128
    w2, b2 = tf.Variable(tf.random.normal([256, 128], stddev=0.1)), tf.Variable(tf.zeros([128]))
    # 128 => 10
    w3, b3 = tf.Variable(tf.random.normal([128, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))

    for step, (x, y) in enumerate(train_db):
        # preprocess already flattened the batch, so this reshape is a
        # harmless no-op kept for safety: [b, 28, 28] => [b, 784].
        x = tf.reshape(x, (-1, 784))

        with tf.GradientTape() as tape:
            # layer 1
            h1 = tf.nn.relu(x @ w1 + b1)
            # layer 2
            h2 = tf.nn.relu(h1 @ w2 + b2)
            # output logits (no activation)
            out = h2 @ w3 + b3
            # MSE between one-hot targets and logits: [b, 10] => scalar
            loss = tf.reduce_mean(tf.square(y - out))

        # Manual SGD: p <- p - lr * grad for all six parameters.
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        for p, g in zip([w1, b1, w2, b2, w3, b3], grads):
            p.assign_sub(lr * g)

        # The original tested `step % 80 == 0` twice in a row; the two
        # branches are merged here — identical behavior, less duplication.
        if step % 80 == 0:
            print(step, 'loss:', float(loss))
            losses.append(float(loss))

            # Evaluate accuracy over the full test set.
            total, total_correct = 0., 0
            for x, y in test_db:
                h1 = tf.nn.relu(x @ w1 + b1)
                h2 = tf.nn.relu(h1 @ w2 + b2)
                out = h2 @ w3 + b3
                # [b, 10] => [b] predicted class indices
                pred = tf.argmax(out, axis=1)
                # convert one-hot targets back to class indices
                y = tf.argmax(y, axis=1)
                correct = tf.equal(pred, y)
                # bool tensor => int tensor => numpy scalar
                total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
                total += x.shape[0]

            print(step, 'Evaluate Acc:', total_correct / total)
            accs.append(total_correct / total)

    # Plot and save the training-loss curve.
    plt.figure()
    steps = [i * 80 for i in range(len(losses))]
    plt.plot(steps, losses, color='C0', marker='s', label='训练')
    plt.ylabel('MSE')
    plt.xlabel('Step')
    plt.legend()
    plt.savefig('train.svg')

    # Plot and save the test-accuracy curve.
    plt.figure()
    plt.plot(steps, accs, color='C1', marker='s', label='测试')
    plt.ylabel('准确率')
    plt.xlabel('Step')
    plt.legend()
    plt.savefig('test.svg')
# Script entry point: train and evaluate only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
# (end of extracted listing)