5 anos atrás · 70f833b3f3
--- a/ch01-人工智能绪论/autograd.py
+++ b/ch01-人工智能绪论/autograd.py
--- a/ch01-人工智能绪论/gpu_accelerate.py
+++ b/ch01-人工智能绪论/gpu_accelerate.py
--- a/ch01-人工智能绪论/tf1.py
+++ b/ch01-人工智能绪论/tf1.py
@@ -1,5 +1,5 @@
 
				 import tensorflow.compat.v1 as tf
			
 
				-tf.disable_v2_behavior()
			
 
				+tf.disable_v2_behavior() # 使用静态图模式运行以下代码
			
 
				 assert tf.__version__.startswith('2.')
			
 
				 
			
 
				 # 1.创建计算图阶段
			
--- a/ch01-人工智能绪论/tf2.py
+++ b/ch01-人工智能绪论/tf2.py
--- a/ch02-回归问题/data.csv
+++ b/ch02-回归问题/data.csv
--- a/ch02-回归问题/linear_regression.py
+++ b/ch02-回归问题/linear_regression.py
--- a/ch02-回归问题/回归实战.pdf
+++ b/ch02-回归问题/回归实战.pdf
--- a/ch02-回归问题/回归问题.pdf
+++ b/ch02-回归问题/回归问题.pdf
--- a/ch03-分类问题/forward_layer.py
+++ b/ch03-分类问题/forward_layer.py
--- a/ch03-分类问题/forward_tensor.py
+++ b/ch03-分类问题/forward_tensor.py
@@ -1,3 +1,5 @@
 
				+import  os
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
			
 
				 import  matplotlib
			
 
				 from 	matplotlib import pyplot as plt
			
 
				 # Default parameters for plots
			
@@ -10,9 +12,7 @@ matplotlib.rcParams['axes.unicode_minus']=False
 
				 import  tensorflow as tf
			
 
				 from    tensorflow import keras
			
 
				 from    tensorflow.keras import datasets
			
 
				-import  os
			
 
				 
			
 
				-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
			
 
				 
			
 
				 # x: [60k, 28, 28],
			
 
				 # y: [60k]
			
--- a/ch03-分类问题/main.py
+++ b/ch03-分类问题/main.py
@@ -1,60 +1,60 @@
 
				-import  tensorflow as tf

			
 
				-from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics

			
 
				-

			
 
				-

			
 
				-# 设置GPU使用方式

			
 
				-# 获取GPU列表

			
 
				-gpus = tf.config.experimental.list_physical_devices('GPU')

			
 
				-if gpus:

			
 
				-  try:

			
 
				-    # 设置GPU为增长式占用

			
 
				-    for gpu in gpus:

			
 
				-      tf.config.experimental.set_memory_growth(gpu, True) 

			
 
				-  except RuntimeError as e:

			
 
				-    # 打印异常

			
 
				-    print(e)

			
 
				-

			
 
				-(xs, ys),_ = datasets.mnist.load_data()

			
 
				-print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())

			
 
				-

			
 
				-batch_size = 32

			
 
				-

			
 
				-xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.

			
 
				-db = tf.data.Dataset.from_tensor_slices((xs,ys))

			
 
				-db = db.batch(batch_size).repeat(30)

			
 
				-

			
 
				-

			
 
				-model = Sequential([layers.Dense(256, activation='relu'), 

			
 
				-                     layers.Dense(128, activation='relu'),

			
 
				-                     layers.Dense(10)])

			
 
				-model.build(input_shape=(4, 28*28))

			
 
				-model.summary()

			
 
				-

			
 
				-optimizer = optimizers.SGD(lr=0.01)

			
 
				-acc_meter = metrics.Accuracy()

			
 
				-

			
 
				-for step, (x,y) in enumerate(db):

			
 
				-

			
 
				-    with tf.GradientTape() as tape:

			
 
				-        # 打平操作，[b, 28, 28] => [b, 784]

			
 
				-        x = tf.reshape(x, (-1, 28*28))

			
 
				-        # Step1. 得到模型输出output [b, 784] => [b, 10]

			
 
				-        out = model(x)

			
 
				-        # [b] => [b, 10]

			
 
				-        y_onehot = tf.one_hot(y, depth=10)

			
 
				-        # 计算差的平方和，[b, 10]

			
 
				-        loss = tf.square(out-y_onehot)

			
 
				-        # 计算每个样本的平均误差，[b]

			
 
				-        loss = tf.reduce_sum(loss) / x.shape[0]

			
 
				-

			
 
				-

			
 
				-    acc_meter.update_state(tf.argmax(out, axis=1), y)

			
 
				-

			
 
				-    grads = tape.gradient(loss, model.trainable_variables)

			
 
				-    optimizer.apply_gradients(zip(grads, model.trainable_variables))

			
 
				-

			
 
				-

			
 
				-    if step % 200==0:

			
 
				-

			
 
				-        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())

			
 
				-        acc_meter.reset_states()

			
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+# 设置GPU使用方式
			
 
				+# 获取GPU列表
			
 
				+gpus = tf.config.experimental.list_physical_devices('GPU')
			
 
				+if gpus:
			
 
				+  try:
			
 
				+    # 设置GPU为增长式占用
			
 
				+    for gpu in gpus:
			
 
				+      tf.config.experimental.set_memory_growth(gpu, True) 
			
 
				+  except RuntimeError as e:
			
 
				+    # 打印异常
			
 
				+    print(e)
			
 
				+
			
 
				+(xs, ys),_ = datasets.mnist.load_data()
			
 
				+print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())
			
 
				+
			
 
				+batch_size = 32
			
 
				+
			
 
				+xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
			
 
				+db = tf.data.Dataset.from_tensor_slices((xs,ys))
			
 
				+db = db.batch(batch_size).repeat(30)
			
 
				+
			
 
				+
			
 
				+model = Sequential([layers.Dense(256, activation='relu'), 
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+model.build(input_shape=(4, 28*28))
			
 
				+model.summary()
			
 
				+
			
 
				+optimizer = optimizers.SGD(lr=0.01)
			
 
				+acc_meter = metrics.Accuracy()
			
 
				+
			
 
				+for step, (x,y) in enumerate(db):
			
 
				+
			
 
				+    with tf.GradientTape() as tape:
			
 
				+        # 打平操作，[b, 28, 28] => [b, 784]
			
 
				+        x = tf.reshape(x, (-1, 28*28))
			
 
				+        # Step1. 得到模型输出output [b, 784] => [b, 10]
			
 
				+        out = model(x)
			
 
				+        # [b] => [b, 10]
			
 
				+        y_onehot = tf.one_hot(y, depth=10)
			
 
				+        # 计算差的平方和，[b, 10]
			
 
				+        loss = tf.square(out-y_onehot)
			
 
				+        # 计算每个样本的平均误差，[b]
			
 
				+        loss = tf.reduce_sum(loss) / x.shape[0]
			
 
				+
			
 
				+
			
 
				+    acc_meter.update_state(tf.argmax(out, axis=1), y)
			
 
				+
			
 
				+    grads = tape.gradient(loss, model.trainable_variables)
			
 
				+    optimizer.apply_gradients(zip(grads, model.trainable_variables))
			
 
				+
			
 
				+
			
 
				+    if step % 200==0:
			
 
				+
			
 
				+        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())
			
 
				+        acc_meter.reset_states()
			
--- a/ch03-分类问题/手写数字问题.pdf
+++ b/ch03-分类问题/手写数字问题.pdf
--- a/ch03-分类问题/手写数字问题体验.pdf
+++ b/ch03-分类问题/手写数字问题体验.pdf
--- a/ch03/readMNIST.py
+++ b/ch03/readMNIST.py
@@ -1,73 +0,0 @@
 
				-#!/usr/bin/python
			
 
				-# -*- coding: utf-8 -*-
			
 
				-
			
 
				-"""
			
 
				-从MNIST中读取原始图片并保存、读取标签数据并保存。
			
 
				-MNIST文件结构分析可以参考：https://blog.csdn.net/justidle/article/details/103149253
			
 
				-"""
			
 
				-"""
			
 
				-使用方法：
			
 
				-1、将MNIST的文件下载到本地。
			
 
				-2、在py文件所在目录下，建立mnist_data目录。然后将MNIST的四个文件拷贝到mnist_data目录，并解压
			
 
				-3、在py文件所在目录下，建立test目录，改目录用于存放解压出的图片文件和标签文件
			
 
				-"""
			
 
				-
			
 
				-import struct
			
 
				-import numpy as np
			
 
				-import PIL.Image
			
 
				-    
			
 
				-def read_image(filename):
			
 
				-    #打开文件
			
 
				-    f = open(filename, 'rb')
			
 
				-    
			
 
				-    #读取文件内容
			
 
				-    index = 0
			
 
				-    buf = f.read()
			
 
				-    
			
 
				-    #关闭文件
			
 
				-    f.close()
			
 
				-    
			
 
				-    #解析文件内容
			
 
				-    #>IIII 表示使用大端规则，读取四个整型
			
 
				-    magic, numImages, rows, columns = struct.unpack_from('>IIII', buf, index)
			
 
				-    index += struct.calcsize('>IIII')
			
 
				-    
			
 
				-    for i in range(0, numImages):
			
 
				-        # L代表灰度图片
			
 
				-        image = PIL.Image.new('L', (columns, rows))
			
 
				-        
			
 
				-        for x in range(rows):
			
 
				-            for y in range(columns):
			
 
				-                # ‘>B' 读取一个字节
			
 
				-                image.putpixel((y,x), int(struct.unpack_from('>B', buf, index)[0]))
			
 
				-                index += struct.calcsize('>B')
			
 
				-                
			
 
				-        print('save ' + str(i) + 'image')
			
 
				-        image.save('mnist_data/test/'+str(i)+'.png')
			
 
				-        
			
 
				-def read_label(filename, saveFilename):
			
 
				-    f = open(filename, 'rb')
			
 
				-    index = 0
			
 
				-    buf = f.read()
			
 
				-    f.close()
			
 
				-    
			
 
				-    magic, labels = struct.unpack_from('>II' , buf , index)
			
 
				-    index += struct.calcsize('>II')
			
 
				-    
			
 
				-    labelArr = [0] * labels
			
 
				-    
			
 
				-    for x in range(labels):
			
 
				-        labelArr[x] = int(struct.unpack_from('>B', buf, index)[0])
			
 
				-        index += struct.calcsize('>B')
			
 
				-    
			
 
				-    save = open(saveFilename, 'w')
			
 
				-    save.write(','.join(map(lambda x: str(x), labelArr)))
			
 
				-    save.write('\n')
			
 
				-    save.close()
			
 
				-    print('save labels success')
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    #注意t10k-images-idx3-ubyte里面一共有10,000张图片
			
 
				-    read_image('mnist_data/t10k-images-idx3-ubyte')
			
 
				-    read_label('mnist_data/t10k-labels-idx1-ubyte', 'mnist_data/test/label.txt')
			
 
				-    
			
--- a/ch04-TensorFlow基础/4.10-forward-prop.py
+++ b/ch04-TensorFlow基础/4.10-forward-prop.py
@@ -0,0 +1,109 @@
 
				+#!/usr/bin/env python
			
 
				+# encoding: utf-8
			
 
				+"""
			
 
				+@author: HuRuiFeng
			
 
				+@file: 4.10-forward-prop.py
			
 
				+@time: 2020/2/14 23:47
			
 
				+@desc: 4.10 前向传播实战的示例代码
			
 
				+"""
			
 
				+
			
 
				+import matplotlib.pyplot as plt
			
 
				+import tensorflow as tf
			
 
				+import tensorflow.keras.datasets as datasets
			
 
				+
			
 
				+plt.rcParams['font.size'] = 16
			
 
				+plt.rcParams['font.family'] = ['STKaiti']
			
 
				+plt.rcParams['axes.unicode_minus'] = False
			
 
				+
			
 
				+
			
 
				+def load_data():
			
 
				+    # 加载 MNIST 数据集
			
 
				+    (x, y), (x_val, y_val) = datasets.mnist.load_data()
			
 
				+    # Convert to a float tensor and scale to the range 0~1 (divide by 255)
			
 
				+    x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
			
 
				+    # 转换为整形张量
			
 
				+    y = tf.convert_to_tensor(y, dtype=tf.int32)
			
 
				+    # one-hot 编码
			
 
				+    y = tf.one_hot(y, depth=10)
			
 
				+
			
 
				+    # 改变视图， [b, 28, 28] => [b, 28*28]
			
 
				+    x = tf.reshape(x, (-1, 28 * 28))
			
 
				+
			
 
				+    # 构建数据集对象
			
 
				+    train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
			
 
				+    # 批量训练
			
 
				+    train_dataset = train_dataset.batch(200)
			
 
				+    return train_dataset
			
 
				+
			
 
				+
			
 
				+def init_paramaters():
			
 
				+    # Every layer's tensors are optimized, so use tf.Variable; weights are initialized from a truncated normal distribution
			
 
				+    # 偏置向量初始化为 0 即可
			
 
				+    # 第一层的参数
			
 
				+    w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
			
 
				+    b1 = tf.Variable(tf.zeros([256]))
			
 
				+    # 第二层的参数
			
 
				+    w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
			
 
				+    b2 = tf.Variable(tf.zeros([128]))
			
 
				+    # 第三层的参数
			
 
				+    w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
			
 
				+    b3 = tf.Variable(tf.zeros([10]))
			
 
				+    return w1, b1, w2, b2, w3, b3
			
 
				+
			
 
				+
			
 
				+def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):
			
 
				+    for step, (x, y) in enumerate(train_dataset):
			
 
				+        with tf.GradientTape() as tape:
			
 
				+            # 第一层计算， [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b,256] + [b, 256]
			
 
				+            h1 = x @ w1 + tf.broadcast_to(b1, (x.shape[0], 256))
			
 
				+            h1 = tf.nn.relu(h1)  # 通过激活函数
			
 
				+
			
 
				+            # 第二层计算， [b, 256] => [b, 128]
			
 
				+            h2 = h1 @ w2 + b2
			
 
				+            h2 = tf.nn.relu(h2)
			
 
				+            # 输出层计算， [b, 128] => [b, 10]
			
 
				+            out = h2 @ w3 + b3
			
 
				+
			
 
				+            # Mean squared error between the labels and the network output: mse = mean((y - out)^2)
			
 
				+            # [b, 10]
			
 
				+            loss = tf.square(y - out)
			
 
				+            # 误差标量， mean: scalar
			
 
				+            loss = tf.reduce_mean(loss)
			
 
				+
			
 
				+            # 自动梯度，需要求梯度的张量有[w1, b1, w2, b2, w3, b3]
			
 
				+            grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
			
 
				+
			
 
				+        # 梯度更新， assign_sub 将当前值减去参数值，原地更新
			
 
				+        w1.assign_sub(lr * grads[0])
			
 
				+        b1.assign_sub(lr * grads[1])
			
 
				+        w2.assign_sub(lr * grads[2])
			
 
				+        b2.assign_sub(lr * grads[3])
			
 
				+        w3.assign_sub(lr * grads[4])
			
 
				+        b3.assign_sub(lr * grads[5])
			
 
				+
			
 
				+        if step % 100 == 0:
			
 
				+            print(epoch, step, 'loss:', loss.numpy())
			
 
				+
			
 
				+    return loss.numpy()
			
 
				+
			
 
				+
			
 
				+def train(epochs):
			
 
				+    losses = []
			
 
				+    train_dataset = load_data()
			
 
				+    w1, b1, w2, b2, w3, b3 = init_paramaters()
			
 
				+    for epoch in range(epochs):
			
 
				+        loss = train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001)
			
 
				+        losses.append(loss)
			
 
				+
			
 
				+    x = [i for i in range(0, epochs)]
			
 
				+    # 绘制曲线
			
 
				+    plt.plot(x, losses, color='blue', marker='s', label='训练')
			
 
				+    plt.xlabel('Epoch')
			
 
				+    plt.ylabel('MSE')
			
 
				+    plt.legend()
			
 
				+    plt.savefig('MNIST数据集的前向传播训练误差曲线.png')
			
 
				+    plt.close()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    train(epochs=20)
			
--- a/ch04-TensorFlow基础/Broadcasting.pdf
+++ b/ch04-TensorFlow基础/Broadcasting.pdf
--- a/ch04-TensorFlow基础/MNIST数据集的前向传播训练误差曲线.png
+++ b/ch04-TensorFlow基础/MNIST数据集的前向传播训练误差曲线.png
--- a/ch04-TensorFlow基础/ch04-TensorFlow基础.ipynb
+++ b/ch04-TensorFlow基础/ch04-TensorFlow基础.ipynb
--- a/ch04-TensorFlow基础/创建Tensor.pdf
+++ b/ch04-TensorFlow基础/创建Tensor.pdf
--- a/ch04-TensorFlow基础/前向传播.pdf
+++ b/ch04-TensorFlow基础/前向传播.pdf
--- a/ch04-TensorFlow基础/数学运算.pdf
+++ b/ch04-TensorFlow基础/数学运算.pdf
--- a/ch04-TensorFlow基础/数据类型.pdf
+++ b/ch04-TensorFlow基础/数据类型.pdf
--- a/ch04-TensorFlow基础/索引与切片-1.pdf
+++ b/ch04-TensorFlow基础/索引与切片-1.pdf
--- a/ch04-TensorFlow基础/索引与切片-2.pdf
+++ b/ch04-TensorFlow基础/索引与切片-2.pdf
--- a/ch04-TensorFlow基础/维度变换.pdf
+++ b/ch04-TensorFlow基础/维度变换.pdf
--- a/ch05-TensorFlow进阶/acc_topk.py
+++ b/ch05-TensorFlow进阶/acc_topk.py
@@ -0,0 +1,37 @@
 
				+import  tensorflow as tf
			
 
				+import  os
			
 
				+
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
			
 
				+tf.random.set_seed(2467)
			
 
				+
			
 
				+def accuracy(output, target, topk=(1,)):
			
 
				+    maxk = max(topk)
			
 
				+    batch_size = target.shape[0]
			
 
				+
			
 
				+    pred = tf.math.top_k(output, maxk).indices
			
 
				+    pred = tf.transpose(pred, perm=[1, 0])
			
 
				+    target_ = tf.broadcast_to(target, pred.shape)
			
 
				+    # [maxk, b]
			
 
				+    correct = tf.equal(pred, target_)
			
 
				+
			
 
				+    res = []
			
 
				+    for k in topk:
			
 
				+        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
			
 
				+        correct_k = tf.reduce_sum(correct_k)
			
 
				+        acc = float(correct_k* (100.0 / batch_size) )
			
 
				+        res.append(acc)
			
 
				+
			
 
				+    return res
			
 
				+
			
 
				+
			
 
				+
			
 
				+output = tf.random.normal([10, 6])
			
 
				+output = tf.math.softmax(output, axis=1)
			
 
				+target = tf.random.uniform([10], maxval=6, dtype=tf.int32)
			
 
				+print('prob:', output.numpy())
			
 
				+pred = tf.argmax(output, axis=1)
			
 
				+print('pred:', pred.numpy())
			
 
				+print('label:', target.numpy())
			
 
				+
			
 
				+acc = accuracy(output, target, topk=(1,2,3,4,5,6))
			
 
				+print('top-1-6 acc:', acc)
			
--- a/ch05-TensorFlow进阶/gradient_clip.py
+++ b/ch05-TensorFlow进阶/gradient_clip.py
@@ -0,0 +1,85 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow import keras
			
 
				+from    tensorflow.keras import datasets, layers, optimizers
			
 
				+import  os
			
 
				+
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
			
 
				+print(tf.__version__)
			
 
				+
			
 
				+(x, y), _ = datasets.mnist.load_data()
			
 
				+x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.
			
 
				+y = tf.convert_to_tensor(y)
			
 
				+y = tf.one_hot(y, depth=10)
			
 
				+print('x:', x.shape, 'y:', y.shape)
			
 
				+train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128).repeat(30)
			
 
				+x,y = next(iter(train_db))
			
 
				+print('sample:', x.shape, y.shape)
			
 
				+# print(x[0], y[0])
			
 
				+
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+
			
 
				+    # 784 => 512
			
 
				+    w1, b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)), tf.Variable(tf.zeros([512]))
			
 
				+    # 512 => 256
			
 
				+    w2, b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
			
 
				+    # 256 => 10
			
 
				+    w3, b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
			
 
				+
			
 
				+
			
 
				+
			
 
				+    optimizer = optimizers.SGD(lr=0.01)
			
 
				+
			
 
				+
			
 
				+    for step, (x,y) in enumerate(train_db):
			
 
				+
			
 
				+        # [b, 28, 28] => [b, 784]
			
 
				+        x = tf.reshape(x, (-1, 784))
			
 
				+
			
 
				+        with tf.GradientTape() as tape:
			
 
				+
			
 
				+            # layer1.
			
 
				+            h1 = x @ w1 + b1
			
 
				+            h1 = tf.nn.relu(h1)
			
 
				+            # layer2
			
 
				+            h2 = h1 @ w2 + b2
			
 
				+            h2 = tf.nn.relu(h2)
			
 
				+            # output
			
 
				+            out = h2 @ w3 + b3
			
 
				+            # out = tf.nn.relu(out)
			
 
				+
			
 
				+            # compute loss
			
 
				+            # [b, 10] - [b, 10]
			
 
				+            loss = tf.square(y-out)
			
 
				+            # [b, 10] => [b]
			
 
				+            loss = tf.reduce_mean(loss, axis=1)
			
 
				+            # [b] => scalar
			
 
				+            loss = tf.reduce_mean(loss)
			
 
				+
			
 
				+
			
 
				+
			
 
				+        # compute gradient
			
 
				+        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
			
 
				+        # print('==before==')
			
 
				+        # for g in grads:
			
 
				+        #     print(tf.norm(g))
			
 
				+        
			
 
				+        grads,  _ = tf.clip_by_global_norm(grads, 15)
			
 
				+
			
 
				+        # print('==after==')
			
 
				+        # for g in grads:
			
 
				+        #     print(tf.norm(g))
			
 
				+        # update w' = w - lr*grad
			
 
				+        optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w3, b3]))
			
 
				+
			
 
				+
			
 
				+
			
 
				+        if step % 100 == 0:
			
 
				+            print(step, 'loss:', float(loss))
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/ch05-TensorFlow进阶/mnist_tensor.py
+++ b/ch05-TensorFlow进阶/mnist_tensor.py
--- a/ch05-TensorFlow进阶/合并与分割.pdf
+++ b/ch05-TensorFlow进阶/合并与分割.pdf
--- a/ch05-TensorFlow进阶/填充与复制.pdf
+++ b/ch05-TensorFlow进阶/填充与复制.pdf
--- a/ch05-TensorFlow进阶/张量排序.pdf
+++ b/ch05-TensorFlow进阶/张量排序.pdf
--- a/ch05-TensorFlow进阶/张量限幅.pdf
+++ b/ch05-TensorFlow进阶/张量限幅.pdf
--- a/ch05-TensorFlow进阶/数据统计.pdf
+++ b/ch05-TensorFlow进阶/数据统计.pdf
--- a/ch05-TensorFlow进阶/高阶特性.pdf
+++ b/ch05-TensorFlow进阶/高阶特性.pdf
--- a/ch05/nb.py
+++ b/ch05/nb.py
@@ -1,21 +0,0 @@
 
				-#%%
			
 
				-import  tensorflow as tf
			
 
				-from    tensorflow import keras
			
 
				-from    tensorflow.keras import datasets
			
 
				-import  os
			
 
				-
			
 
				-
			
 
				-#%%
			
 
				-a = tf.random.normal([4,35,8]) # 模拟成绩册A
			
 
				-b = tf.random.normal([6,35,8]) # 模拟成绩册B
			
 
				-tf.concat([a,b],axis=0) # 合并成绩册
			
 
				-
			
 
				-
			
 
				-#%%
			
 
				-x = tf.random.normal([2,784])
			
 
				-w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
			
 
				-b1 = tf.Variable(tf.zeros([256]))
			
 
				-o1 = tf.matmul(x,w1) + b1  #
			
 
				-o1 = tf.nn.relu(o1)
			
 
				-
			
 
				-#%%
			
--- a/ch06-神经网络/auto_efficency_regression.py
+++ b/ch06-神经网络/auto_efficency_regression.py
--- a/ch06-神经网络/ch06-神经网络.ipynb
+++ b/ch06-神经网络/ch06-神经网络.ipynb
--- a/ch06-神经网络/forward.py
+++ b/ch06-神经网络/forward.py
--- a/ch06-神经网络/nb.py
+++ b/ch06-神经网络/nb.py
--- a/ch06-神经网络/全接连层.pdf
+++ b/ch06-神经网络/全接连层.pdf
--- a/ch06-神经网络/误差计算.pdf
+++ b/ch06-神经网络/误差计算.pdf
--- a/ch06-神经网络/输出方式.pdf
+++ b/ch06-神经网络/输出方式.pdf
--- a/ch07-反向传播算法/0.梯度下降-简介.pdf
+++ b/ch07-反向传播算法/0.梯度下降-简介.pdf
--- a/ch07-反向传播算法/2.常见函数的梯度.pdf
+++ b/ch07-反向传播算法/2.常见函数的梯度.pdf
--- a/ch07-反向传播算法/2nd_derivative.py
+++ b/ch07-反向传播算法/2nd_derivative.py
@@ -0,0 +1,18 @@
 
				+import tensorflow as tf
			
 
				+
			
 
				+w = tf.Variable(1.0)
			
 
				+b = tf.Variable(2.0)
			
 
				+x = tf.Variable(3.0)
			
 
				+
			
 
				+with tf.GradientTape() as t1:
			
 
				+  with tf.GradientTape() as t2:
			
 
				+    y = x * w + b
			
 
				+  dy_dw, dy_db = t2.gradient(y, [w, b])
			
 
				+d2y_dw2 = t1.gradient(dy_dw, w)
			
 
				+
			
 
				+print(dy_dw)
			
 
				+print(dy_db)
			
 
				+print(d2y_dw2)
			
 
				+
			
 
				+assert dy_dw.numpy() == 3.0
			
 
				+assert d2y_dw2 is None
			
--- a/ch07-反向传播算法/3.激活函数及其梯度.pdf
+++ b/ch07-反向传播算法/3.激活函数及其梯度.pdf
--- a/ch07-反向传播算法/4.损失函数及其梯度.pdf
+++ b/ch07-反向传播算法/4.损失函数及其梯度.pdf
--- a/ch07-反向传播算法/5.单输出感知机梯度.pdf
+++ b/ch07-反向传播算法/5.单输出感知机梯度.pdf
--- a/ch07-反向传播算法/6.多输出感知机梯度.pdf
+++ b/ch07-反向传播算法/6.多输出感知机梯度.pdf
--- a/ch07-反向传播算法/7.链式法则.pdf
+++ b/ch07-反向传播算法/7.链式法则.pdf
--- a/ch07-反向传播算法/8.多层感知机梯度.pdf
+++ b/ch07-反向传播算法/8.多层感知机梯度.pdf
--- a/ch07-反向传播算法/ch07-反向传播算法.ipynb
+++ b/ch07-反向传播算法/ch07-反向传播算法.ipynb
--- a/ch07-反向传播算法/chain_rule.py
+++ b/ch07-反向传播算法/chain_rule.py
--- a/ch07-反向传播算法/crossentropy_loss.py
+++ b/ch07-反向传播算法/crossentropy_loss.py
@@ -0,0 +1,24 @@
 
				+import tensorflow as tf 
			
 
				+
			
 
				+
			
 
				+tf.random.set_seed(4323)
			
 
				+
			
 
				+x=tf.random.normal([1,3])
			
 
				+
			
 
				+w=tf.random.normal([3,2])
			
 
				+
			
 
				+b=tf.random.normal([2])
			
 
				+
			
 
				+y = tf.constant([0, 1])
			
 
				+
			
 
				+
			
 
				+with tf.GradientTape() as tape:
			
 
				+
			
 
				+	tape.watch([w, b])
			
 
				+	logits = (x@w+b)
			
 
				+	loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y, logits, from_logits=True))
			
 
				+
			
 
				+grads = tape.gradient(loss, [w, b])
			
 
				+print('w grad:', grads[0])
			
 
				+
			
 
				+print('b grad:', grads[1])
			
--- a/ch07-反向传播算法/himmelblau.py
+++ b/ch07-反向传播算法/himmelblau.py
--- a/ch07-反向传播算法/mse_grad.py
+++ b/ch07-反向传播算法/mse_grad.py
@@ -0,0 +1,26 @@
 
				+import tensorflow as tf 
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+x=tf.random.normal([1,3])
			
 
				+
			
 
				+w=tf.ones([3,2])
			
 
				+
			
 
				+b=tf.ones([2])
			
 
				+
			
 
				+y = tf.constant([0, 1])
			
 
				+
			
 
				+
			
 
				+with tf.GradientTape() as tape:
			
 
				+
			
 
				+	tape.watch([w, b])
			
 
				+	logits = tf.sigmoid(x@w+b) 
			
 
				+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
			
 
				+
			
 
				+grads = tape.gradient(loss, [w, b])
			
 
				+print('w grad:', grads[0])
			
 
				+
			
 
				+print('b grad:', grads[1])
			
 
				+
			
 
				+
			
--- a/ch07-反向传播算法/multi_output_perceptron.py
+++ b/ch07-反向传播算法/multi_output_perceptron.py
@@ -0,0 +1,26 @@
 
				+import tensorflow as tf 
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+x=tf.random.normal([1,3])
			
 
				+
			
 
				+w=tf.ones([3,2])
			
 
				+
			
 
				+b=tf.ones([2])
			
 
				+
			
 
				+y = tf.constant([0, 1])
			
 
				+
			
 
				+
			
 
				+with tf.GradientTape() as tape:
			
 
				+
			
 
				+	tape.watch([w, b])
			
 
				+	logits = tf.sigmoid(x@w+b) 
			
 
				+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
			
 
				+
			
 
				+grads = tape.gradient(loss, [w, b])
			
 
				+print('w grad:', grads[0])
			
 
				+
			
 
				+print('b grad:', grads[1])
			
 
				+
			
 
				+
			
--- a/ch07-反向传播算法/numpy-backward-prop.py
+++ b/ch07-反向传播算法/numpy-backward-prop.py
@@ -0,0 +1,223 @@
 
				+#!/usr/bin/env python
			
 
				+# encoding: utf-8
			
 
				+"""
			
 
				+@author: HuRuiFeng
			
 
				+@file: numpy-backward-prop.py
			
 
				+@time: 2020/2/24 17:32
			
 
				+@desc: 7.9 反向传播算法实战的代码
			
 
				+"""
			
 
				+
			
 
				+import matplotlib.pyplot as plt
			
 
				+import numpy as np
			
 
				+import seaborn as sns
			
 
				+from sklearn.datasets import make_moons
			
 
				+from sklearn.model_selection import train_test_split
			
 
				+
			
 
				+plt.rcParams['font.size'] = 16
			
 
				+plt.rcParams['font.family'] = ['STKaiti']
			
 
				+plt.rcParams['axes.unicode_minus'] = False
			
 
				+
			
 
				+
			
 
				+def load_dataset():
			
 
				+    # 采样点数
			
 
				+    N_SAMPLES = 2000
			
 
				+    # 测试数量比率
			
 
				+    TEST_SIZE = 0.3
			
 
				+    # 利用工具函数直接生成数据集
			
 
				+    X, y = make_moons(n_samples=N_SAMPLES, noise=0.2, random_state=100)
			
 
				+    # 将 2000 个点按着 7:3 分割为训练集和测试集
			
 
				+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
			
 
				+    return X, y, X_train, X_test, y_train, y_test
			
 
				+
			
 
				+
			
 
				+def make_plot(X, y, plot_name, XX=None, YY=None, preds=None, dark=False):
			
 
				+    # 绘制数据集的分布， X 为 2D 坐标， y 为数据点的标签
			
 
				+    if (dark):
			
 
				+        plt.style.use('dark_background')
			
 
				+    else:
			
 
				+        sns.set_style("whitegrid")
			
 
				+    plt.figure(figsize=(16, 12))
			
 
				+    axes = plt.gca()
			
 
				+    axes.set(xlabel="$x_1$", ylabel="$x_2$")
			
 
				+    plt.title(plot_name, fontsize=30)
			
 
				+    plt.subplots_adjust(left=0.20)
			
 
				+    plt.subplots_adjust(right=0.80)
			
 
				+    if XX is not None and YY is not None and preds is not None:
			
 
				+        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=1, cmap=plt.cm.Spectral)
			
 
				+        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
			
 
				+    # 绘制散点图，根据标签区分颜色
			
 
				+    plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=plt.cm.Spectral, edgecolors='none')
			
 
				+    plt.savefig('数据集分布.svg')
			
 
				+    plt.close()
			
 
				+
			
 
				+
			
 
				+class Layer:
			
 
				+    # 全连接网络层
			
 
				+    def __init__(self, n_input, n_neurons, activation=None, weights=None,
			
 
				+                 bias=None):
			
 
				+        """
			
 
				+        :param int n_input: 输入节点数
			
 
				+        :param int n_neurons: 输出节点数
			
 
				+        :param str activation: 激活函数类型
			
 
				+        :param weights: 权值张量，默认类内部生成
			
 
				+        :param bias: 偏置，默认类内部生成
			
 
				+        """
			
 
				+        # 通过正态分布初始化网络权值，初始化非常重要，不合适的初始化将导致网络不收敛
			
 
				+        self.weights = weights if weights is not None else np.random.randn(n_input, n_neurons) * np.sqrt(1 / n_neurons)
			
 
				+        self.bias = bias if bias is not None else np.random.rand(n_neurons) * 0.1
			
 
				+        self.activation = activation  # 激活函数类型，如’sigmoid’
			
 
				+        self.last_activation = None  # 激活函数的输出值o
			
 
				+        self.error = None  # 用于计算当前层的delta 变量的中间变量
			
 
				+        self.delta = None  # 记录当前层的delta 变量，用于计算梯度
			
 
				+
			
 
				+    # 网络层的前向传播函数实现如下，其中last_activation 变量用于保存当前层的输出值：
			
 
				+    def activate(self, x):
			
 
				+        # 前向传播函数
			
 
				+        r = np.dot(x, self.weights) + self.bias  # X@W+b
			
 
				+        # 通过激活函数，得到全连接层的输出o
			
 
				+        self.last_activation = self._apply_activation(r)
			
 
				+        return self.last_activation
			
 
				+
			
 
				+    # 上述代码中的self._apply_activation 函数实现了不同类型的激活函数的前向计算过程，
			
 
				+    # 尽管此处我们只使用Sigmoid 激活函数一种。代码如下：
			
 
				+    def _apply_activation(self, r):
			
 
				+        # 计算激活函数的输出
			
 
				+        if self.activation is None:
			
 
				+            return r  # 无激活函数，直接返回
			
 
				+        # ReLU 激活函数
			
 
				+        elif self.activation == 'relu':
			
 
				+            return np.maximum(r, 0)
			
 
				+        # tanh 激活函数
			
 
				+        elif self.activation == 'tanh':
			
 
				+            return np.tanh(r)
			
 
				+        # sigmoid 激活函数
			
 
				+        elif self.activation == 'sigmoid':
			
 
				+            return 1 / (1 + np.exp(-r))
			
 
				+        return r
			
 
				+
			
 
				+    # 针对于不同类型的激活函数，它们的导数计算实现如下：
			
 
				+    def apply_activation_derivative(self, r):
			
 
				+        # 计算激活函数的导数
			
 
				+        # 无激活函数，导数为1
			
 
				+        if self.activation is None:
			
 
				+            return np.ones_like(r)
			
 
				+        # ReLU 函数的导数实现
			
 
				+        elif self.activation == 'relu':
			
 
				+            grad = np.array(r, copy=True)
			
 
				+            grad[r > 0] = 1.
			
 
				+            grad[r <= 0] = 0.
			
 
				+            return grad
			
 
				+        # tanh 函数的导数实现
			
 
				+        elif self.activation == 'tanh':
			
 
				+            return 1 - r ** 2
			
 
				+        # Sigmoid 函数的导数实现
			
 
				+        elif self.activation == 'sigmoid':
			
 
				+            return r * (1 - r)
			
 
				+        return r
			
 
				+
			
 
				+
			
 
				+# 神经网络模型
			
 
				+class NeuralNetwork:
			
 
				+    def __init__(self):
			
 
				+        self._layers = []  # 网络层对象列表
			
 
				+
			
 
				+    def add_layer(self, layer):
			
 
				+        # 追加网络层
			
 
				+        self._layers.append(layer)
			
 
				+
			
 
				+    # 网络的前向传播只需要循环调各个网络层对象的前向计算函数即可，代码如下：
			
 
				+    # 前向传播
			
 
				+    def feed_forward(self, X):
			
 
				+        for layer in self._layers:
			
 
				+            # 依次通过各个网络层
			
 
				+            X = layer.activate(X)
			
 
				+        return X
			
 
				+
			
 
				+    def backpropagation(self, X, y, learning_rate):
			
 
				+        # 反向传播算法实现
			
 
				+        # 前向计算，得到输出值
			
 
				+        output = self.feed_forward(X)
			
 
				+        for i in reversed(range(len(self._layers))):  # 反向循环
			
 
				+            layer = self._layers[i]  # 得到当前层对象
			
 
				+            # 如果是输出层
			
 
				+            if layer == self._layers[-1]:  # 对于输出层
			
 
				+                layer.error = y - output  # 计算2 分类任务的均方差的导数
			
 
				+                # 关键步骤：计算最后一层的delta，参考输出层的梯度公式
			
 
				+                layer.delta = layer.error * layer.apply_activation_derivative(output)
			
 
				+            else:  # 如果是隐藏层
			
 
				+                next_layer = self._layers[i + 1]  # 得到下一层对象
			
 
				+                layer.error = np.dot(next_layer.weights, next_layer.delta)
			
 
				+                # 关键步骤：计算隐藏层的delta，参考隐藏层的梯度公式
			
 
				+                layer.delta = layer.error * layer.apply_activation_derivative(layer.last_activation)
			
 
				+
			
 
				+        # 循环更新权值
			
 
				+        for i in range(len(self._layers)):
			
 
				+            layer = self._layers[i]
			
 
				+            # o_i 为上一网络层的输出
			
 
				+            o_i = np.atleast_2d(X if i == 0 else self._layers[i - 1].last_activation)
			
 
				+            # 梯度下降算法，delta 是公式中的负数，故这里用加号
			
 
				+            layer.weights += layer.delta * o_i.T * learning_rate
			
 
				+
			
 
				+    def train(self, X_train, X_test, y_train, y_test, learning_rate, max_epochs):
			
 
				+        # 网络训练函数
			
 
				+        # one-hot 编码
			
 
				+        y_onehot = np.zeros((y_train.shape[0], 2))
			
 
				+        y_onehot[np.arange(y_train.shape[0]), y_train] = 1
			
 
				+
			
 
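				+        # Train one sample at a time with backpropagation for max_epochs + 1 passes over the training set; every 10 epochs record the MSE against the one-hot labels and the test accuracy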
			
 
				+        mses = []
			
 
				+        accuracys = []
			
 
				+        for i in range(max_epochs + 1):  # 训练1000 个epoch
			
 
				+            for j in range(len(X_train)):  # 一次训练一个样本
			
 
				+                self.backpropagation(X_train[j], y_onehot[j], learning_rate)
			
 
				+            if i % 10 == 0:
			
 
				+                # 打印出MSE Loss
			
 
				+                mse = np.mean(np.square(y_onehot - self.feed_forward(X_train)))
			
 
				+                mses.append(mse)
			
 
				+                accuracy = self.accuracy(self.predict(X_test), y_test.flatten())
			
 
				+                accuracys.append(accuracy)
			
 
				+                print('Epoch: #%s, MSE: %f' % (i, float(mse)))
			
 
				+                # 统计并打印准确率
			
 
				+                print('Accuracy: %.2f%%' % (accuracy * 100))
			
 
				+        return mses, accuracys
			
 
				+
			
 
				+    def predict(self, X):
			
 
				+        return self.feed_forward(X)
			
 
				+
			
 
				+    def accuracy(self, X, y):
			
 
				+        return np.sum(np.equal(np.argmax(X, axis=1), y)) / y.shape[0]
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    X, y, X_train, X_test, y_train, y_test = load_dataset()
			
 
				+    # 调用 make_plot 函数绘制数据的分布，其中 X 为 2D 坐标， y 为标签
			
 
				+    make_plot(X, y, "Classification Dataset Visualization ")
			
 
				+    plt.show()
			
 
				+    nn = NeuralNetwork()  # 实例化网络类
			
 
				+    nn.add_layer(Layer(2, 25, 'sigmoid'))  # 隐藏层 1, 2=>25
			
 
				+    nn.add_layer(Layer(25, 50, 'sigmoid'))  # 隐藏层 2, 25=>50
			
 
				+    nn.add_layer(Layer(50, 25, 'sigmoid'))  # 隐藏层 3, 50=>25
			
 
				+    nn.add_layer(Layer(25, 2, 'sigmoid'))  # 输出层, 25=>2
			
 
				+    mses, accuracys = nn.train(X_train, X_test, y_train, y_test, 0.01, 1000)
			
 
				+
			
 
				+    x = [i for i in range(0, 101, 10)]
			
 
				+
			
 
				+    # Plot the MSE curve (first 11 recorded points, i.e. epochs 0-100)
			
 
				+    plt.title("MES Loss")
			
 
				+    plt.plot(x, mses[:11], color='blue')
			
 
				+    plt.xlabel('Epoch')
			
 
				+    plt.ylabel('MSE')
			
 
				+    plt.savefig('训练误差曲线.svg')
			
 
				+    plt.close()
			
 
				+
			
 
				+    # 绘制Accuracy曲线
			
 
				+    plt.title("Accuracy")
			
 
				+    plt.plot(x, accuracys[:11], color='blue')
			
 
				+    plt.xlabel('Epoch')
			
 
				+    plt.ylabel('Accuracy')
			
 
				+    plt.savefig('网络测试准确率.svg')
			
 
				+    plt.close()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/ch07-反向传播算法/sigmoid_grad.py
+++ b/ch07-反向传播算法/sigmoid_grad.py
@@ -0,0 +1,14 @@
 
				+import tensorflow as tf 
			
 
				+
			
 
				+
			
 
				+a = tf.linspace(-10., 10., 10)
			
 
				+
			
 
				+with tf.GradientTape() as tape:
			
 
				+	tape.watch(a)
			
 
				+	y = tf.sigmoid(a)
			
 
				+
			
 
				+
			
 
				+grads = tape.gradient(y, [a])
			
 
				+print('x:', a.numpy())
			
 
				+print('y:', y.numpy())
			
 
				+print('grad:', grads[0].numpy())
			
--- a/ch07-反向传播算法/single_output_perceptron.py
+++ b/ch07-反向传播算法/single_output_perceptron.py
@@ -0,0 +1,26 @@
 
				+import tensorflow as tf 
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+x=tf.random.normal([1,3])
			
 
				+
			
 
				+w=tf.ones([3,1])
			
 
				+
			
 
				+b=tf.ones([1])
			
 
				+
			
 
				+y = tf.constant([1])
			
 
				+
			
 
				+
			
 
				+with tf.GradientTape() as tape:
			
 
				+
			
 
				+	tape.watch([w, b])
			
 
				+	logits = tf.sigmoid(x@w+b) 
			
 
				+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
			
 
				+
			
 
				+grads = tape.gradient(loss, [w, b])
			
 
				+print('w grad:', grads[0])
			
 
				+
			
 
				+print('b grad:', grads[1])
			
 
				+
			
 
				+
			
--- a/ch07/nb.py
+++ b/ch07/nb.py
@@ -1,11 +0,0 @@
 
				-#%%
			
 
				-import  tensorflow as tf
			
 
				-from    tensorflow import keras
			
 
				-from    tensorflow.keras import datasets, layers
			
 
				-
			
 
				-#%%
			
 
				-def sigmoid(x): # sigmoid函数，也可以直接使用tf.nn.sigmoid
			
 
				-    return 1 / (1 + tf.math.exp(-x))
			
 
				-
			
 
				-def derivative(x): # sigmoid导数的计算
			
 
				-    return sigmoid(x)*(1-sigmoid(x))
			
--- a/ch08-Keras高层接口/1.Metrics.pdf
+++ b/ch08-Keras高层接口/1.Metrics.pdf
--- a/ch08-Keras高层接口/2.Compile&Fit.pdf
+++ b/ch08-Keras高层接口/2.Compile&Fit.pdf
--- a/ch08-Keras高层接口/3.自定义层.pdf
+++ b/ch08-Keras高层接口/3.自定义层.pdf
--- a/ch08-Keras高层接口/Keras实战CIFAR10.pdf
+++ b/ch08-Keras高层接口/Keras实战CIFAR10.pdf
--- a/ch08-Keras高层接口/compile_fit.py
+++ b/ch08-Keras高层接口/compile_fit.py
@@ -0,0 +1,60 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
			
 
				+    """
			
 
				+    x is a simple image, not a batch
			
 
				+    """
			
 
				+    x = tf.cast(x, dtype=tf.float32) / 255.
			
 
				+    x = tf.reshape(x, [28*28])
			
 
				+    y = tf.cast(y, dtype=tf.int32)
			
 
				+    y = tf.one_hot(y, depth=10)
			
 
				+    return x,y
			
 
				+
			
 
				+
			
 
				+batchsz = 128
			
 
				+(x, y), (x_val, y_val) = datasets.mnist.load_data()
			
 
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
			
 
				+
			
 
				+
			
 
				+
			
 
				+db = tf.data.Dataset.from_tensor_slices((x,y))
			
 
				+db = db.map(preprocess).shuffle(60000).batch(batchsz)
			
 
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
			
 
				+ds_val = ds_val.map(preprocess).batch(batchsz) 
			
 
				+
			
 
				+sample = next(iter(db))
			
 
				+print(sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+network = Sequential([layers.Dense(256, activation='relu'),
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(64, activation='relu'),
			
 
				+                     layers.Dense(32, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+network.build(input_shape=(None, 28*28))
			
 
				+network.summary()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+network.compile(optimizer=optimizers.Adam(lr=0.01),
			
 
				+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+		metrics=['accuracy']
			
 
				+	)
			
 
				+
			
 
				+network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)
			
 
				+ 
			
 
				+network.evaluate(ds_val)
			
 
				+
			
 
				+sample = next(iter(ds_val))
			
 
				+x = sample[0]
			
 
				+y = sample[1] # one-hot
			
 
				+pred = network.predict(x) # [b, 10]
			
 
				+# convert back to number 
			
 
				+y = tf.argmax(y, axis=1)
			
 
				+pred = tf.argmax(pred, axis=1)
			
 
				+
			
 
				+print(pred)
			
 
				+print(y)
			
--- a/ch08-Keras高层接口/keras_train.py
+++ b/ch08-Keras高层接口/keras_train.py
@@ -0,0 +1,107 @@
 
				+import  os
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
			
 
				+
			
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+from 	tensorflow import keras
			
 
				+
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
			
 
				+    # [0~255] => [-1~1]
			
 
				+    x = 2 * tf.cast(x, dtype=tf.float32) / 255. - 1.
			
 
				+    y = tf.cast(y, dtype=tf.int32)
			
 
				+    return x,y
			
 
				+
			
 
				+
			
 
				+batchsz = 128
			
 
				+# [50k, 32, 32, 3], [10k, 1]
			
 
				+(x, y), (x_val, y_val) = datasets.cifar10.load_data()
			
 
				+y = tf.squeeze(y)
			
 
				+y_val = tf.squeeze(y_val)
			
 
				+y = tf.one_hot(y, depth=10) # [50k, 10]
			
 
				+y_val = tf.one_hot(y_val, depth=10) # [10k, 10]
			
 
				+print('datasets:', x.shape, y.shape, x_val.shape, y_val.shape, x.min(), x.max())
			
 
				+
			
 
				+
			
 
				+train_db = tf.data.Dataset.from_tensor_slices((x,y))
			
 
				+train_db = train_db.map(preprocess).shuffle(10000).batch(batchsz)
			
 
				+test_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
			
 
				+test_db = test_db.map(preprocess).batch(batchsz)
			
 
				+
			
 
				+
			
 
				+sample = next(iter(train_db))
			
 
				+print('batch:', sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+class MyDense(layers.Layer):
				+    """Bias-free fully connected layer used in place of ``layers.Dense``.
				+
				+    :param inp_dim: number of input features (rows of the kernel)
				+    :param outp_dim: number of output features (columns of the kernel)
				+    """
				+
				+    def __init__(self, inp_dim, outp_dim):
				+        super(MyDense, self).__init__()
				+
				+        # add_weight is the supported API (add_variable is a deprecated
				+        # alias); keyword arguments keep the call independent of the
				+        # positional parameter order.
				+        self.kernel = self.add_weight(name='w', shape=[inp_dim, outp_dim])
				+        # This layer deliberately has no bias term.
				+
				+    def call(self, inputs, training=None):
				+        """Return ``inputs @ kernel``; ``training`` is accepted but unused."""
				+        return inputs @ self.kernel
			
 
				+
			
 
				+class MyNetwork(keras.Model):
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        super(MyNetwork, self).__init__()
			
 
				+
			
 
				+        self.fc1 = MyDense(32*32*3, 256)
			
 
				+        self.fc2 = MyDense(256, 128)
			
 
				+        self.fc3 = MyDense(128, 64)
			
 
				+        self.fc4 = MyDense(64, 32)
			
 
				+        self.fc5 = MyDense(32, 10)
			
 
				+
			
 
				+
			
 
				+
			
 
				+    def call(self, inputs, training=None):
			
 
				+        """
			
 
				+
			
 
				+        :param inputs: [b, 32, 32, 3]
			
 
				+        :param training:
			
 
				+        :return:
			
 
				+        """
			
 
				+        x = tf.reshape(inputs, [-1, 32*32*3])
			
 
				+        # [b, 32*32*3] => [b, 256]
			
 
				+        x = self.fc1(x)
			
 
				+        x = tf.nn.relu(x)
			
 
				+        # [b, 256] => [b, 128]
			
 
				+        x = self.fc2(x)
			
 
				+        x = tf.nn.relu(x)
			
 
				+        # [b, 128] => [b, 64]
			
 
				+        x = self.fc3(x)
			
 
				+        x = tf.nn.relu(x)
			
 
				+        # [b, 64] => [b, 32]
			
 
				+        x = self.fc4(x)
			
 
				+        x = tf.nn.relu(x)
			
 
				+        # [b, 32] => [b, 10]
			
 
				+        x = self.fc5(x)
			
 
				+
			
 
				+        return x
			
 
				+
			
 
				+
			
 
				+network = MyNetwork()
			
 
				+network.compile(optimizer=optimizers.Adam(lr=1e-3),
			
 
				+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+                metrics=['accuracy'])
			
 
				+network.fit(train_db, epochs=15, validation_data=test_db, validation_freq=1)
			
 
				+
			
 
				+network.evaluate(test_db)
			
 
				+network.save_weights('ckpt/weights.ckpt')
			
 
				+del network
			
 
				+print('saved to ckpt/weights.ckpt')
			
 
				+
			
 
				+
			
 
				+network = MyNetwork()
			
 
				+network.compile(optimizer=optimizers.Adam(lr=1e-3),
			
 
				+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+                metrics=['accuracy'])
			
 
				+network.load_weights('ckpt/weights.ckpt')
			
 
				+print('loaded weights from file.')
			
 
				+network.evaluate(test_db)
			
--- a/ch08-Keras高层接口/layer_model.py
+++ b/ch08-Keras高层接口/layer_model.py
@@ -0,0 +1,102 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+from 	tensorflow import keras
			
 
				+
			
 
				+def preprocess(x, y):
			
 
				+    """
			
 
				+    x is a simple image, not a batch
			
 
				+    """
			
 
				+    x = tf.cast(x, dtype=tf.float32) / 255.
			
 
				+    x = tf.reshape(x, [28*28])
			
 
				+    y = tf.cast(y, dtype=tf.int32)
			
 
				+    y = tf.one_hot(y, depth=10)
			
 
				+    return x,y
			
 
				+
			
 
				+
			
 
				+batchsz = 128
			
 
				+(x, y), (x_val, y_val) = datasets.mnist.load_data()
			
 
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
			
 
				+
			
 
				+
			
 
				+
			
 
				+db = tf.data.Dataset.from_tensor_slices((x,y))
			
 
				+db = db.map(preprocess).shuffle(60000).batch(batchsz)
			
 
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
			
 
				+ds_val = ds_val.map(preprocess).batch(batchsz) 
			
 
				+
			
 
				+sample = next(iter(db))
			
 
				+print(sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+network = Sequential([layers.Dense(256, activation='relu'),
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(64, activation='relu'),
			
 
				+                     layers.Dense(32, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+network.build(input_shape=(None, 28*28))
			
 
				+network.summary()
			
 
				+
			
 
				+
			
 
				+class MyDense(layers.Layer):
			
 
				+
			
 
				+	def __init__(self, inp_dim, outp_dim):
			
 
				+		super(MyDense, self).__init__()
			
 
				+
			
 
				+		self.kernel = self.add_weight('w', [inp_dim, outp_dim])
			
 
				+		self.bias = self.add_weight('b', [outp_dim])
			
 
				+
			
 
				+	def call(self, inputs, training=None):
			
 
				+
			
 
				+		out = inputs @ self.kernel + self.bias
			
 
				+
			
 
				+		return out 
			
 
				+
			
 
				+class MyModel(keras.Model):
			
 
				+
			
 
				+	def __init__(self):
			
 
				+		super(MyModel, self).__init__()
			
 
				+
			
 
				+		self.fc1 = MyDense(28*28, 256)
			
 
				+		self.fc2 = MyDense(256, 128)
			
 
				+		self.fc3 = MyDense(128, 64)
			
 
				+		self.fc4 = MyDense(64, 32)
			
 
				+		self.fc5 = MyDense(32, 10)
			
 
				+
			
 
				+	def call(self, inputs, training=None):
			
 
				+
			
 
				+		x = self.fc1(inputs)
			
 
				+		x = tf.nn.relu(x)
			
 
				+		x = self.fc2(x)
			
 
				+		x = tf.nn.relu(x)
			
 
				+		x = self.fc3(x)
			
 
				+		x = tf.nn.relu(x)
			
 
				+		x = self.fc4(x)
			
 
				+		x = tf.nn.relu(x)
			
 
				+		x = self.fc5(x) 
			
 
				+
			
 
				+		return x
			
 
				+
			
 
				+
			
 
				+network = MyModel()
			
 
				+
			
 
				+
			
 
				+network.compile(optimizer=optimizers.Adam(lr=0.01),
			
 
				+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+		metrics=['accuracy']
			
 
				+	)
			
 
				+
			
 
				+network.fit(db, epochs=5, validation_data=ds_val,
			
 
				+              validation_freq=2)
			
 
				+ 
			
 
				+network.evaluate(ds_val)
			
 
				+
			
 
				+sample = next(iter(ds_val))
			
 
				+x = sample[0]
			
 
				+y = sample[1] # one-hot
			
 
				+pred = network.predict(x) # [b, 10]
			
 
				+# convert back to number 
			
 
				+y = tf.argmax(y, axis=1)
			
 
				+pred = tf.argmax(pred, axis=1)
			
 
				+
			
 
				+print(pred)
			
 
				+print(y)
			
--- a/ch08-Keras高层接口/metrics.py
+++ b/ch08-Keras高层接口/metrics.py
@@ -0,0 +1,92 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
			
 
				+
			
 
				+    x = tf.cast(x, dtype=tf.float32) / 255.
			
 
				+    y = tf.cast(y, dtype=tf.int32)
			
 
				+
			
 
				+    return x,y
			
 
				+
			
 
				+
			
 
				+batchsz = 128
			
 
				+(x, y), (x_val, y_val) = datasets.mnist.load_data()
			
 
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
			
 
				+
			
 
				+
			
 
				+
			
 
				+db = tf.data.Dataset.from_tensor_slices((x,y))
			
 
				+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
			
 
				+
			
 
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
			
 
				+ds_val = ds_val.map(preprocess).batch(batchsz) 
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+network = Sequential([layers.Dense(256, activation='relu'),
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(64, activation='relu'),
			
 
				+                     layers.Dense(32, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+network.build(input_shape=(None, 28*28))
			
 
				+network.summary()
			
 
				+
			
 
				+optimizer = optimizers.Adam(lr=0.01)
			
 
				+
			
 
				+acc_meter = metrics.Accuracy()
			
 
				+loss_meter = metrics.Mean()
			
 
				+
			
 
				+
			
 
				+# Custom training loop: one optimizer step per batch, the running mean loss
				+# is printed every 100 steps and validation accuracy every 500 steps.
				+for step, (x,y) in enumerate(db):
				+
				+    with tf.GradientTape() as tape:
				+        # [b, 28, 28] => [b, 784]
				+        x = tf.reshape(x, (-1, 28*28))
				+        # [b, 784] => [b, 10]
				+        out = network(x)
				+        # [b] => [b, 10]
				+        y_onehot = tf.one_hot(y, depth=10)
				+        # per-sample cross-entropy reduced to a scalar by the mean
				+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
				+
				+    # Metric bookkeeping does not need to be recorded on the tape.
				+    loss_meter.update_state(loss)
				+
				+    grads = tape.gradient(loss, network.trainable_variables)
				+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
				+
				+    if step % 100 == 0:
				+        print(step, 'loss:', loss_meter.result().numpy())
				+        loss_meter.reset_states()
				+
				+    # evaluate
				+    if step % 500 == 0:
				+        total, total_correct = 0., 0
				+        acc_meter.reset_states()
				+
				+        # Use dedicated names here: reusing step/x/y would overwrite the
				+        # training step, and the report below would then print the index
				+        # of the last validation batch instead.
				+        for val_x, val_y in ds_val:
				+            # [b, 28, 28] => [b, 784]
				+            val_x = tf.reshape(val_x, (-1, 28*28))
				+            # [b, 784] => [b, 10]
				+            val_out = network(val_x)
				+
				+            # [b, 10] => [b]
				+            pred = tf.argmax(val_out, axis=1)
				+            pred = tf.cast(pred, dtype=tf.int32)
				+            # bool type
				+            correct = tf.equal(pred, val_y)
				+            # bool tensor => int tensor => numpy
				+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
				+            total += val_x.shape[0]
				+
				+            acc_meter.update_state(val_y, pred)
				+
				+        # Hand-computed accuracy next to the metrics.Accuracy result.
				+        print(step, 'Evaluate Acc:', total_correct/total, acc_meter.result().numpy())
			
--- a/ch08-Keras高层接口/nb.py
+++ b/ch08-Keras高层接口/nb.py
--- a/ch08-Keras高层接口/pretained.py
+++ b/ch08-Keras高层接口/pretained.py
--- a/ch08-Keras高层接口/save_load_model.py
+++ b/ch08-Keras高层接口/save_load_model.py
@@ -0,0 +1,69 @@
 
				+import  os
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
			
 
				+
			
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
			
 
				+    """
			
 
				+    x is a simple image, not a batch
			
 
				+    """
			
 
				+    x = tf.cast(x, dtype=tf.float32) / 255.
			
 
				+    x = tf.reshape(x, [28*28])
			
 
				+    y = tf.cast(y, dtype=tf.int32)
			
 
				+    y = tf.one_hot(y, depth=10)
			
 
				+    return x,y
			
 
				+
			
 
				+
			
 
				+batchsz = 128
			
 
				+(x, y), (x_val, y_val) = datasets.mnist.load_data()
			
 
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
			
 
				+
			
 
				+
			
 
				+
			
 
				+db = tf.data.Dataset.from_tensor_slices((x,y))
			
 
				+db = db.map(preprocess).shuffle(60000).batch(batchsz)
			
 
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
			
 
				+ds_val = ds_val.map(preprocess).batch(batchsz) 
			
 
				+
			
 
				+sample = next(iter(db))
			
 
				+print(sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+network = Sequential([layers.Dense(256, activation='relu'),
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(64, activation='relu'),
			
 
				+                     layers.Dense(32, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+network.build(input_shape=(None, 28*28))
			
 
				+network.summary()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+network.compile(optimizer=optimizers.Adam(lr=0.01),
			
 
				+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+		metrics=['accuracy']
			
 
				+	)
			
 
				+
			
 
				+network.fit(db, epochs=3, validation_data=ds_val, validation_freq=2)
			
 
				+ 
			
 
				+network.evaluate(ds_val)
			
 
				+
			
 
				+# Save the whole model, drop the in-memory object, then restore it from disk.
				+network.save('model.h5')
				+print('saved total model.')
				+del network
				+
				+network = tf.keras.models.load_model('model.h5', compile=False)
				+# Report success only after load_model has actually returned.
				+print('loaded model from file.')
			
 
				+network.compile(optimizer=optimizers.Adam(lr=0.01),
			
 
				+        loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+        metrics=['accuracy']
			
 
				+    )
			
 
				+x_val = tf.cast(x_val, dtype=tf.float32) / 255.
			
 
				+x_val = tf.reshape(x_val, [-1, 28*28])
			
 
				+y_val = tf.cast(y_val, dtype=tf.int32)
			
 
				+y_val = tf.one_hot(y_val, depth=10)
			
 
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(128)
			
 
				+network.evaluate(ds_val)
			
--- a/ch08-Keras高层接口/save_load_weight.py
+++ b/ch08-Keras高层接口/save_load_weight.py
@@ -0,0 +1,69 @@
 
				+import  os
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
			
 
				+
			
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
			
 
				+    """
			
 
				+    x is a simple image, not a batch
			
 
				+    """
			
 
				+    x = tf.cast(x, dtype=tf.float32) / 255.
			
 
				+    x = tf.reshape(x, [28*28])
			
 
				+    y = tf.cast(y, dtype=tf.int32)
			
 
				+    y = tf.one_hot(y, depth=10)
			
 
				+    return x,y
			
 
				+
			
 
				+
			
 
				+batchsz = 128
			
 
				+(x, y), (x_val, y_val) = datasets.mnist.load_data()
			
 
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
			
 
				+
			
 
				+
			
 
				+
			
 
				+db = tf.data.Dataset.from_tensor_slices((x,y))
			
 
				+db = db.map(preprocess).shuffle(60000).batch(batchsz)
			
 
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
			
 
				+ds_val = ds_val.map(preprocess).batch(batchsz) 
			
 
				+
			
 
				+sample = next(iter(db))
			
 
				+print(sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+network = Sequential([layers.Dense(256, activation='relu'),
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(64, activation='relu'),
			
 
				+                     layers.Dense(32, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+network.build(input_shape=(None, 28*28))
			
 
				+network.summary()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+network.compile(optimizer=optimizers.Adam(lr=0.01),
			
 
				+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+		metrics=['accuracy']
			
 
				+	)
			
 
				+
			
 
				+network.fit(db, epochs=3, validation_data=ds_val, validation_freq=2)
			
 
				+ 
			
 
				+network.evaluate(ds_val)
			
 
				+
			
 
				+network.save_weights('weights.ckpt')
			
 
				+print('saved weights.')
			
 
				+del network
			
 
				+
			
 
				+network = Sequential([layers.Dense(256, activation='relu'),
			
 
				+                     layers.Dense(128, activation='relu'),
			
 
				+                     layers.Dense(64, activation='relu'),
			
 
				+                     layers.Dense(32, activation='relu'),
			
 
				+                     layers.Dense(10)])
			
 
				+network.compile(optimizer=optimizers.Adam(lr=0.01),
			
 
				+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
			
 
				+		metrics=['accuracy']
			
 
				+	)
			
 
				+network.load_weights('weights.ckpt')
			
 
				+print('loaded weights!')
			
 
				+network.evaluate(ds_val)
			
--- a/ch08-Keras高层接口/模型加载与保存.pdf
+++ b/ch08-Keras高层接口/模型加载与保存.pdf
--- a/ch09-过拟合/9.8-over-fitting-and-under-fitting.py
+++ b/ch09-过拟合/9.8-over-fitting-and-under-fitting.py
@@ -0,0 +1,224 @@
 
				+#!/usr/bin/env python
			
 
				+# encoding: utf-8
			
 
				+"""
			
 
				+@author: HuRuiFeng
			
 
				+@file: 9.8-over-fitting-and-under-fitting.py
			
 
				+@time: 2020/2/25 21:14
			
 
				+@desc: 9.8 过拟合问题实战的代码
			
 
				+       from mpl_toolkits.mplot3d import Axes3D 这个必须添加，解决3d报错问题
			
 
				+"""
			
 
				+
			
 
				+import matplotlib.pyplot as plt
			
 
				+# 导入数据集生成工具
			
 
				+import numpy as np
			
 
				+import seaborn as sns
			
 
				+from sklearn.datasets import make_moons
			
 
				+from sklearn.model_selection import train_test_split
			
 
				+from tensorflow.keras import layers, Sequential, regularizers
			
 
				+from mpl_toolkits.mplot3d import Axes3D
			
 
				+
			
 
				+plt.rcParams['font.size'] = 16
			
 
				+plt.rcParams['font.family'] = ['STKaiti']
			
 
				+plt.rcParams['axes.unicode_minus'] = False
			
 
				+
			
 
				+OUTPUT_DIR = 'output_dir'
			
 
				+N_EPOCHS = 500
			
 
				+
			
 
				+
			
 
				+def load_dataset():
			
 
				+    # 采样点数
			
 
				+    N_SAMPLES = 1000
			
 
				+    # 测试数量比率
			
 
				+    TEST_SIZE = None
			
 
				+
			
 
				+    # 从 moon 分布中随机采样 1000 个点，并切分为训练集-测试集
			
 
				+    X, y = make_moons(n_samples=N_SAMPLES, noise=0.25, random_state=100)
			
 
				+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
			
 
				+    return X, y, X_train, X_test, y_train, y_test
			
 
				+
			
 
				+
			
 
				+def make_plot(X, y, plot_name, file_name, XX=None, YY=None, preds=None, dark=False, output_dir=OUTPUT_DIR):
				+    """Scatter-plot a labelled 2-D dataset, optionally over a decision surface, and save it.
				+
				+    :param X: array read as X[:, 0] and X[:, 1] for the two plotted coordinates
				+    :param y: labels; flattened with ravel() and used for colour and marker shape
				+    :param plot_name: figure title
				+    :param file_name: name of the output file inside output_dir
				+    :param XX: optional mesh-grid x coordinates
				+    :param YY: optional mesh-grid y coordinates
				+    :param preds: optional predictions on the grid, reshaped to XX.shape
				+    :param dark: use the 'dark_background' style instead of seaborn 'whitegrid'
				+    :param output_dir: directory the figure is written to (created if missing)
				+    """
				+    import os
				+
				+    if dark:
				+        plt.style.use('dark_background')
				+    else:
				+        sns.set_style("whitegrid")
				+    axes = plt.gca()
				+    axes.set_xlim([-2, 3])
				+    axes.set_ylim([-1.5, 2])
				+    axes.set(xlabel="$x_1$", ylabel="$x_2$")
				+    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
				+    plt.subplots_adjust(left=0.20)
				+    plt.subplots_adjust(right=0.80)
				+    # The decision surface is drawn only when the grid and predictions are all given.
				+    if XX is not None and YY is not None and preds is not None:
				+        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=0.08, cmap=plt.cm.Spectral)
				+        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
				+    # Scatter plot: colour follows the label, marker is 'o' for label 1 and 's' otherwise.
				+    markers = ['o' if i == 1 else 's' for i in y.ravel()]
				+    mscatter(X[:, 0], X[:, 1], c=y.ravel(), s=20, cmap=plt.cm.Spectral, edgecolors='none', m=markers, ax=axes)
				+    # Create the target directory first so savefig does not fail on a missing folder.
				+    if output_dir:
				+        os.makedirs(output_dir, exist_ok=True)
				+    plt.savefig(output_dir + '/' + file_name)
				+    plt.close()
			
 
				+
			
 
				+
			
 
				+def mscatter(x, y, ax=None, m=None, **kw):
			
 
				+    import matplotlib.markers as mmarkers
			
 
				+    if not ax: ax = plt.gca()
			
 
				+    sc = ax.scatter(x, y, **kw)
			
 
				+    if (m is not None) and (len(m) == len(x)):
			
 
				+        paths = []
			
 
				+        for marker in m:
			
 
				+            if isinstance(marker, mmarkers.MarkerStyle):
			
 
				+                marker_obj = marker
			
 
				+            else:
			
 
				+                marker_obj = mmarkers.MarkerStyle(marker)
			
 
				+            path = marker_obj.get_path().transformed(
			
 
				+                marker_obj.get_transform())
			
 
				+            paths.append(path)
			
 
				+        sc.set_paths(paths)
			
 
				+    return sc
			
 
				+
			
 
				+
			
 
				+def network_layers_influence(X_train, y_train):
			
 
				+    # 构建 5 种不同层数的网络
			
 
				+    for n in range(5):
			
 
				+        # 创建容器
			
 
				+        model = Sequential()
			
 
				+        # 创建第一层
			
 
				+        model.add(layers.Dense(8, input_dim=2, activation='relu'))
			
 
				+        # 添加 n 层，共 n+2 层
			
 
				+        for _ in range(n):
			
 
				+            model.add(layers.Dense(32, activation='relu'))
			
 
				+        # 创建最末层
			
 
				+        model.add(layers.Dense(1, activation='sigmoid'))
			
 
				+        # 模型装配与训练
			
 
				+        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
			
 
				+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
			
 
				+        # 绘制不同层数的网络决策边界曲线
			
 
				+        # 可视化的 x 坐标范围为[-2, 3]
			
 
				+        xx = np.arange(-2, 3, 0.01)
			
 
				+        # 可视化的 y 坐标范围为[-1.5, 2]
			
 
				+        yy = np.arange(-1.5, 2, 0.01)
			
 
				+        # 生成 x-y 平面采样网格点，方便可视化
			
 
				+        XX, YY = np.meshgrid(xx, yy)
			
 
				+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
			
 
				+        title = "网络层数：{0}".format(2 + n)
			
 
				+        file = "网络容量_%i.png" % (2 + n)
			
 
				+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/network_layers')
			
 
				+
			
 
				+
			
 
				+def dropout_influence(X_train, y_train):
				+    """Train five networks with 0..4 Dropout layers and plot each decision boundary.
				+
				+    :param X_train: training inputs passed to model.fit and make_plot
				+    :param y_train: training labels passed to model.fit and make_plot
				+    """
				+    # Build 5 networks that differ only in the number of Dropout layers.
				+    for n in range(5):
				+        # Create the container.
				+        model = Sequential()
				+        # First layer.
				+        model.add(layers.Dense(8, input_dim=2, activation='relu'))
				+        counter = 0
				+        # The number of 64-unit hidden layers is fixed at 5.
				+        for _ in range(5):
				+            model.add(layers.Dense(64, activation='relu'))
				+            # Follow the first n hidden layers with a Dropout layer. This
				+            # check must sit inside the loop; outside it, at most one
				+            # Dropout layer would ever be added regardless of n.
				+            if counter < n:
				+                counter += 1
				+                model.add(layers.Dropout(rate=0.5))
				+
				+        # Output layer.
				+        model.add(layers.Dense(1, activation='sigmoid'))
				+        # Compile the model.
				+        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
				+        # Train.
				+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
				+        # Plot the decision boundary for this number of Dropout layers.
				+        # Visualised x range is [-2, 3].
				+        xx = np.arange(-2, 3, 0.01)
				+        # Visualised y range is [-1.5, 2].
				+        yy = np.arange(-1.5, 2, 0.01)
				+        # Sample a grid over the x-y plane for visualisation.
				+        XX, YY = np.meshgrid(xx, yy)
				+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
				+        title = "无Dropout层" if n == 0 else "{0}层 Dropout层".format(n)
				+        file = "Dropout_%i.png" % n
				+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/dropout')
			
 
				+
			
 
				+
			
 
				+def build_model_with_regularization(_lambda):
			
 
				+    # 创建带正则化项的神经网络
			
 
				+    model = Sequential()
			
 
				+    model.add(layers.Dense(8, input_dim=2, activation='relu'))  # 不带正则化项
			
 
				+    # 2-4层均是带 L2 正则化项
			
 
				+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
			
 
				+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
			
 
				+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
			
 
				+    # 输出层
			
 
				+    model.add(layers.Dense(1, activation='sigmoid'))
			
 
				+    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])  # 模型装配
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				+def plot_weights_matrix(model, layer_index, plot_name, file_name, output_dir=OUTPUT_DIR):
				+    """Render one layer's weight matrix as a 3-D surface and save it as SVG.
				+
				+    :param model: model whose ``layers[layer_index].get_weights()[0]`` is plotted
				+    :param layer_index: index into ``model.layers``
				+    :param plot_name: figure title
				+    :param file_name: output file name without the ".svg" suffix
				+    :param output_dir: directory the figure is written to (created if missing)
				+    """
				+    import os
				+
				+    # Extract the weight matrix of the requested layer.
				+    weights = model.layers[layer_index].get_weights()[0]
				+    shape = weights.shape
				+    # Build grid coordinates with the same size as the weight matrix.
				+    X = np.array(range(shape[1]))
				+    Y = np.array(range(shape[0]))
				+    X, Y = np.meshgrid(X, Y)
				+    # Draw the 3-D figure. add_subplot is used because passing keyword
				+    # arguments to Figure.gca is no longer accepted by newer Matplotlib.
				+    fig = plt.figure()
				+    ax = fig.add_subplot(projection='3d')
				+    ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
				+    ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
				+    ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
				+    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
				+    # Plot the weight values as a surface.
				+    ax.plot_surface(X, Y, weights, cmap=plt.get_cmap('rainbow'), linewidth=0)
				+    # Axis labels.
				+    ax.set_xlabel('网格x坐标', fontsize=16, rotation=0, fontproperties='SimHei')
				+    ax.set_ylabel('网格y坐标', fontsize=16, rotation=0, fontproperties='SimHei')
				+    ax.set_zlabel('权值', fontsize=16, rotation=90, fontproperties='SimHei')
				+    # Create the target directory first so savefig does not fail on a missing folder.
				+    if output_dir:
				+        os.makedirs(output_dir, exist_ok=True)
				+    plt.savefig(output_dir + "/" + file_name + ".svg")
				+    plt.close(fig)
			
 
				+
			
 
				+
			
 
				+def regularizers_influence(X_train, y_train):
			
 
				+    for _lambda in [1e-5, 1e-3, 1e-1, 0.12, 0.13]:  # 设置不同的正则化系数
			
 
				+        # 创建带正则化项的模型
			
 
				+        model = build_model_with_regularization(_lambda)
			
 
				+        # 模型训练
			
 
				+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
			
 
				+        # 绘制权值范围
			
 
				+        layer_index = 2
			
 
				+        plot_title = "正则化系数：{}".format(_lambda)
			
 
				+        file_name = "正则化网络权值_" + str(_lambda)
			
 
				+        # 绘制网络权值范围图
			
 
				+        plot_weights_matrix(model, layer_index, plot_title, file_name, output_dir=OUTPUT_DIR + '/regularizers')
			
 
				+        # 绘制不同正则化系数的决策边界线
			
 
				+        # 可视化的 x 坐标范围为[-2, 3]
			
 
				+        xx = np.arange(-2, 3, 0.01)
			
 
				+        # 可视化的 y 坐标范围为[-1.5, 2]
			
 
				+        yy = np.arange(-1.5, 2, 0.01)
			
 
				+        # 生成 x-y 平面采样网格点，方便可视化
			
 
				+        XX, YY = np.meshgrid(xx, yy)
			
 
				+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
			
 
				+        title = "正则化系数：{}".format(_lambda)
			
 
				+        file = "正则化_%g.svg" % _lambda
			
 
				+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/regularizers')
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    X, y, X_train, X_test, y_train, y_test = load_dataset()
			
 
				+    # 绘制数据集分布
			
 
				+    make_plot(X, y, None, "月牙形状二分类数据集分布.svg")
			
 
				+    # 网络层数的影响
			
 
				+    network_layers_influence(X_train, y_train)
			
 
				+    # Dropout的影响
			
 
				+    dropout_influence(X_train, y_train)
			
 
				+    # 正则化的影响
			
 
				+    regularizers_influence(X_train, y_train)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/ch09-过拟合/Regularization.pdf
+++ b/ch09-过拟合/Regularization.pdf
--- a/ch09-过拟合/compile_fit.py
+++ b/ch09-过拟合/compile_fit.py
@@ -0,0 +1,61 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
				+    """Prepare one (image, label) pair for training.
				+
				+    x is a single image, not a batch. It is cast to float32, divided by
				+    255 and reshaped to a flat vector of 28*28 values. y is cast to int32
				+    and one-hot encoded with depth 10.
				+    """
				+    pixels = tf.reshape(tf.cast(x, dtype=tf.float32) / 255., [28*28])
				+    label = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
				+    return pixels, label
			
 
				+
			
 
				+
			
 
				+batchsz = 128
				+train_split, val_split = datasets.mnist.load_data()
				+x, y = train_split
				+x_val, y_val = val_split
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
				+
				+# Training pipeline: per-sample preprocessing, shuffling with a
				+# 60000-element buffer, then batching.
				+db = (tf.data.Dataset.from_tensor_slices((x, y))
				+      .map(preprocess)
				+      .shuffle(60000)
				+      .batch(batchsz))
				+
				+# Validation pipeline: same preprocessing and batching, no shuffling.
				+ds_val = (tf.data.Dataset.from_tensor_slices((x_val, y_val))
				+          .map(preprocess)
				+          .batch(batchsz))
				+
				+# Pull one training batch and show the shapes of its inputs and labels.
				+sample = next(iter(db))
				+print(sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+# Five stacked Dense layers; the last one has 10 units and no activation,
				+# so the loss below is configured with from_logits=True.
				+network = Sequential([layers.Dense(256, activation='relu'),
				+                     layers.Dense(128, activation='relu'),
				+                     layers.Dense(64, activation='relu'),
				+                     layers.Dense(32, activation='relu'),
				+                     layers.Dense(10)])
				+network.build(input_shape=(None, 28*28))
				+network.summary()
				+
				+# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
				+# that newer Keras releases no longer accept.
				+network.compile(optimizer=optimizers.Adam(learning_rate=0.01),
				+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
				+                metrics=['accuracy'])
				+
				+# validation_steps=2 limits each validation pass to two batches of ds_val.
				+network.fit(db, epochs=5, validation_data=ds_val,
				+            validation_steps=2)
				+
				+# Evaluate on the complete validation dataset.
				+network.evaluate(ds_val)
			
 
				+
			
 
				+# Take the first validation batch: inputs and one-hot labels.
				+sample = next(iter(ds_val))
				+x, y = sample[0], sample[1]
				+
				+# Network output is [b, 10]; argmax over axis 1 turns both the output
				+# and the one-hot labels back into class numbers.
				+pred = tf.argmax(network.predict(x), axis=1)
				+y = tf.argmax(y, axis=1)
				+
				+print(pred)
				+print(y)
			
--- a/ch09-过拟合/dropout.py
+++ b/ch09-过拟合/dropout.py
@@ -0,0 +1,111 @@
 
				+import  os
			
 
				+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
			
 
				+
			
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
				+    """Cast x to float32 and divide it by 255; cast y to int32."""
				+    scaled = tf.cast(x, dtype=tf.float32) / 255.
				+    label = tf.cast(y, dtype=tf.int32)
				+    return scaled, label
			
 
				+
			
 
				+
			
 
				+batchsz = 128
				+train_split, val_split = datasets.mnist.load_data()
				+x, y = train_split
				+x_val, y_val = val_split
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
				+
				+# Training pipeline: preprocess, shuffle with a 60000-element buffer,
				+# batch, and repeat the whole dataset 10 times.
				+db = (tf.data.Dataset.from_tensor_slices((x, y))
				+      .map(preprocess)
				+      .shuffle(60000)
				+      .batch(batchsz)
				+      .repeat(10))
				+
				+# Validation pipeline: preprocess and batch only.
				+ds_val = (tf.data.Dataset.from_tensor_slices((x_val, y_val))
				+          .map(preprocess)
				+          .batch(batchsz))
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# Dense stack with a Dropout layer (drop rate 0.5) after each of the first
				+# two hidden layers; the final layer has 10 units and no activation.
				+network = Sequential([layers.Dense(256, activation='relu'),
				+                     layers.Dropout(0.5),  # 0.5 rate to drop
				+                     layers.Dense(128, activation='relu'),
				+                     layers.Dropout(0.5),  # 0.5 rate to drop
				+                     layers.Dense(64, activation='relu'),
				+                     layers.Dense(32, activation='relu'),
				+                     layers.Dense(10)])
				+network.build(input_shape=(None, 28*28))
				+network.summary()
				+
				+# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
				+# that newer Keras releases no longer accept.
				+optimizer = optimizers.Adam(learning_rate=0.01)
			
 
				+
			
 
				+
			
 
				+
			
 
				+def _evaluate_accuracy(model, dataset, training):
				+    """Return the fraction of samples in dataset that model classifies correctly.
				+
				+    Args:
				+        model: callable network; called as model(x, training=training).
				+        dataset: iterable of (x, y) batches; x is reshaped to [b, 784] and
				+            y is compared against the int32 argmax of the output.
				+        training: forwarded to the model call, so Dropout stays active
				+            when it is True and is disabled when it is False.
				+    """
				+    total, total_correct = 0., 0
				+    for x, y in dataset:
				+        # [b, 28, 28] => [b, 784]
				+        x = tf.reshape(x, (-1, 28*28))
				+        # [b, 784] => [b, 10]
				+        out = model(x, training=training)
				+        # [b, 10] => [b]
				+        pred = tf.cast(tf.argmax(out, axis=1), dtype=tf.int32)
				+        # bool tensor => int tensor => numpy
				+        correct = tf.equal(pred, y)
				+        total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
				+        total += x.shape[0]
				+    return total_correct / total
				+
				+
				+for step, (x, y) in enumerate(db):
				+
				+    with tf.GradientTape() as tape:
				+        # [b, 28, 28] => [b, 784]
				+        x = tf.reshape(x, (-1, 28*28))
				+        # [b, 784] => [b, 10]; training=True keeps Dropout active
				+        out = network(x, training=True)
				+        # [b] => [b, 10]
				+        y_onehot = tf.one_hot(y, depth=10)
				+        # mean cross-entropy over the batch
				+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
				+
				+        # Sum of tf.nn.l2_loss over every trainable variable, added to the
				+        # loss with weight 0.0001.
				+        loss_regularization = tf.reduce_sum(
				+            tf.stack([tf.nn.l2_loss(p) for p in network.trainable_variables]))
				+        loss = loss + 0.0001 * loss_regularization
				+
				+    grads = tape.gradient(loss, network.trainable_variables)
				+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
				+
				+    if step % 100 == 0:
				+        print(step, 'loss:', float(loss), 'loss_regularization:', float(loss_regularization))
				+
				+    # evaluate
				+    if step % 500 == 0:
				+        # The evaluation runs in a helper with its own loop variables, so
				+        # `step` below is still the training step (the original inner
				+        # loops reused the name and printed the validation batch index).
				+        acc_with_drop = _evaluate_accuracy(network, ds_val, training=True)
				+        print(step, 'Evaluate Acc with drop:', acc_with_drop)
				+
				+        acc_without_drop = _evaluate_accuracy(network, ds_val, training=False)
				+        print(step, 'Evaluate Acc without drop:', acc_without_drop)
			
--- a/ch09-过拟合/lenna.png
+++ b/ch09-过拟合/lenna.png
--- a/ch09-过拟合/lenna_crop.png
+++ b/ch09-过拟合/lenna_crop.png
--- a/ch09-过拟合/lenna_crop2.png
+++ b/ch09-过拟合/lenna_crop2.png
--- a/ch09-过拟合/lenna_eras.png
+++ b/ch09-过拟合/lenna_eras.png
--- a/ch09-过拟合/lenna_eras2.png
+++ b/ch09-过拟合/lenna_eras2.png
--- a/ch09-过拟合/lenna_flip.png
+++ b/ch09-过拟合/lenna_flip.png
--- a/ch09-过拟合/lenna_flip2.png
+++ b/ch09-过拟合/lenna_flip2.png
--- a/ch09-过拟合/lenna_guassian.png
+++ b/ch09-过拟合/lenna_guassian.png
--- a/ch09-过拟合/lenna_perspective.png
+++ b/ch09-过拟合/lenna_perspective.png
--- a/ch09-过拟合/lenna_resize.png
+++ b/ch09-过拟合/lenna_resize.png
--- a/ch09-过拟合/lenna_rotate.png
+++ b/ch09-过拟合/lenna_rotate.png
--- a/ch09-过拟合/lenna_rotate2.png
+++ b/ch09-过拟合/lenna_rotate2.png
--- a/ch09-过拟合/misc.pdf
+++ b/ch09-过拟合/misc.pdf
--- a/ch09-过拟合/regularization.py
+++ b/ch09-过拟合/regularization.py
@@ -0,0 +1,88 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
				+    """Return x as float32 divided by 255 and y as int32."""
				+    return tf.cast(x, dtype=tf.float32) / 255., tf.cast(y, dtype=tf.int32)
			
 
				+
			
 
				+
			
 
				+batchsz = 128
				+train_split, val_split = datasets.mnist.load_data()
				+x, y = train_split
				+x_val, y_val = val_split
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
				+
				+# Training data: preprocess each sample, shuffle (buffer of 60000),
				+# batch, then repeat the dataset 10 times.
				+db = tf.data.Dataset.from_tensor_slices((x, y))
				+db = db.map(preprocess)
				+db = db.shuffle(60000)
				+db = db.batch(batchsz)
				+db = db.repeat(10)
				+
				+# Validation data: preprocess and batch only.
				+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
				+ds_val = ds_val.map(preprocess)
				+ds_val = ds_val.batch(batchsz)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# Five stacked Dense layers; the last one has 10 units and no activation.
				+network = Sequential([layers.Dense(256, activation='relu'),
				+                     layers.Dense(128, activation='relu'),
				+                     layers.Dense(64, activation='relu'),
				+                     layers.Dense(32, activation='relu'),
				+                     layers.Dense(10)])
				+network.build(input_shape=(None, 28*28))
				+network.summary()
				+
				+# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
				+# that newer Keras releases no longer accept.
				+optimizer = optimizers.Adam(learning_rate=0.01)
			
 
				+
			
 
				+
			
 
				+
			
 
				+for step, (x, y) in enumerate(db):
				+
				+    with tf.GradientTape() as tape:
				+        # [b, 28, 28] => [b, 784]
				+        x = tf.reshape(x, (-1, 28*28))
				+        # [b, 784] => [b, 10]
				+        out = network(x)
				+        # [b] => [b, 10]
				+        y_onehot = tf.one_hot(y, depth=10)
				+        # mean cross-entropy over the batch
				+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
				+
				+        # Sum of tf.nn.l2_loss over every trainable variable, added to the
				+        # loss with weight 0.0001.
				+        loss_regularization = tf.reduce_sum(
				+            tf.stack([tf.nn.l2_loss(p) for p in network.trainable_variables]))
				+        loss = loss + 0.0001 * loss_regularization
				+
				+    grads = tape.gradient(loss, network.trainable_variables)
				+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
				+
				+    if step % 100 == 0:
				+        print(step, 'loss:', float(loss), 'loss_regularization:', float(loss_regularization))
				+
				+    # evaluate
				+    if step % 500 == 0:
				+        total, total_correct = 0., 0
				+
				+        # Separate loop variable names keep the outer `step` intact, so the
				+        # print below reports the training step (the original reused
				+        # `step` here and printed the validation batch index instead).
				+        for x_batch, y_batch in ds_val:
				+            # [b, 28, 28] => [b, 784]
				+            x_batch = tf.reshape(x_batch, (-1, 28*28))
				+            # [b, 784] => [b, 10]
				+            out = network(x_batch)
				+            # [b, 10] => [b]
				+            pred = tf.cast(tf.argmax(out, axis=1), dtype=tf.int32)
				+            # bool tensor => int tensor => numpy
				+            correct = tf.equal(pred, y_batch)
				+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
				+            total += x_batch.shape[0]
				+
				+        print(step, 'Evaluate Acc:', total_correct/total)
			
--- a/ch09-过拟合/train_evalute_test.py
+++ b/ch09-过拟合/train_evalute_test.py
@@ -0,0 +1,73 @@
 
				+import  tensorflow as tf
			
 
				+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
			
 
				+
			
 
				+
			
 
				+def preprocess(x, y):
				+    """Turn one (image, label) pair into a flat float vector and a one-hot label.
				+
				+    x is a single image, not a batch: it is cast to float32, divided by
				+    255 and reshaped to 28*28 values. y is cast to int32 and one-hot
				+    encoded with depth 10.
				+    """
				+    image = tf.cast(x, dtype=tf.float32) / 255.
				+    image = tf.reshape(image, [28*28])
				+    label = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
				+    return image, label
			
 
				+
			
 
				+
			
 
				+batchsz = 128
				+train_split, test_split = datasets.mnist.load_data()
				+x, y = train_split
				+x_test, y_test = test_split
				+print('datasets:', x.shape, y.shape, x.min(), x.max())
				+
				+# Shuffle the indices 0..59999; the first 50000 select the training
				+# samples and the last 10000 select the validation samples.
				+idx = tf.random.shuffle(tf.range(60000))
				+train_idx, val_idx = idx[:50000], idx[-10000:]
				+x_train, y_train = tf.gather(x, train_idx), tf.gather(y, train_idx)
				+x_val, y_val = tf.gather(x, val_idx), tf.gather(y, val_idx)
				+print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)
				+
				+# Training and validation pipelines: preprocess, shuffle, batch.
				+db_train = (tf.data.Dataset.from_tensor_slices((x_train, y_train))
				+            .map(preprocess)
				+            .shuffle(50000)
				+            .batch(batchsz))
				+db_val = (tf.data.Dataset.from_tensor_slices((x_val, y_val))
				+          .map(preprocess)
				+          .shuffle(10000)
				+          .batch(batchsz))
				+
				+# Test pipeline: preprocess and batch only.
				+db_test = (tf.data.Dataset.from_tensor_slices((x_test, y_test))
				+           .map(preprocess)
				+           .batch(batchsz))
				+
				+# Pull one training batch and show the shapes of its inputs and labels.
				+sample = next(iter(db_train))
				+print(sample[0].shape, sample[1].shape)
			
 
				+
			
 
				+
			
 
				+# Five stacked Dense layers; the last one has 10 units and no activation,
				+# so the loss below is configured with from_logits=True.
				+network = Sequential([layers.Dense(256, activation='relu'),
				+                     layers.Dense(128, activation='relu'),
				+                     layers.Dense(64, activation='relu'),
				+                     layers.Dense(32, activation='relu'),
				+                     layers.Dense(10)])
				+network.build(input_shape=(None, 28*28))
				+network.summary()
				+
				+# `learning_rate` is the supported keyword; `lr` is only a deprecated alias
				+# that newer Keras releases no longer accept.
				+network.compile(optimizer=optimizers.Adam(learning_rate=0.01),
				+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
				+                metrics=['accuracy'])
				+
				+# validation_freq=2 runs validation on db_val every second epoch.
				+network.fit(db_train, epochs=6, validation_data=db_val, validation_freq=2)
				+
				+print('Test performance:')
				+network.evaluate(db_test)
			
 
				+ 
			
 
				+
			
 
				+# Take the first test batch: inputs and one-hot labels.
				+sample = next(iter(db_test))
				+x, y = sample[0], sample[1]
				+
				+# Network output is [b, 10]; argmax over axis 1 turns both the output
				+# and the one-hot labels back into class numbers.
				+pred = tf.argmax(network.predict(x), axis=1)
				+y = tf.argmax(y, axis=1)
				+
				+print(pred)
				+print(y)
			
--- a/ch09-过拟合/交叉验证.pdf
+++ b/ch09-过拟合/交叉验证.pdf
--- a/ch09-过拟合/学习率与动量.pdf
+++ b/ch09-过拟合/学习率与动量.pdf
--- a/ch09-过拟合/过拟合与欠拟合.pdf
+++ b/ch09-过拟合/过拟合与欠拟合.pdf
--- a/ch09/nb.py
+++ b/ch09/nb.py
@@ -1,36 +0,0 @@
 
				-#%%
			
 
				-import tensorflow as tf 
			
 
				-from    tensorflow.keras import layers
			
 
				-
			
 
				-pip install -U scikit-learn
			
 
				-
			
 
				-#%%
			
 
				-# 添加dropout操作
			
 
				-x = tf.nn.dropout(x, rate=0.5)
			
 
				-# 添加Dropout层
			
 
				-model.add(layers.Dropout(rate=0.5))
			
 
				-
			
 
				-# 手动计算每个张量的范数
			
 
				-loss_reg = lambda_ * tf.reduce_sum(tf.square(w))
			
 
				-# 在层方式时添加范数函数
			
 
				-Dense(256, activation='relu',
			
 
				-                    kernel_regularizer=regularizers.l2(_lambda))
			
 
				-
			
 
				-#%%
			
 
				-#                     
			
 
				-# 创建网络参数w1,w2
			
 
				-w1 = tf.random.normal([4,3])
			
 
				-w2 = tf.random.normal([4,2])
			
 
				-# 计算L1正则化项
			
 
				-loss_reg = tf.reduce_sum(tf.math.abs(w1))\
			
 
				-    + tf.reduce_sum(tf.math.abs(w2))
			
 
				-
			
 
				-
			
 
				-# 计算L2正则化项
			
 
				-loss_reg = tf.reduce_sum(tf.square(w1))\
			
 
				-    + tf.reduce_sum(tf.square(w2))
			
 
				-
			
 
				-#%%
			
 
				-loss_reg
			
 
				-
			
 
				-#%%
			
--- a/ch10-卷积神经网络/BatchNorm.pdf
+++ b/ch10-卷积神经网络/BatchNorm.pdf
--- a/ch10-卷积神经网络/CIFAR与VGG实战.pdf
+++ b/ch10-卷积神经网络/CIFAR与VGG实战.pdf
--- a/ch10-卷积神经网络/ResNet与DenseNet.pdf
+++ b/ch10-卷积神经网络/ResNet与DenseNet.pdf