5 years ago · 70f833b3f3
--- a/ch01-人工智能绪论/autograd.py
+++ b/ch01-人工智能绪论/autograd.py
--- a/ch01-人工智能绪论/gpu_accelerate.py
+++ b/ch01-人工智能绪论/gpu_accelerate.py
--- a/ch01-人工智能绪论/tf1.py
+++ b/ch01-人工智能绪论/tf1.py
@@ -1,5 +1,5 @@
 
															 import tensorflow.compat.v1 as tf
														
 
															-tf.disable_v2_behavior()
														
 
															+tf.disable_v2_behavior() # 使用静态图模式运行以下代码
														
 
															 assert tf.__version__.startswith('2.')
														
 
															 # 1.创建计算图阶段
														
--- a/ch01-人工智能绪论/tf2.py
+++ b/ch01-人工智能绪论/tf2.py
--- a/ch02-回归问题/data.csv
+++ b/ch02-回归问题/data.csv
--- a/ch02-回归问题/linear_regression.py
+++ b/ch02-回归问题/linear_regression.py
--- a/ch02-回归问题/回归实战.pdf
+++ b/ch02-回归问题/回归实战.pdf
--- a/ch02-回归问题/回归问题.pdf
+++ b/ch02-回归问题/回归问题.pdf
--- a/ch03-分类问题/forward_layer.py
+++ b/ch03-分类问题/forward_layer.py
--- a/ch03-分类问题/forward_tensor.py
+++ b/ch03-分类问题/forward_tensor.py
@@ -1,3 +1,5 @@
 
															+import  os
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
														
 
															 import  matplotlib
														
 
															 from 	matplotlib import pyplot as plt
														
 
															 # Default parameters for plots
														
@@ -10,9 +12,7 @@ matplotlib.rcParams['axes.unicode_minus']=False
 
															 import  tensorflow as tf
														
 
															 from    tensorflow import keras
														
 
															 from    tensorflow.keras import datasets
														
 
															-import  os
														
 
															-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
														
 
															 # x: [60k, 28, 28],
														
 
															 # y: [60k]
														
--- a/ch03-分类问题/main.py
+++ b/ch03-分类问题/main.py
@@ -1,60 +1,60 @@
 
															-import  tensorflow as tf

														
 
															-from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics

														
 
															-

														
 
															-

														
 
															-# 设置GPU使用方式

														
 
															-# 获取GPU列表

														
 
															-gpus = tf.config.experimental.list_physical_devices('GPU')

														
 
															-if gpus:

														
 
															-  try:

														
 
															-    # 设置GPU为增长式占用

														
 
															-    for gpu in gpus:

														
 
															-      tf.config.experimental.set_memory_growth(gpu, True) 

														
 
															-  except RuntimeError as e:

														
 
															-    # 打印异常

														
 
															-    print(e)

														
 
															-

														
 
															-(xs, ys),_ = datasets.mnist.load_data()

														
 
															-print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())

														
 
															-

														
 
															-batch_size = 32

														
 
															-

														
 
															-xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.

														
 
															-db = tf.data.Dataset.from_tensor_slices((xs,ys))

														
 
															-db = db.batch(batch_size).repeat(30)

														
 
															-

														
 
															-

														
 
															-model = Sequential([layers.Dense(256, activation='relu'), 

														
 
															-                     layers.Dense(128, activation='relu'),

														
 
															-                     layers.Dense(10)])

														
 
															-model.build(input_shape=(4, 28*28))

														
 
															-model.summary()

														
 
															-

														
 
															-optimizer = optimizers.SGD(lr=0.01)

														
 
															-acc_meter = metrics.Accuracy()

														
 
															-

														
 
															-for step, (x,y) in enumerate(db):

														
 
															-

														
 
															-    with tf.GradientTape() as tape:

														
 
															-        # 打平操作，[b, 28, 28] => [b, 784]

														
 
															-        x = tf.reshape(x, (-1, 28*28))

														
 
															-        # Step1. 得到模型输出output [b, 784] => [b, 10]

														
 
															-        out = model(x)

														
 
															-        # [b] => [b, 10]

														
 
															-        y_onehot = tf.one_hot(y, depth=10)

														
 
															-        # 计算差的平方和，[b, 10]

														
 
															-        loss = tf.square(out-y_onehot)

														
 
															-        # 计算每个样本的平均误差，[b]

														
 
															-        loss = tf.reduce_sum(loss) / x.shape[0]

														
 
															-

														
 
															-

														
 
															-    acc_meter.update_state(tf.argmax(out, axis=1), y)

														
 
															-

														
 
															-    grads = tape.gradient(loss, model.trainable_variables)

														
 
															-    optimizer.apply_gradients(zip(grads, model.trainable_variables))

														
 
															-

														
 
															-

														
 
															-    if step % 200==0:

														
 
															-

														
 
															-        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())

														
 
															-        acc_meter.reset_states()

														
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+# 设置GPU使用方式
														
 
															+# 获取GPU列表
														
 
															+gpus = tf.config.experimental.list_physical_devices('GPU')
														
 
															+if gpus:
														
 
															+  try:
														
 
															+    # 设置GPU为增长式占用
														
 
															+    for gpu in gpus:
														
 
															+      tf.config.experimental.set_memory_growth(gpu, True) 
														
 
															+  except RuntimeError as e:
														
 
															+    # 打印异常
														
 
															+    print(e)
														
 
															+
														
 
															+(xs, ys),_ = datasets.mnist.load_data()
														
 
															+print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())
														
 
															+
														
 
															+batch_size = 32
														
 
															+
														
 
															+xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
														
 
															+db = tf.data.Dataset.from_tensor_slices((xs,ys))
														
 
															+db = db.batch(batch_size).repeat(30)
														
 
															+
														
 
															+
														
 
															+model = Sequential([layers.Dense(256, activation='relu'), 
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+model.build(input_shape=(4, 28*28))
														
 
															+model.summary()
														
 
															+
														
 
															+optimizer = optimizers.SGD(lr=0.01)
														
 
															+acc_meter = metrics.Accuracy()
														
 
															+
														
 
															+for step, (x,y) in enumerate(db):
														
 
															+
														
 
															+    with tf.GradientTape() as tape:
														
 
															+        # 打平操作，[b, 28, 28] => [b, 784]
														
 
															+        x = tf.reshape(x, (-1, 28*28))
														
 
															+        # Step1. 得到模型输出output [b, 784] => [b, 10]
														
 
															+        out = model(x)
														
 
															+        # [b] => [b, 10]
														
 
															+        y_onehot = tf.one_hot(y, depth=10)
														
 
															+        # 计算差的平方和，[b, 10]
														
 
															+        loss = tf.square(out-y_onehot)
														
 
															+        # 计算每个样本的平均误差，[b]
														
 
															+        loss = tf.reduce_sum(loss) / x.shape[0]
														
 
															+
														
 
															+
														
 
															+    acc_meter.update_state(tf.argmax(out, axis=1), y)
														
 
															+
														
 
															+    grads = tape.gradient(loss, model.trainable_variables)
														
 
															+    optimizer.apply_gradients(zip(grads, model.trainable_variables))
														
 
															+
														
 
															+
														
 
															+    if step % 200==0:
														
 
															+
														
 
															+        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())
														
 
															+        acc_meter.reset_states()
														
--- a/ch03-分类问题/手写数字问题.pdf
+++ b/ch03-分类问题/手写数字问题.pdf
--- a/ch03-分类问题/手写数字问题体验.pdf
+++ b/ch03-分类问题/手写数字问题体验.pdf
--- a/ch03/readMNIST.py
+++ b/ch03/readMNIST.py
@@ -1,73 +0,0 @@
 
															-#!/usr/bin/python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-
														
 
															-"""
														
 
															-从MNIST中读取原始图片并保存、读取标签数据并保存。
														
 
															-MNIST文件结构分析可以参考：https://blog.csdn.net/justidle/article/details/103149253
														
 
															-"""
														
 
															-"""
														
 
															-使用方法：
														
 
															-1、将MNIST的文件下载到本地。
														
 
															-2、在py文件所在目录下，建立mnist_data目录。然后将MNIST的四个文件拷贝到mnist_data目录，并解压
														
 
															-3、在py文件所在目录下，建立test目录，改目录用于存放解压出的图片文件和标签文件
														
 
															-"""
														
 
															-
														
 
															-import struct
														
 
															-import numpy as np
														
 
															-import PIL.Image
														
 
															-    
														
 
															-def read_image(filename):
														
 
															-    #打开文件
														
 
															-    f = open(filename, 'rb')
														
 
															-    
														
 
															-    #读取文件内容
														
 
															-    index = 0
														
 
															-    buf = f.read()
														
 
															-    
														
 
															-    #关闭文件
														
 
															-    f.close()
														
 
															-    
														
 
															-    #解析文件内容
														
 
															-    #>IIII 表示使用大端规则，读取四个整型
														
 
															-    magic, numImages, rows, columns = struct.unpack_from('>IIII', buf, index)
														
 
															-    index += struct.calcsize('>IIII')
														
 
															-    
														
 
															-    for i in range(0, numImages):
														
 
															-        # L代表灰度图片
														
 
															-        image = PIL.Image.new('L', (columns, rows))
														
 
															-        
														
 
															-        for x in range(rows):
														
 
															-            for y in range(columns):
														
 
															-                # ‘>B' 读取一个字节
														
 
															-                image.putpixel((y,x), int(struct.unpack_from('>B', buf, index)[0]))
														
 
															-                index += struct.calcsize('>B')
														
 
															-                
														
 
															-        print('save ' + str(i) + 'image')
														
 
															-        image.save('mnist_data/test/'+str(i)+'.png')
														
 
															-        
														
 
															-def read_label(filename, saveFilename):
														
 
															-    f = open(filename, 'rb')
														
 
															-    index = 0
														
 
															-    buf = f.read()
														
 
															-    f.close()
														
 
															-    
														
 
															-    magic, labels = struct.unpack_from('>II' , buf , index)
														
 
															-    index += struct.calcsize('>II')
														
 
															-    
														
 
															-    labelArr = [0] * labels
														
 
															-    
														
 
															-    for x in range(labels):
														
 
															-        labelArr[x] = int(struct.unpack_from('>B', buf, index)[0])
														
 
															-        index += struct.calcsize('>B')
														
 
															-    
														
 
															-    save = open(saveFilename, 'w')
														
 
															-    save.write(','.join(map(lambda x: str(x), labelArr)))
														
 
															-    save.write('\n')
														
 
															-    save.close()
														
 
															-    print('save labels success')
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    #注意t10k-images-idx3-ubyte里面一共有10,000张图片
														
 
															-    read_image('mnist_data/t10k-images-idx3-ubyte')
														
 
															-    read_label('mnist_data/t10k-labels-idx1-ubyte', 'mnist_data/test/label.txt')
														
 
															-    
														
--- a/ch04-TensorFlow基础/4.10-forward-prop.py
+++ b/ch04-TensorFlow基础/4.10-forward-prop.py
@@ -0,0 +1,109 @@
 
															+#!/usr/bin/env python
														
 
															+# encoding: utf-8
														
 
															+"""
														
 
															+@author: HuRuiFeng
														
 
															+@file: 4.10-forward-prop.py
														
 
															+@time: 2020/2/14 23:47
														
 
															+@desc: 4.10 前向传播实战的示例代码
														
 
															+"""
														
 
															+
														
 
															+import matplotlib.pyplot as plt
														
 
															+import tensorflow as tf
														
 
															+import tensorflow.keras.datasets as datasets
														
 
															+
														
 
															+plt.rcParams['font.size'] = 16
														
 
															+plt.rcParams['font.family'] = ['STKaiti']
														
 
															+plt.rcParams['axes.unicode_minus'] = False
														
 
															+
														
 
															+
														
 
															+def load_data():
														
 
															+    # 加载 MNIST 数据集
														
 
															+    (x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+    # 转换为浮点张量， 并缩放到-1~1
														
 
															+    x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
														
 
															+    # 转换为整形张量
														
 
															+    y = tf.convert_to_tensor(y, dtype=tf.int32)
														
 
															+    # one-hot 编码
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+
														
 
															+    # 改变视图， [b, 28, 28] => [b, 28*28]
														
 
															+    x = tf.reshape(x, (-1, 28 * 28))
														
 
															+
														
 
															+    # 构建数据集对象
														
 
															+    train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
														
 
															+    # 批量训练
														
 
															+    train_dataset = train_dataset.batch(200)
														
 
															+    return train_dataset
														
 
															+
														
 
															+
														
 
															+def init_paramaters():
														
 
															+    # 每层的张量都需要被优化，故使用 Variable 类型，并使用截断的正太分布初始化权值张量
														
 
															+    # 偏置向量初始化为 0 即可
														
 
															+    # 第一层的参数
														
 
															+    w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
														
 
															+    b1 = tf.Variable(tf.zeros([256]))
														
 
															+    # 第二层的参数
														
 
															+    w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
														
 
															+    b2 = tf.Variable(tf.zeros([128]))
														
 
															+    # 第三层的参数
														
 
															+    w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
														
 
															+    b3 = tf.Variable(tf.zeros([10]))
														
 
															+    return w1, b1, w2, b2, w3, b3
														
 
															+
														
 
															+
														
 
															+def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):
														
 
															+    for step, (x, y) in enumerate(train_dataset):
														
 
															+        with tf.GradientTape() as tape:
														
 
															+            # 第一层计算， [b, 784]@[784, 256] + [256] => [b, 256] + [256] => [b,256] + [b, 256]
														
 
															+            h1 = x @ w1 + tf.broadcast_to(b1, (x.shape[0], 256))
														
 
															+            h1 = tf.nn.relu(h1)  # 通过激活函数
														
 
															+
														
 
															+            # 第二层计算， [b, 256] => [b, 128]
														
 
															+            h2 = h1 @ w2 + b2
														
 
															+            h2 = tf.nn.relu(h2)
														
 
															+            # 输出层计算， [b, 128] => [b, 10]
														
 
															+            out = h2 @ w3 + b3
														
 
															+
														
 
															+            # 计算网络输出与标签之间的均方差， mse = mean(sum(y-out)^2)
														
 
															+            # [b, 10]
														
 
															+            loss = tf.square(y - out)
														
 
															+            # 误差标量， mean: scalar
														
 
															+            loss = tf.reduce_mean(loss)
														
 
															+
														
 
															+            # 自动梯度，需要求梯度的张量有[w1, b1, w2, b2, w3, b3]
														
 
															+            grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
														
 
															+
														
 
															+        # 梯度更新， assign_sub 将当前值减去参数值，原地更新
														
 
															+        w1.assign_sub(lr * grads[0])
														
 
															+        b1.assign_sub(lr * grads[1])
														
 
															+        w2.assign_sub(lr * grads[2])
														
 
															+        b2.assign_sub(lr * grads[3])
														
 
															+        w3.assign_sub(lr * grads[4])
														
 
															+        b3.assign_sub(lr * grads[5])
														
 
															+
														
 
															+        if step % 100 == 0:
														
 
															+            print(epoch, step, 'loss:', loss.numpy())
														
 
															+
														
 
															+    return loss.numpy()
														
 
															+
														
 
															+
														
 
															+def train(epochs):
														
 
															+    losses = []
														
 
															+    train_dataset = load_data()
														
 
															+    w1, b1, w2, b2, w3, b3 = init_paramaters()
														
 
															+    for epoch in range(epochs):
														
 
															+        loss = train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001)
														
 
															+        losses.append(loss)
														
 
															+
														
 
															+    x = [i for i in range(0, epochs)]
														
 
															+    # 绘制曲线
														
 
															+    plt.plot(x, losses, color='blue', marker='s', label='训练')
														
 
															+    plt.xlabel('Epoch')
														
 
															+    plt.ylabel('MSE')
														
 
															+    plt.legend()
														
 
															+    plt.savefig('MNIST数据集的前向传播训练误差曲线.png')
														
 
															+    plt.close()
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    train(epochs=20)
														
--- a/ch04-TensorFlow基础/Broadcasting.pdf
+++ b/ch04-TensorFlow基础/Broadcasting.pdf
--- a/ch04-TensorFlow基础/MNIST数据集的前向传播训练误差曲线.png
+++ b/ch04-TensorFlow基础/MNIST数据集的前向传播训练误差曲线.png
--- a/ch04-TensorFlow基础/ch04-TensorFlow基础.ipynb
+++ b/ch04-TensorFlow基础/ch04-TensorFlow基础.ipynb
--- a/ch04-TensorFlow基础/创建Tensor.pdf
+++ b/ch04-TensorFlow基础/创建Tensor.pdf
--- a/ch04-TensorFlow基础/前向传播.pdf
+++ b/ch04-TensorFlow基础/前向传播.pdf
--- a/ch04-TensorFlow基础/数学运算.pdf
+++ b/ch04-TensorFlow基础/数学运算.pdf
--- a/ch04-TensorFlow基础/数据类型.pdf
+++ b/ch04-TensorFlow基础/数据类型.pdf
--- a/ch04-TensorFlow基础/索引与切片-1.pdf
+++ b/ch04-TensorFlow基础/索引与切片-1.pdf
--- a/ch04-TensorFlow基础/索引与切片-2.pdf
+++ b/ch04-TensorFlow基础/索引与切片-2.pdf
--- a/ch04-TensorFlow基础/维度变换.pdf
+++ b/ch04-TensorFlow基础/维度变换.pdf
--- a/ch05-TensorFlow进阶/acc_topk.py
+++ b/ch05-TensorFlow进阶/acc_topk.py
@@ -0,0 +1,37 @@
 
															+import  tensorflow as tf
														
 
															+import  os
														
 
															+
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
														
 
															+tf.random.set_seed(2467)
														
 
															+
														
 
															+def accuracy(output, target, topk=(1,)):
														
 
															+    maxk = max(topk)
														
 
															+    batch_size = target.shape[0]
														
 
															+
														
 
															+    pred = tf.math.top_k(output, maxk).indices
														
 
															+    pred = tf.transpose(pred, perm=[1, 0])
														
 
															+    target_ = tf.broadcast_to(target, pred.shape)
														
 
															+    # [10, b]
														
 
															+    correct = tf.equal(pred, target_)
														
 
															+
														
 
															+    res = []
														
 
															+    for k in topk:
														
 
															+        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
														
 
															+        correct_k = tf.reduce_sum(correct_k)
														
 
															+        acc = float(correct_k* (100.0 / batch_size) )
														
 
															+        res.append(acc)
														
 
															+
														
 
															+    return res
														
 
															+
														
 
															+
														
 
															+
														
 
															+output = tf.random.normal([10, 6])
														
 
															+output = tf.math.softmax(output, axis=1)
														
 
															+target = tf.random.uniform([10], maxval=6, dtype=tf.int32)
														
 
															+print('prob:', output.numpy())
														
 
															+pred = tf.argmax(output, axis=1)
														
 
															+print('pred:', pred.numpy())
														
 
															+print('label:', target.numpy())
														
 
															+
														
 
															+acc = accuracy(output, target, topk=(1,2,3,4,5,6))
														
 
															+print('top-1-6 acc:', acc)
														
--- a/ch05-TensorFlow进阶/gradient_clip.py
+++ b/ch05-TensorFlow进阶/gradient_clip.py
@@ -0,0 +1,85 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow import keras
														
 
															+from    tensorflow.keras import datasets, layers, optimizers
														
 
															+import  os
														
 
															+
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
														
 
															+print(tf.__version__)
														
 
															+
														
 
															+(x, y), _ = datasets.mnist.load_data()
														
 
															+x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.
														
 
															+y = tf.convert_to_tensor(y)
														
 
															+y = tf.one_hot(y, depth=10)
														
 
															+print('x:', x.shape, 'y:', y.shape)
														
 
															+train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128).repeat(30)
														
 
															+x,y = next(iter(train_db))
														
 
															+print('sample:', x.shape, y.shape)
														
 
															+# print(x[0], y[0])
														
 
															+
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+
														
 
															+    # 784 => 512
														
 
															+    w1, b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)), tf.Variable(tf.zeros([512]))
														
 
															+    # 512 => 256
														
 
															+    w2, b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
														
 
															+    # 256 => 10
														
 
															+    w3, b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
														
 
															+
														
 
															+
														
 
															+
														
 
															+    optimizer = optimizers.SGD(lr=0.01)
														
 
															+
														
 
															+
														
 
															+    for step, (x,y) in enumerate(train_db):
														
 
															+
														
 
															+        # [b, 28, 28] => [b, 784]
														
 
															+        x = tf.reshape(x, (-1, 784))
														
 
															+
														
 
															+        with tf.GradientTape() as tape:
														
 
															+
														
 
															+            # layer1.
														
 
															+            h1 = x @ w1 + b1
														
 
															+            h1 = tf.nn.relu(h1)
														
 
															+            # layer2
														
 
															+            h2 = h1 @ w2 + b2
														
 
															+            h2 = tf.nn.relu(h2)
														
 
															+            # output
														
 
															+            out = h2 @ w3 + b3
														
 
															+            # out = tf.nn.relu(out)
														
 
															+
														
 
															+            # compute loss
														
 
															+            # [b, 10] - [b, 10]
														
 
															+            loss = tf.square(y-out)
														
 
															+            # [b, 10] => [b]
														
 
															+            loss = tf.reduce_mean(loss, axis=1)
														
 
															+            # [b] => scalar
														
 
															+            loss = tf.reduce_mean(loss)
														
 
															+
														
 
															+
														
 
															+
														
 
															+        # compute gradient
														
 
															+        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
														
 
															+        # print('==before==')
														
 
															+        # for g in grads:
														
 
															+        #     print(tf.norm(g))
														
 
															+        
														
 
															+        grads,  _ = tf.clip_by_global_norm(grads, 15)
														
 
															+
														
 
															+        # print('==after==')
														
 
															+        # for g in grads:
														
 
															+        #     print(tf.norm(g))
														
 
															+        # update w' = w - lr*grad
														
 
															+        optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w3, b3]))
														
 
															+
														
 
															+
														
 
															+
														
 
															+        if step % 100 == 0:
														
 
															+            print(step, 'loss:', float(loss))
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    main()
														
--- a/ch05-TensorFlow进阶/mnist_tensor.py
+++ b/ch05-TensorFlow进阶/mnist_tensor.py
--- a/ch05-TensorFlow进阶/合并与分割.pdf
+++ b/ch05-TensorFlow进阶/合并与分割.pdf
--- a/ch05-TensorFlow进阶/填充与复制.pdf
+++ b/ch05-TensorFlow进阶/填充与复制.pdf
--- a/ch05-TensorFlow进阶/张量排序.pdf
+++ b/ch05-TensorFlow进阶/张量排序.pdf
--- a/ch05-TensorFlow进阶/张量限幅.pdf
+++ b/ch05-TensorFlow进阶/张量限幅.pdf
--- a/ch05-TensorFlow进阶/数据统计.pdf
+++ b/ch05-TensorFlow进阶/数据统计.pdf
--- a/ch05-TensorFlow进阶/高阶特性.pdf
+++ b/ch05-TensorFlow进阶/高阶特性.pdf
--- a/ch05/nb.py
+++ b/ch05/nb.py
@@ -1,21 +0,0 @@
 
															-#%%
														
 
															-import  tensorflow as tf
														
 
															-from    tensorflow import keras
														
 
															-from    tensorflow.keras import datasets
														
 
															-import  os
														
 
															-
														
 
															-
														
 
															-#%%
														
 
															-a = tf.random.normal([4,35,8]) # 模拟成绩册A
														
 
															-b = tf.random.normal([6,35,8]) # 模拟成绩册B
														
 
															-tf.concat([a,b],axis=0) # 合并成绩册
														
 
															-
														
 
															-
														
 
															-#%%
														
 
															-x = tf.random.normal([2,784])
														
 
															-w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
														
 
															-b1 = tf.Variable(tf.zeros([256]))
														
 
															-o1 = tf.matmul(x,w1) + b1  #
														
 
															-o1 = tf.nn.relu(o1)
														
 
															-
														
 
															-#%%
														
--- a/ch06-神经网络/auto_efficency_regression.py
+++ b/ch06-神经网络/auto_efficency_regression.py
--- a/ch06-神经网络/ch06-神经网络.ipynb
+++ b/ch06-神经网络/ch06-神经网络.ipynb
--- a/ch06-神经网络/forward.py
+++ b/ch06-神经网络/forward.py
--- a/ch06-神经网络/nb.py
+++ b/ch06-神经网络/nb.py
--- a/ch06-神经网络/全接连层.pdf
+++ b/ch06-神经网络/全接连层.pdf
--- a/ch06-神经网络/误差计算.pdf
+++ b/ch06-神经网络/误差计算.pdf
--- a/ch06-神经网络/输出方式.pdf
+++ b/ch06-神经网络/输出方式.pdf
--- a/ch07-反向传播算法/0.梯度下降-简介.pdf
+++ b/ch07-反向传播算法/0.梯度下降-简介.pdf
--- a/ch07-反向传播算法/2.常见函数的梯度.pdf
+++ b/ch07-反向传播算法/2.常见函数的梯度.pdf
--- a/ch07-反向传播算法/2nd_derivative.py
+++ b/ch07-反向传播算法/2nd_derivative.py
@@ -0,0 +1,18 @@
 
															+import tensorflow as tf
														
 
															+
														
 
															+w = tf.Variable(1.0)
														
 
															+b = tf.Variable(2.0)
														
 
															+x = tf.Variable(3.0)
														
 
															+
														
 
															+with tf.GradientTape() as t1:
														
 
															+  with tf.GradientTape() as t2:
														
 
															+    y = x * w + b
														
 
															+  dy_dw, dy_db = t2.gradient(y, [w, b])
														
 
															+d2y_dw2 = t1.gradient(dy_dw, w)
														
 
															+
														
 
															+print(dy_dw)
														
 
															+print(dy_db)
														
 
															+print(d2y_dw2)
														
 
															+
														
 
															+assert dy_dw.numpy() == 3.0
														
 
															+assert d2y_dw2 is None
														
--- a/ch07-反向传播算法/3.激活函数及其梯度.pdf
+++ b/ch07-反向传播算法/3.激活函数及其梯度.pdf
--- a/ch07-反向传播算法/4.损失函数及其梯度.pdf
+++ b/ch07-反向传播算法/4.损失函数及其梯度.pdf
--- a/ch07-反向传播算法/5.单输出感知机梯度.pdf
+++ b/ch07-反向传播算法/5.单输出感知机梯度.pdf
--- a/ch07-反向传播算法/6.多输出感知机梯度.pdf
+++ b/ch07-反向传播算法/6.多输出感知机梯度.pdf
--- a/ch07-反向传播算法/7.链式法则.pdf
+++ b/ch07-反向传播算法/7.链式法则.pdf
--- a/ch07-反向传播算法/8.多层感知机梯度.pdf
+++ b/ch07-反向传播算法/8.多层感知机梯度.pdf
--- a/ch07-反向传播算法/ch07-反向传播算法.ipynb
+++ b/ch07-反向传播算法/ch07-反向传播算法.ipynb
--- a/ch07-反向传播算法/chain_rule.py
+++ b/ch07-反向传播算法/chain_rule.py
--- a/ch07-反向传播算法/crossentropy_loss.py
+++ b/ch07-反向传播算法/crossentropy_loss.py
@@ -0,0 +1,24 @@
 
															+import tensorflow as tf 
														
 
															+
														
 
															+
														
 
															+tf.random.set_seed(4323)
														
 
															+
														
 
															+x=tf.random.normal([1,3])
														
 
															+
														
 
															+w=tf.random.normal([3,2])
														
 
															+
														
 
															+b=tf.random.normal([2])
														
 
															+
														
 
															+y = tf.constant([0, 1])
														
 
															+
														
 
															+
														
 
															+with tf.GradientTape() as tape:
														
 
															+
														
 
															+	tape.watch([w, b])
														
 
															+	logits = (x@w+b)
														
 
															+	loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y, logits, from_logits=True))
														
 
															+
														
 
															+grads = tape.gradient(loss, [w, b])
														
 
															+print('w grad:', grads[0])
														
 
															+
														
 
															+print('b grad:', grads[1])
														
--- a/ch07-反向传播算法/himmelblau.py
+++ b/ch07-反向传播算法/himmelblau.py
--- a/ch07-反向传播算法/mse_grad.py
+++ b/ch07-反向传播算法/mse_grad.py
@@ -0,0 +1,26 @@
 
															+import tensorflow as tf 
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+x=tf.random.normal([1,3])
														
 
															+
														
 
															+w=tf.ones([3,2])
														
 
															+
														
 
															+b=tf.ones([2])
														
 
															+
														
 
															+y = tf.constant([0, 1])
														
 
															+
														
 
															+
														
 
															+with tf.GradientTape() as tape:
														
 
															+
														
 
															+	tape.watch([w, b])
														
 
															+	logits = tf.sigmoid(x@w+b) 
														
 
															+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
														
 
															+
														
 
															+grads = tape.gradient(loss, [w, b])
														
 
															+print('w grad:', grads[0])
														
 
															+
														
 
															+print('b grad:', grads[1])
														
 
															+
														
 
															+
														
--- a/ch07-反向传播算法/multi_output_perceptron.py
+++ b/ch07-反向传播算法/multi_output_perceptron.py
@@ -0,0 +1,26 @@
 
															+import tensorflow as tf 
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+x=tf.random.normal([1,3])
														
 
															+
														
 
															+w=tf.ones([3,2])
														
 
															+
														
 
															+b=tf.ones([2])
														
 
															+
														
 
															+y = tf.constant([0, 1])
														
 
															+
														
 
															+
														
 
															+with tf.GradientTape() as tape:
														
 
															+
														
 
															+	tape.watch([w, b])
														
 
															+	logits = tf.sigmoid(x@w+b) 
														
 
															+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
														
 
															+
														
 
															+grads = tape.gradient(loss, [w, b])
														
 
															+print('w grad:', grads[0])
														
 
															+
														
 
															+print('b grad:', grads[1])
														
 
															+
														
 
															+
														
--- a/ch07-反向传播算法/numpy-backward-prop.py
+++ b/ch07-反向传播算法/numpy-backward-prop.py
@@ -0,0 +1,223 @@
 
															+#!/usr/bin/env python
														
 
															+# encoding: utf-8
														
 
															+"""
														
 
															+@author: HuRuiFeng
														
 
															+@file: 7.9-backward-prop.py
														
 
															+@time: 2020/2/24 17:32
														
 
															+@desc: 7.9 反向传播算法实战的代码
														
 
															+"""
														
 
															+
														
 
															+import matplotlib.pyplot as plt
														
 
															+import numpy as np
														
 
															+import seaborn as sns
														
 
															+from sklearn.datasets import make_moons
														
 
															+from sklearn.model_selection import train_test_split
														
 
															+
														
 
															+plt.rcParams['font.size'] = 16
														
 
															+plt.rcParams['font.family'] = ['STKaiti']
														
 
															+plt.rcParams['axes.unicode_minus'] = False
														
 
															+
														
 
															+
														
 
															+def load_dataset():
														
 
															+    # 采样点数
														
 
															+    N_SAMPLES = 2000
														
 
															+    # 测试数量比率
														
 
															+    TEST_SIZE = 0.3
														
 
															+    # 利用工具函数直接生成数据集
														
 
															+    X, y = make_moons(n_samples=N_SAMPLES, noise=0.2, random_state=100)
														
 
															+    # 将 2000 个点按着 7:3 分割为训练集和测试集
														
 
															+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
														
 
															+    return X, y, X_train, X_test, y_train, y_test
														
 
															+
														
 
															+
														
 
															+def make_plot(X, y, plot_name, XX=None, YY=None, preds=None, dark=False):
														
 
															+    # 绘制数据集的分布， X 为 2D 坐标， y 为数据点的标签
														
 
															+    if (dark):
														
 
															+        plt.style.use('dark_background')
														
 
															+    else:
														
 
															+        sns.set_style("whitegrid")
														
 
															+    plt.figure(figsize=(16, 12))
														
 
															+    axes = plt.gca()
														
 
															+    axes.set(xlabel="$x_1$", ylabel="$x_2$")
														
 
															+    plt.title(plot_name, fontsize=30)
														
 
															+    plt.subplots_adjust(left=0.20)
														
 
															+    plt.subplots_adjust(right=0.80)
														
 
															+    if XX is not None and YY is not None and preds is not None:
														
 
															+        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=1, cmap=plt.cm.Spectral)
														
 
															+        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
														
 
															+    # 绘制散点图，根据标签区分颜色
														
 
															+    plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=plt.cm.Spectral, edgecolors='none')
														
 
															+    plt.savefig('数据集分布.svg')
														
 
															+    plt.close()
														
 
															+
														
 
															+
														
 
															+class Layer:
														
 
															+    # 全连接网络层
														
 
															+    def __init__(self, n_input, n_neurons, activation=None, weights=None,
														
 
															+                 bias=None):
														
 
															+        """
														
 
															+        :param int n_input: 输入节点数
														
 
															+        :param int n_neurons: 输出节点数
														
 
															+        :param str activation: 激活函数类型
														
 
															+        :param weights: 权值张量，默认类内部生成
														
 
															+        :param bias: 偏置，默认类内部生成
														
 
															+        """
														
 
															+        # 通过正态分布初始化网络权值，初始化非常重要，不合适的初始化将导致网络不收敛
														
 
															+        self.weights = weights if weights is not None else np.random.randn(n_input, n_neurons) * np.sqrt(1 / n_neurons)
														
 
															+        self.bias = bias if bias is not None else np.random.rand(n_neurons) * 0.1
														
 
															+        self.activation = activation  # 激活函数类型，如’sigmoid’
														
 
															+        self.last_activation = None  # 激活函数的输出值o
														
 
															+        self.error = None  # 用于计算当前层的delta 变量的中间变量
														
 
															+        self.delta = None  # 记录当前层的delta 变量，用于计算梯度
														
 
															+
														
 
															+    # 网络层的前向传播函数实现如下，其中last_activation 变量用于保存当前层的输出值：
														
 
															+    def activate(self, x):
														
 
															+        # 前向传播函数
														
 
															+        r = np.dot(x, self.weights) + self.bias  # X@W+b
														
 
															+        # 通过激活函数，得到全连接层的输出o
														
 
															+        self.last_activation = self._apply_activation(r)
														
 
															+        return self.last_activation
														
 
															+
														
 
															+    # 上述代码中的self._apply_activation 函数实现了不同类型的激活函数的前向计算过程，
														
 
															+    # 尽管此处我们只使用Sigmoid 激活函数一种。代码如下：
														
 
															+    def _apply_activation(self, r):
														
 
															+        # 计算激活函数的输出
														
 
															+        if self.activation is None:
														
 
															+            return r  # 无激活函数，直接返回
														
 
															+        # ReLU 激活函数
														
 
															+        elif self.activation == 'relu':
														
 
															+            return np.maximum(r, 0)
														
 
															+        # tanh 激活函数
														
 
															+        elif self.activation == 'tanh':
														
 
															+            return np.tanh(r)
														
 
															+        # sigmoid 激活函数
														
 
															+        elif self.activation == 'sigmoid':
														
 
															+            return 1 / (1 + np.exp(-r))
														
 
															+        return r
														
 
															+
														
 
															+    # 针对于不同类型的激活函数，它们的导数计算实现如下：
														
 
															+    def apply_activation_derivative(self, r):
														
 
															+        # 计算激活函数的导数
														
 
															+        # 无激活函数，导数为1
														
 
															+        if self.activation is None:
														
 
															+            return np.ones_like(r)
														
 
															+        # ReLU 函数的导数实现
														
 
															+        elif self.activation == 'relu':
														
 
															+            grad = np.array(r, copy=True)
														
 
															+            grad[r > 0] = 1.
														
 
															+            grad[r <= 0] = 0.
														
 
															+            return grad
														
 
															+        # tanh 函数的导数实现
														
 
															+        elif self.activation == 'tanh':
														
 
															+            return 1 - r ** 2
														
 
															+        # Sigmoid 函数的导数实现
														
 
															+        elif self.activation == 'sigmoid':
														
 
															+            return r * (1 - r)
														
 
															+        return r
														
 
															+
														
 
															+
														
 
															+# 神经网络模型
														
 
															+class NeuralNetwork:
														
 
															+    def __init__(self):
														
 
															+        self._layers = []  # 网络层对象列表
														
 
															+
														
 
															+    def add_layer(self, layer):
														
 
															+        # 追加网络层
														
 
															+        self._layers.append(layer)
														
 
															+
														
 
															+    # 网络的前向传播只需要循环调各个网络层对象的前向计算函数即可，代码如下：
														
 
															+    # 前向传播
														
 
															+    def feed_forward(self, X):
														
 
															+        for layer in self._layers:
														
 
															+            # 依次通过各个网络层
														
 
															+            X = layer.activate(X)
														
 
															+        return X
														
 
															+
														
 
															+    def backpropagation(self, X, y, learning_rate):
														
 
															+        # 反向传播算法实现
														
 
															+        # 前向计算，得到输出值
														
 
															+        output = self.feed_forward(X)
														
 
															+        for i in reversed(range(len(self._layers))):  # 反向循环
														
 
															+            layer = self._layers[i]  # 得到当前层对象
														
 
															+            # 如果是输出层
														
 
															+            if layer == self._layers[-1]:  # 对于输出层
														
 
															+                layer.error = y - output  # 计算2 分类任务的均方差的导数
														
 
															+                # 关键步骤：计算最后一层的delta，参考输出层的梯度公式
														
 
															+                layer.delta = layer.error * layer.apply_activation_derivative(output)
														
 
															+            else:  # 如果是隐藏层
														
 
															+                next_layer = self._layers[i + 1]  # 得到下一层对象
														
 
															+                layer.error = np.dot(next_layer.weights, next_layer.delta)
														
 
															+                # 关键步骤：计算隐藏层的delta，参考隐藏层的梯度公式
														
 
															+                layer.delta = layer.error * layer.apply_activation_derivative(layer.last_activation)
														
 
															+
														
 
															+        # 循环更新权值
														
 
															+        for i in range(len(self._layers)):
														
 
															+            layer = self._layers[i]
														
 
															+            # o_i 为上一网络层的输出
														
 
															+            o_i = np.atleast_2d(X if i == 0 else self._layers[i - 1].last_activation)
														
 
															+            # 梯度下降算法，delta 是公式中的负数，故这里用加号
														
 
															+            layer.weights += layer.delta * o_i.T * learning_rate
														
 
															+
														
 
															+    def train(self, X_train, X_test, y_train, y_test, learning_rate, max_epochs):
														
 
															+        # 网络训练函数
														
 
															+        # one-hot 编码
														
 
															+        y_onehot = np.zeros((y_train.shape[0], 2))
														
 
															+        y_onehot[np.arange(y_train.shape[0]), y_train] = 1
														
 
															+
														
 
															+        # 将One-hot 编码后的真实标签与网络的输出计算均方误差，并调用反向传播函数更新网络参数，循环迭代训练集1000 遍即可
														
 
															+        mses = []
														
 
															+        accuracys = []
														
 
															+        for i in range(max_epochs + 1):  # 训练1000 个epoch
														
 
															+            for j in range(len(X_train)):  # 一次训练一个样本
														
 
															+                self.backpropagation(X_train[j], y_onehot[j], learning_rate)
														
 
															+            if i % 10 == 0:
														
 
															+                # 打印出MSE Loss
														
 
															+                mse = np.mean(np.square(y_onehot - self.feed_forward(X_train)))
														
 
															+                mses.append(mse)
														
 
															+                accuracy = self.accuracy(self.predict(X_test), y_test.flatten())
														
 
															+                accuracys.append(accuracy)
														
 
															+                print('Epoch: #%s, MSE: %f' % (i, float(mse)))
														
 
															+                # 统计并打印准确率
														
 
															+                print('Accuracy: %.2f%%' % (accuracy * 100))
														
 
															+        return mses, accuracys
														
 
															+
														
 
															+    def predict(self, X):
														
 
															+        return self.feed_forward(X)
														
 
															+
														
 
															+    def accuracy(self, X, y):
														
 
															+        return np.sum(np.equal(np.argmax(X, axis=1), y)) / y.shape[0]
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    X, y, X_train, X_test, y_train, y_test = load_dataset()
														
 
															+    # 调用 make_plot 函数绘制数据的分布，其中 X 为 2D 坐标， y 为标签
														
 
															+    make_plot(X, y, "Classification Dataset Visualization ")
														
 
															+    plt.show()
														
 
															+    nn = NeuralNetwork()  # 实例化网络类
														
 
															+    nn.add_layer(Layer(2, 25, 'sigmoid'))  # 隐藏层 1, 2=>25
														
 
															+    nn.add_layer(Layer(25, 50, 'sigmoid'))  # 隐藏层 2, 25=>50
														
 
															+    nn.add_layer(Layer(50, 25, 'sigmoid'))  # 隐藏层 3, 50=>25
														
 
															+    nn.add_layer(Layer(25, 2, 'sigmoid'))  # 输出层, 25=>2
														
 
															+    mses, accuracys = nn.train(X_train, X_test, y_train, y_test, 0.01, 1000)
														
 
															+
														
 
															+    x = [i for i in range(0, 101, 10)]
														
 
															+
														
 
															+    # 绘制MES曲线
														
 
															+    plt.title("MES Loss")
														
 
															+    plt.plot(x, mses[:11], color='blue')
														
 
															+    plt.xlabel('Epoch')
														
 
															+    plt.ylabel('MSE')
														
 
															+    plt.savefig('训练误差曲线.svg')
														
 
															+    plt.close()
														
 
															+
														
 
															+    # 绘制Accuracy曲线
														
 
															+    plt.title("Accuracy")
														
 
															+    plt.plot(x, accuracys[:11], color='blue')
														
 
															+    plt.xlabel('Epoch')
														
 
															+    plt.ylabel('Accuracy')
														
 
															+    plt.savefig('网络测试准确率.svg')
														
 
															+    plt.close()
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    main()
														
--- a/ch07-反向传播算法/sigmoid_grad.py
+++ b/ch07-反向传播算法/sigmoid_grad.py
@@ -0,0 +1,14 @@
 
															+import tensorflow as tf 
														
 
															+
														
 
															+
														
 
															+a = tf.linspace(-10., 10., 10)
														
 
															+
														
 
															+with tf.GradientTape() as tape:
														
 
															+	tape.watch(a)
														
 
															+	y = tf.sigmoid(a)
														
 
															+
														
 
															+
														
 
															+grads = tape.gradient(y, [a])
														
 
															+print('x:', a.numpy())
														
 
															+print('y:', y.numpy())
														
 
															+print('grad:', grads[0].numpy())
														
--- a/ch07-反向传播算法/single_output_perceptron.py
+++ b/ch07-反向传播算法/single_output_perceptron.py
@@ -0,0 +1,26 @@
 
															+import tensorflow as tf 
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+x=tf.random.normal([1,3])
														
 
															+
														
 
															+w=tf.ones([3,1])
														
 
															+
														
 
															+b=tf.ones([1])
														
 
															+
														
 
															+y = tf.constant([1])
														
 
															+
														
 
															+
														
 
															+with tf.GradientTape() as tape:
														
 
															+
														
 
															+	tape.watch([w, b])
														
 
															+	logits = tf.sigmoid(x@w+b) 
														
 
															+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
														
 
															+
														
 
															+grads = tape.gradient(loss, [w, b])
														
 
															+print('w grad:', grads[0])
														
 
															+
														
 
															+print('b grad:', grads[1])
														
 
															+
														
 
															+
														
--- a/ch07/nb.py
+++ b/ch07/nb.py
@@ -1,11 +0,0 @@
 
															-#%%
														
 
															-import  tensorflow as tf
														
 
															-from    tensorflow import keras
														
 
															-from    tensorflow.keras import datasets, layers
														
 
															-
														
 
															-#%%
														
 
															-def sigmoid(x): # sigmoid函数，也可以直接使用tf.nn.sigmoid
														
 
															-    return 1 / (1 + tf.math.exp(-x))
														
 
															-
														
 
															-def derivative(x): # sigmoid导数的计算
														
 
															-    return sigmoid(x)*(1-sigmoid(x))
														
--- a/ch08-Keras高层接口/1.Metrics.pdf
+++ b/ch08-Keras高层接口/1.Metrics.pdf
--- a/ch08-Keras高层接口/2.Compile&Fit.pdf
+++ b/ch08-Keras高层接口/2.Compile&Fit.pdf
--- a/ch08-Keras高层接口/3.自定义层.pdf
+++ b/ch08-Keras高层接口/3.自定义层.pdf
--- a/ch08-Keras高层接口/Keras实战CIFAR10.pdf
+++ b/ch08-Keras高层接口/Keras实战CIFAR10.pdf
--- a/ch08-Keras高层接口/compile_fit.py
+++ b/ch08-Keras高层接口/compile_fit.py
@@ -0,0 +1,60 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    """
														
 
															+    x is a simple image, not a batch
														
 
															+    """
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    x = tf.reshape(x, [28*28])
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz)
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+sample = next(iter(db))
														
 
															+print(sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+
														
 
															+network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)
														
 
															+ 
														
 
															+network.evaluate(ds_val)
														
 
															+
														
 
															+sample = next(iter(ds_val))
														
 
															+x = sample[0]
														
 
															+y = sample[1] # one-hot
														
 
															+pred = network.predict(x) # [b, 10]
														
 
															+# convert back to number 
														
 
															+y = tf.argmax(y, axis=1)
														
 
															+pred = tf.argmax(pred, axis=1)
														
 
															+
														
 
															+print(pred)
														
 
															+print(y)
														
--- a/ch08-Keras高层接口/keras_train.py
+++ b/ch08-Keras高层接口/keras_train.py
@@ -0,0 +1,107 @@
 
															+import  os
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
														
 
															+
														
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+from 	tensorflow import keras
														
 
															+
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    # [0~255] => [-1~1]
														
 
															+    x = 2 * tf.cast(x, dtype=tf.float32) / 255. - 1.
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+# [50k, 32, 32, 3], [10k, 1]
														
 
															+(x, y), (x_val, y_val) = datasets.cifar10.load_data()
														
 
															+y = tf.squeeze(y)
														
 
															+y_val = tf.squeeze(y_val)
														
 
															+y = tf.one_hot(y, depth=10) # [50k, 10]
														
 
															+y_val = tf.one_hot(y_val, depth=10) # [10k, 10]
														
 
															+print('datasets:', x.shape, y.shape, x_val.shape, y_val.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+train_db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+train_db = train_db.map(preprocess).shuffle(10000).batch(batchsz)
														
 
															+test_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+test_db = test_db.map(preprocess).batch(batchsz)
														
 
															+
														
 
															+
														
 
															+sample = next(iter(train_db))
														
 
															+print('batch:', sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+class MyDense(layers.Layer):
														
 
															+    # to replace standard layers.Dense()
														
 
															+    def __init__(self, inp_dim, outp_dim):
														
 
															+        super(MyDense, self).__init__()
														
 
															+
														
 
															+        self.kernel = self.add_variable('w', [inp_dim, outp_dim])
														
 
															+        # self.bias = self.add_variable('b', [outp_dim])
														
 
															+
														
 
															+    def call(self, inputs, training=None):
														
 
															+
														
 
															+        x = inputs @ self.kernel
														
 
															+        return x
														
 
															+
														
 
															+class MyNetwork(keras.Model):
														
 
															+
														
 
															+    def __init__(self):
														
 
															+        super(MyNetwork, self).__init__()
														
 
															+
														
 
															+        self.fc1 = MyDense(32*32*3, 256)
														
 
															+        self.fc2 = MyDense(256, 128)
														
 
															+        self.fc3 = MyDense(128, 64)
														
 
															+        self.fc4 = MyDense(64, 32)
														
 
															+        self.fc5 = MyDense(32, 10)
														
 
															+
														
 
															+
														
 
															+
														
 
															+    def call(self, inputs, training=None):
														
 
															+        """
														
 
															+
														
 
															+        :param inputs: [b, 32, 32, 3]
														
 
															+        :param training:
														
 
															+        :return:
														
 
															+        """
														
 
															+        x = tf.reshape(inputs, [-1, 32*32*3])
														
 
															+        # [b, 32*32*3] => [b, 256]
														
 
															+        x = self.fc1(x)
														
 
															+        x = tf.nn.relu(x)
														
 
															+        # [b, 256] => [b, 128]
														
 
															+        x = self.fc2(x)
														
 
															+        x = tf.nn.relu(x)
														
 
															+        # [b, 128] => [b, 64]
														
 
															+        x = self.fc3(x)
														
 
															+        x = tf.nn.relu(x)
														
 
															+        # [b, 64] => [b, 32]
														
 
															+        x = self.fc4(x)
														
 
															+        x = tf.nn.relu(x)
														
 
															+        # [b, 32] => [b, 10]
														
 
															+        x = self.fc5(x)
														
 
															+
														
 
															+        return x
														
 
															+
														
 
															+
														
 
															+network = MyNetwork()
														
 
															+network.compile(optimizer=optimizers.Adam(lr=1e-3),
														
 
															+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+                metrics=['accuracy'])
														
 
															+network.fit(train_db, epochs=15, validation_data=test_db, validation_freq=1)
														
 
															+
														
 
															+network.evaluate(test_db)
														
 
															+network.save_weights('ckpt/weights.ckpt')
														
 
															+del network
														
 
															+print('saved to ckpt/weights.ckpt')
														
 
															+
														
 
															+
														
 
															+network = MyNetwork()
														
 
															+network.compile(optimizer=optimizers.Adam(lr=1e-3),
														
 
															+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+                metrics=['accuracy'])
														
 
															+network.load_weights('ckpt/weights.ckpt')
														
 
															+print('loaded weights from file.')
														
 
															+network.evaluate(test_db)
														
--- a/ch08-Keras高层接口/layer_model.py
+++ b/ch08-Keras高层接口/layer_model.py
@@ -0,0 +1,102 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+from 	tensorflow import keras
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    """
														
 
															+    x is a simple image, not a batch
														
 
															+    """
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    x = tf.reshape(x, [28*28])
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz)
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+sample = next(iter(db))
														
 
															+print(sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+
														
 
															+class MyDense(layers.Layer):
														
 
															+
														
 
															+	def __init__(self, inp_dim, outp_dim):
														
 
															+		super(MyDense, self).__init__()
														
 
															+
														
 
															+		self.kernel = self.add_weight('w', [inp_dim, outp_dim])
														
 
															+		self.bias = self.add_weight('b', [outp_dim])
														
 
															+
														
 
															+	def call(self, inputs, training=None):
														
 
															+
														
 
															+		out = inputs @ self.kernel + self.bias
														
 
															+
														
 
															+		return out 
														
 
															+
														
 
															+class MyModel(keras.Model):
														
 
															+
														
 
															+	def __init__(self):
														
 
															+		super(MyModel, self).__init__()
														
 
															+
														
 
															+		self.fc1 = MyDense(28*28, 256)
														
 
															+		self.fc2 = MyDense(256, 128)
														
 
															+		self.fc3 = MyDense(128, 64)
														
 
															+		self.fc4 = MyDense(64, 32)
														
 
															+		self.fc5 = MyDense(32, 10)
														
 
															+
														
 
															+	def call(self, inputs, training=None):
														
 
															+
														
 
															+		x = self.fc1(inputs)
														
 
															+		x = tf.nn.relu(x)
														
 
															+		x = self.fc2(x)
														
 
															+		x = tf.nn.relu(x)
														
 
															+		x = self.fc3(x)
														
 
															+		x = tf.nn.relu(x)
														
 
															+		x = self.fc4(x)
														
 
															+		x = tf.nn.relu(x)
														
 
															+		x = self.fc5(x) 
														
 
															+
														
 
															+		return x
														
 
															+
														
 
															+
														
 
															+network = MyModel()
														
 
															+
														
 
															+
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+
														
 
															+network.fit(db, epochs=5, validation_data=ds_val,
														
 
															+              validation_freq=2)
														
 
															+ 
														
 
															+network.evaluate(ds_val)
														
 
															+
														
 
															+sample = next(iter(ds_val))
														
 
															+x = sample[0]
														
 
															+y = sample[1] # one-hot
														
 
															+pred = network.predict(x) # [b, 10]
														
 
															+# convert back to number 
														
 
															+y = tf.argmax(y, axis=1)
														
 
															+pred = tf.argmax(pred, axis=1)
														
 
															+
														
 
															+print(pred)
														
 
															+print(y)
														
--- a/ch08-Keras高层接口/metrics.py
+++ b/ch08-Keras高层接口/metrics.py
@@ -0,0 +1,92 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
														
 
															+
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+optimizer = optimizers.Adam(lr=0.01)
														
 
															+
														
 
															+acc_meter = metrics.Accuracy()
														
 
															+loss_meter = metrics.Mean()
														
 
															+
														
 
															+
														
 
															+for step, (x,y) in enumerate(db):
														
 
															+
														
 
															+    with tf.GradientTape() as tape:
														
 
															+        # [b, 28, 28] => [b, 784]
														
 
															+        x = tf.reshape(x, (-1, 28*28))
														
 
															+        # [b, 784] => [b, 10]
														
 
															+        out = network(x)
														
 
															+        # [b] => [b, 10]
														
 
															+        y_onehot = tf.one_hot(y, depth=10) 
														
 
															+        # [b]
														
 
															+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
														
 
															+
														
 
															+        loss_meter.update_state(loss)
														
 
															+
														
 
															+ 
														
 
															+
														
 
															+    grads = tape.gradient(loss, network.trainable_variables)
														
 
															+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
														
 
															+
														
 
															+
														
 
															+    if step % 100 == 0:
														
 
															+
														
 
															+        print(step, 'loss:', loss_meter.result().numpy()) 
														
 
															+        loss_meter.reset_states()
														
 
															+
														
 
															+
														
 
															+    # evaluate
														
 
															+    if step % 500 == 0:
														
 
															+        total, total_correct = 0., 0
														
 
															+        acc_meter.reset_states()
														
 
															+
														
 
															+        for step, (x, y) in enumerate(ds_val): 
														
 
															+            # [b, 28, 28] => [b, 784]
														
 
															+            x = tf.reshape(x, (-1, 28*28))
														
 
															+            # [b, 784] => [b, 10]
														
 
															+            out = network(x) 
														
 
															+
														
 
															+
														
 
															+            # [b, 10] => [b] 
														
 
															+            pred = tf.argmax(out, axis=1) 
														
 
															+            pred = tf.cast(pred, dtype=tf.int32)
														
 
															+            # bool type 
														
 
															+            correct = tf.equal(pred, y)
														
 
															+            # bool tensor => int tensor => numpy
														
 
															+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
														
 
															+            total += x.shape[0]
														
 
															+
														
 
															+            acc_meter.update_state(y, pred)
														
 
															+
														
 
															+
														
 
															+        print(step, 'Evaluate Acc:', total_correct/total, acc_meter.result().numpy())
														
--- a/ch08-Keras高层接口/nb.py
+++ b/ch08-Keras高层接口/nb.py
--- a/ch08-Keras高层接口/pretained.py
+++ b/ch08-Keras高层接口/pretained.py
--- a/ch08-Keras高层接口/save_load_model.py
+++ b/ch08-Keras高层接口/save_load_model.py
@@ -0,0 +1,69 @@
 
															+import  os
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
														
 
															+
														
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    """
														
 
															+    x is a simple image, not a batch
														
 
															+    """
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    x = tf.reshape(x, [28*28])
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz)
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+sample = next(iter(db))
														
 
															+print(sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+
														
 
															+network.fit(db, epochs=3, validation_data=ds_val, validation_freq=2)
														
 
															+ 
														
 
															+network.evaluate(ds_val)
														
 
															+
														
 
															+network.save('model.h5')
														
 
															+print('saved total model.')
														
 
															+del network
														
 
															+
														
 
															+print('loaded model from file.')
														
 
															+network = tf.keras.models.load_model('model.h5', compile=False)
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+        loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+        metrics=['accuracy']
														
 
															+    )
														
 
															+x_val = tf.cast(x_val, dtype=tf.float32) / 255.
														
 
															+x_val = tf.reshape(x_val, [-1, 28*28])
														
 
															+y_val = tf.cast(y_val, dtype=tf.int32)
														
 
															+y_val = tf.one_hot(y_val, depth=10)
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(128)
														
 
															+network.evaluate(ds_val)
														
--- a/ch08-Keras高层接口/save_load_weight.py
+++ b/ch08-Keras高层接口/save_load_weight.py
@@ -0,0 +1,69 @@
 
															+import  os
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
														
 
															+
														
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    """
														
 
															+    x is a simple image, not a batch
														
 
															+    """
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    x = tf.reshape(x, [28*28])
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz)
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+sample = next(iter(db))
														
 
															+print(sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+
														
 
															+network.fit(db, epochs=3, validation_data=ds_val, validation_freq=2)
														
 
															+ 
														
 
															+network.evaluate(ds_val)
														
 
															+
														
 
															+network.save_weights('weights.ckpt')
														
 
															+print('saved weights.')
														
 
															+del network
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+network.load_weights('weights.ckpt')
														
 
															+print('loaded weights!')
														
 
															+network.evaluate(ds_val)
														
--- a/ch08-Keras高层接口/模型加载与保存.pdf
+++ b/ch08-Keras高层接口/模型加载与保存.pdf
--- a/ch09-过拟合/9.8-over-fitting-and-under-fitting.py
+++ b/ch09-过拟合/9.8-over-fitting-and-under-fitting.py
@@ -0,0 +1,224 @@
 
															+#!/usr/bin/env python
														
 
															+# encoding: utf-8
														
 
															+"""
														
 
															+@author: HuRuiFeng
														
 
															+@file: 9.8-over-fitting-and-under-fitting.py
														
 
															+@time: 2020/2/25 21:14
														
 
															+@desc: 9.8 过拟合问题实战的代码
														
 
															+       from mpl_toolkits.mplot3d import Axes3D 这个必须添加，解决3d报错问题
														
 
															+"""
														
 
															+
														
 
															+import matplotlib.pyplot as plt
														
 
															+# 导入数据集生成工具
														
 
															+import numpy as np
														
 
															+import seaborn as sns
														
 
															+from sklearn.datasets import make_moons
														
 
															+from sklearn.model_selection import train_test_split
														
 
															+from tensorflow.keras import layers, Sequential, regularizers
														
 
															+from mpl_toolkits.mplot3d import Axes3D
														
 
															+
														
 
															+plt.rcParams['font.size'] = 16
														
 
															+plt.rcParams['font.family'] = ['STKaiti']
														
 
															+plt.rcParams['axes.unicode_minus'] = False
														
 
															+
														
 
															+OUTPUT_DIR = 'output_dir'
														
 
															+N_EPOCHS = 500
														
 
															+
														
 
															+
														
 
															+def load_dataset():
														
 
															+    # 采样点数
														
 
															+    N_SAMPLES = 1000
														
 
															+    # 测试数量比率
														
 
															+    TEST_SIZE = None
														
 
															+
														
 
															+    # 从 moon 分布中随机采样 1000 个点，并切分为训练集-测试集
														
 
															+    X, y = make_moons(n_samples=N_SAMPLES, noise=0.25, random_state=100)
														
 
															+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
														
 
															+    return X, y, X_train, X_test, y_train, y_test
														
 
															+
														
 
															+
														
 
															+def make_plot(X, y, plot_name, file_name, XX=None, YY=None, preds=None, dark=False, output_dir=OUTPUT_DIR):
														
 
															+    # 绘制数据集的分布， X 为 2D 坐标， y 为数据点的标签
														
 
															+    if dark:
														
 
															+        plt.style.use('dark_background')
														
 
															+    else:
														
 
															+        sns.set_style("whitegrid")
														
 
															+    axes = plt.gca()
														
 
															+    axes.set_xlim([-2, 3])
														
 
															+    axes.set_ylim([-1.5, 2])
														
 
															+    axes.set(xlabel="$x_1$", ylabel="$x_2$")
														
 
															+    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
														
 
															+    plt.subplots_adjust(left=0.20)
														
 
															+    plt.subplots_adjust(right=0.80)
														
 
															+    if XX is not None and YY is not None and preds is not None:
														
 
															+        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=0.08, cmap=plt.cm.Spectral)
														
 
															+        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
														
 
															+    # 绘制散点图，根据标签区分颜色m=markers
														
 
															+    markers = ['o' if i == 1 else 's' for i in y.ravel()]
														
 
															+    mscatter(X[:, 0], X[:, 1], c=y.ravel(), s=20, cmap=plt.cm.Spectral, edgecolors='none', m=markers, ax=axes)
														
 
															+    # 保存矢量图
														
 
															+    plt.savefig(output_dir + '/' + file_name)
														
 
															+    plt.close()
														
 
															+
														
 
															+
														
 
															+def mscatter(x, y, ax=None, m=None, **kw):
														
 
															+    import matplotlib.markers as mmarkers
														
 
															+    if not ax: ax = plt.gca()
														
 
															+    sc = ax.scatter(x, y, **kw)
														
 
															+    if (m is not None) and (len(m) == len(x)):
														
 
															+        paths = []
														
 
															+        for marker in m:
														
 
															+            if isinstance(marker, mmarkers.MarkerStyle):
														
 
															+                marker_obj = marker
														
 
															+            else:
														
 
															+                marker_obj = mmarkers.MarkerStyle(marker)
														
 
															+            path = marker_obj.get_path().transformed(
														
 
															+                marker_obj.get_transform())
														
 
															+            paths.append(path)
														
 
															+        sc.set_paths(paths)
														
 
															+    return sc
														
 
															+
														
 
															+
														
 
															+def network_layers_influence(X_train, y_train):
														
 
															+    # 构建 5 种不同层数的网络
														
 
															+    for n in range(5):
														
 
															+        # 创建容器
														
 
															+        model = Sequential()
														
 
															+        # 创建第一层
														
 
															+        model.add(layers.Dense(8, input_dim=2, activation='relu'))
														
 
															+        # 添加 n 层，共 n+2 层
														
 
															+        for _ in range(n):
														
 
															+            model.add(layers.Dense(32, activation='relu'))
														
 
															+        # 创建最末层
														
 
															+        model.add(layers.Dense(1, activation='sigmoid'))
														
 
															+        # 模型装配与训练
														
 
															+        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
														
 
															+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
														
 
															+        # 绘制不同层数的网络决策边界曲线
														
 
															+        # 可视化的 x 坐标范围为[-2, 3]
														
 
															+        xx = np.arange(-2, 3, 0.01)
														
 
															+        # 可视化的 y 坐标范围为[-1.5, 2]
														
 
															+        yy = np.arange(-1.5, 2, 0.01)
														
 
															+        # 生成 x-y 平面采样网格点，方便可视化
														
 
															+        XX, YY = np.meshgrid(xx, yy)
														
 
															+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
														
 
															+        title = "网络层数：{0}".format(2 + n)
														
 
															+        file = "网络容量_%i.png" % (2 + n)
														
 
															+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/network_layers')
														
 
															+
														
 
															+
														
 
															+def dropout_influence(X_train, y_train):
														
 
															+    # 构建 5 种不同数量 Dropout 层的网络
														
 
															+    for n in range(5):
														
 
															+        # 创建容器
														
 
															+        model = Sequential()
														
 
															+        # 创建第一层
														
 
															+        model.add(layers.Dense(8, input_dim=2, activation='relu'))
														
 
															+        counter = 0
														
 
															+        # 网络层数固定为 5
														
 
															+        for _ in range(5):
														
 
															+            model.add(layers.Dense(64, activation='relu'))
														
 
															+        # 添加 n 个 Dropout 层
														
 
															+        if counter < n:
														
 
															+            counter += 1
														
 
															+            model.add(layers.Dropout(rate=0.5))
														
 
															+
														
 
															+        # 输出层
														
 
															+        model.add(layers.Dense(1, activation='sigmoid'))
														
 
															+        # 模型装配
														
 
															+        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
														
 
															+        # 训练
														
 
															+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
														
 
															+        # 绘制不同 Dropout 层数的决策边界曲线
														
 
															+        # 可视化的 x 坐标范围为[-2, 3]
														
 
															+        xx = np.arange(-2, 3, 0.01)
														
 
															+        # 可视化的 y 坐标范围为[-1.5, 2]
														
 
															+        yy = np.arange(-1.5, 2, 0.01)
														
 
															+        # 生成 x-y 平面采样网格点，方便可视化
														
 
															+        XX, YY = np.meshgrid(xx, yy)
														
 
															+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
														
 
															+        title = "无Dropout层" if n == 0 else "{0}层 Dropout层".format(n)
														
 
															+        file = "Dropout_%i.png" % n
														
 
															+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/dropout')
														
 
															+
														
 
															+
														
 
															+def build_model_with_regularization(_lambda):
														
 
															+    # 创建带正则化项的神经网络
														
 
															+    model = Sequential()
														
 
															+    model.add(layers.Dense(8, input_dim=2, activation='relu'))  # 不带正则化项
														
 
															+    # 2-4层均是带 L2 正则化项
														
 
															+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
														
 
															+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
														
 
															+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
														
 
															+    # 输出层
														
 
															+    model.add(layers.Dense(1, activation='sigmoid'))
														
 
															+    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])  # 模型装配
														
 
															+    return model
														
 
															+
														
 
															+
														
 
															+def plot_weights_matrix(model, layer_index, plot_name, file_name, output_dir=OUTPUT_DIR):
														
 
															+    # 绘制权值范围函数
														
 
															+    # 提取指定层的权值矩阵
														
 
															+    weights = model.layers[layer_index].get_weights()[0]
														
 
															+    shape = weights.shape
														
 
															+    # 生成和权值矩阵等大小的网格坐标
														
 
															+    X = np.array(range(shape[1]))
														
 
															+    Y = np.array(range(shape[0]))
														
 
															+    X, Y = np.meshgrid(X, Y)
														
 
															+    # 绘制3D图
														
 
															+    fig = plt.figure()
														
 
															+    ax = fig.gca(projection='3d')
														
 
															+    ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
														
 
															+    ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
														
 
															+    ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
														
 
															+    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
														
 
															+    # 绘制权值矩阵范围
														
 
															+    ax.plot_surface(X, Y, weights, cmap=plt.get_cmap('rainbow'), linewidth=0)
														
 
															+    # 设置坐标轴名
														
 
															+    ax.set_xlabel('网格x坐标', fontsize=16, rotation=0, fontproperties='SimHei')
														
 
															+    ax.set_ylabel('网格y坐标', fontsize=16, rotation=0, fontproperties='SimHei')
														
 
															+    ax.set_zlabel('权值', fontsize=16, rotation=90, fontproperties='SimHei')
														
 
															+    # 保存矩阵范围图
														
 
															+    plt.savefig(output_dir + "/" + file_name + ".svg")
														
 
															+    plt.close(fig)
														
 
															+
														
 
															+
														
 
															+def regularizers_influence(X_train, y_train):
														
 
															+    for _lambda in [1e-5, 1e-3, 1e-1, 0.12, 0.13]:  # 设置不同的正则化系数
														
 
															+        # 创建带正则化项的模型
														
 
															+        model = build_model_with_regularization(_lambda)
														
 
															+        # 模型训练
														
 
															+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
														
 
															+        # 绘制权值范围
														
 
															+        layer_index = 2
														
 
															+        plot_title = "正则化系数：{}".format(_lambda)
														
 
															+        file_name = "正则化网络权值_" + str(_lambda)
														
 
															+        # 绘制网络权值范围图
														
 
															+        plot_weights_matrix(model, layer_index, plot_title, file_name, output_dir=OUTPUT_DIR + '/regularizers')
														
 
															+        # 绘制不同正则化系数的决策边界线
														
 
															+        # 可视化的 x 坐标范围为[-2, 3]
														
 
															+        xx = np.arange(-2, 3, 0.01)
														
 
															+        # 可视化的 y 坐标范围为[-1.5, 2]
														
 
															+        yy = np.arange(-1.5, 2, 0.01)
														
 
															+        # 生成 x-y 平面采样网格点，方便可视化
														
 
															+        XX, YY = np.meshgrid(xx, yy)
														
 
															+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
														
 
															+        title = "正则化系数：{}".format(_lambda)
														
 
															+        file = "正则化_%g.svg" % _lambda
														
 
															+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/regularizers')
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    X, y, X_train, X_test, y_train, y_test = load_dataset()
														
 
															+    # 绘制数据集分布
														
 
															+    make_plot(X, y, None, "月牙形状二分类数据集分布.svg")
														
 
															+    # 网络层数的影响
														
 
															+    network_layers_influence(X_train, y_train)
														
 
															+    # Dropout的影响
														
 
															+    dropout_influence(X_train, y_train)
														
 
															+    # 正则化的影响
														
 
															+    regularizers_influence(X_train, y_train)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    main()
														
--- a/ch09-过拟合/Regularization.pdf
+++ b/ch09-过拟合/Regularization.pdf
--- a/ch09-过拟合/compile_fit.py
+++ b/ch09-过拟合/compile_fit.py
@@ -0,0 +1,61 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    """
														
 
															+    x is a simple image, not a batch
														
 
															+    """
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    x = tf.reshape(x, [28*28])
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz)
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+sample = next(iter(db))
														
 
															+print(sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+
														
 
															+network.fit(db, epochs=5, validation_data=ds_val,
														
 
															+              validation_steps=2)
														
 
															+ 
														
 
															+network.evaluate(ds_val)
														
 
															+
														
 
															+sample = next(iter(ds_val))
														
 
															+x = sample[0]
														
 
															+y = sample[1] # one-hot
														
 
															+pred = network.predict(x) # [b, 10]
														
 
															+# convert back to number 
														
 
															+y = tf.argmax(y, axis=1)
														
 
															+pred = tf.argmax(pred, axis=1)
														
 
															+
														
 
															+print(pred)
														
 
															+print(y)
														
--- a/ch09-过拟合/dropout.py
+++ b/ch09-过拟合/dropout.py
@@ -0,0 +1,111 @@
 
															+import  os
														
 
															+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
														
 
															+
														
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
														
 
															+
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dropout(0.5), # 0.5 rate to drop
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dropout(0.5), # 0.5 rate to drop
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+optimizer = optimizers.Adam(lr=0.01)
														
 
															+
														
 
															+
														
 
															+
														
 
															+for step, (x,y) in enumerate(db):
														
 
															+
														
 
															+    with tf.GradientTape() as tape:
														
 
															+        # [b, 28, 28] => [b, 784]
														
 
															+        x = tf.reshape(x, (-1, 28*28))
														
 
															+        # [b, 784] => [b, 10]
														
 
															+        out = network(x, training=True)
														
 
															+        # [b] => [b, 10]
														
 
															+        y_onehot = tf.one_hot(y, depth=10) 
														
 
															+        # [b]
														
 
															+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
														
 
															+
														
 
															+
														
 
															+        loss_regularization = []
														
 
															+        for p in network.trainable_variables:
														
 
															+            loss_regularization.append(tf.nn.l2_loss(p))
														
 
															+        loss_regularization = tf.reduce_sum(tf.stack(loss_regularization))
														
 
															+
														
 
															+        loss = loss + 0.0001 * loss_regularization
														
 
															+ 
														
 
															+
														
 
															+    grads = tape.gradient(loss, network.trainable_variables)
														
 
															+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
														
 
															+
														
 
															+
														
 
															+    if step % 100 == 0:
														
 
															+
														
 
															+        print(step, 'loss:', float(loss), 'loss_regularization:', float(loss_regularization)) 
														
 
															+
														
 
															+
														
 
															+    # evaluate
														
 
															+    if step % 500 == 0:
														
 
															+        total, total_correct = 0., 0
														
 
															+
														
 
															+        for step, (x, y) in enumerate(ds_val): 
														
 
															+            # [b, 28, 28] => [b, 784]
														
 
															+            x = tf.reshape(x, (-1, 28*28))
														
 
															+            # [b, 784] => [b, 10] 
														
 
															+            out = network(x, training=True)  
														
 
															+            # [b, 10] => [b] 
														
 
															+            pred = tf.argmax(out, axis=1) 
														
 
															+            pred = tf.cast(pred, dtype=tf.int32)
														
 
															+            # bool type 
														
 
															+            correct = tf.equal(pred, y)
														
 
															+            # bool tensor => int tensor => numpy
														
 
															+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
														
 
															+            total += x.shape[0]
														
 
															+
														
 
															+        print(step, 'Evaluate Acc with drop:', total_correct/total)
														
 
															+
														
 
															+        total, total_correct = 0., 0
														
 
															+
														
 
															+        for step, (x, y) in enumerate(ds_val): 
														
 
															+            # [b, 28, 28] => [b, 784]
														
 
															+            x = tf.reshape(x, (-1, 28*28))
														
 
															+            # [b, 784] => [b, 10] 
														
 
															+            out = network(x, training=False)  
														
 
															+            # [b, 10] => [b] 
														
 
															+            pred = tf.argmax(out, axis=1) 
														
 
															+            pred = tf.cast(pred, dtype=tf.int32)
														
 
															+            # bool type 
														
 
															+            correct = tf.equal(pred, y)
														
 
															+            # bool tensor => int tensor => numpy
														
 
															+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
														
 
															+            total += x.shape[0]
														
 
															+
														
 
															+        print(step, 'Evaluate Acc without drop:', total_correct/total)
														
--- a/ch09-过拟合/lenna.png
+++ b/ch09-过拟合/lenna.png
--- a/ch09-过拟合/lenna_crop.png
+++ b/ch09-过拟合/lenna_crop.png
--- a/ch09-过拟合/lenna_crop2.png
+++ b/ch09-过拟合/lenna_crop2.png
--- a/ch09-过拟合/lenna_eras.png
+++ b/ch09-过拟合/lenna_eras.png
--- a/ch09-过拟合/lenna_eras2.png
+++ b/ch09-过拟合/lenna_eras2.png
--- a/ch09-过拟合/lenna_flip.png
+++ b/ch09-过拟合/lenna_flip.png
--- a/ch09-过拟合/lenna_flip2.png
+++ b/ch09-过拟合/lenna_flip2.png
--- a/ch09-过拟合/lenna_guassian.png
+++ b/ch09-过拟合/lenna_guassian.png
--- a/ch09-过拟合/lenna_perspective.png
+++ b/ch09-过拟合/lenna_perspective.png
--- a/ch09-过拟合/lenna_resize.png
+++ b/ch09-过拟合/lenna_resize.png
--- a/ch09-过拟合/lenna_rotate.png
+++ b/ch09-过拟合/lenna_rotate.png
--- a/ch09-过拟合/lenna_rotate2.png
+++ b/ch09-过拟合/lenna_rotate2.png
--- a/ch09-过拟合/misc.pdf
+++ b/ch09-过拟合/misc.pdf
--- a/ch09-过拟合/regularization.py
+++ b/ch09-过拟合/regularization.py
@@ -0,0 +1,88 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_val, y_val) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+db = tf.data.Dataset.from_tensor_slices((x,y))
														
 
															+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
														
 
															+
														
 
															+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
														
 
															+ds_val = ds_val.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+optimizer = optimizers.Adam(lr=0.01)
														
 
															+
														
 
															+
														
 
															+
														
 
															+for step, (x,y) in enumerate(db):
														
 
															+
														
 
															+    with tf.GradientTape() as tape:
														
 
															+        # [b, 28, 28] => [b, 784]
														
 
															+        x = tf.reshape(x, (-1, 28*28))
														
 
															+        # [b, 784] => [b, 10]
														
 
															+        out = network(x)
														
 
															+        # [b] => [b, 10]
														
 
															+        y_onehot = tf.one_hot(y, depth=10) 
														
 
															+        # [b]
														
 
															+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
														
 
															+
														
 
															+
														
 
															+        loss_regularization = []
														
 
															+        for p in network.trainable_variables:
														
 
															+            loss_regularization.append(tf.nn.l2_loss(p))
														
 
															+        loss_regularization = tf.reduce_sum(tf.stack(loss_regularization))
														
 
															+
														
 
															+        loss = loss + 0.0001 * loss_regularization
														
 
															+ 
														
 
															+
														
 
															+    grads = tape.gradient(loss, network.trainable_variables)
														
 
															+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
														
 
															+
														
 
															+
														
 
															+    if step % 100 == 0:
														
 
															+
														
 
															+        print(step, 'loss:', float(loss), 'loss_regularization:', float(loss_regularization)) 
														
 
															+
														
 
															+
														
 
															+    # evaluate
														
 
															+    if step % 500 == 0:
														
 
															+        total, total_correct = 0., 0
														
 
															+
														
 
															+        for step, (x, y) in enumerate(ds_val): 
														
 
															+            # [b, 28, 28] => [b, 784]
														
 
															+            x = tf.reshape(x, (-1, 28*28))
														
 
															+            # [b, 784] => [b, 10]
														
 
															+            out = network(x) 
														
 
															+            # [b, 10] => [b] 
														
 
															+            pred = tf.argmax(out, axis=1) 
														
 
															+            pred = tf.cast(pred, dtype=tf.int32)
														
 
															+            # bool type 
														
 
															+            correct = tf.equal(pred, y)
														
 
															+            # bool tensor => int tensor => numpy
														
 
															+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
														
 
															+            total += x.shape[0]
														
 
															+
														
 
															+        print(step, 'Evaluate Acc:', total_correct/total)
														
--- a/ch09-过拟合/train_evalute_test.py
+++ b/ch09-过拟合/train_evalute_test.py
@@ -0,0 +1,73 @@
 
															+import  tensorflow as tf
														
 
															+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
														
 
															+
														
 
															+
														
 
															+def preprocess(x, y):
														
 
															+    """
														
 
															+    x is a simple image, not a batch
														
 
															+    """
														
 
															+    x = tf.cast(x, dtype=tf.float32) / 255.
														
 
															+    x = tf.reshape(x, [28*28])
														
 
															+    y = tf.cast(y, dtype=tf.int32)
														
 
															+    y = tf.one_hot(y, depth=10)
														
 
															+    return x,y
														
 
															+
														
 
															+
														
 
															+batchsz = 128
														
 
															+(x, y), (x_test, y_test) = datasets.mnist.load_data()
														
 
															+print('datasets:', x.shape, y.shape, x.min(), x.max())
														
 
															+
														
 
															+
														
 
															+
														
 
															+idx = tf.range(60000)
														
 
															+idx = tf.random.shuffle(idx)
														
 
															+x_train, y_train = tf.gather(x, idx[:50000]), tf.gather(y, idx[:50000])
														
 
															+x_val, y_val = tf.gather(x, idx[-10000:]) , tf.gather(y, idx[-10000:])
														
 
															+print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)
														
 
															+db_train = tf.data.Dataset.from_tensor_slices((x_train,y_train))
														
 
															+db_train = db_train.map(preprocess).shuffle(50000).batch(batchsz)
														
 
															+
														
 
															+db_val = tf.data.Dataset.from_tensor_slices((x_val,y_val))
														
 
															+db_val = db_val.map(preprocess).shuffle(10000).batch(batchsz)
														
 
															+
														
 
															+
														
 
															+
														
 
															+db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
														
 
															+db_test = db_test.map(preprocess).batch(batchsz) 
														
 
															+
														
 
															+sample = next(iter(db_train))
														
 
															+print(sample[0].shape, sample[1].shape)
														
 
															+
														
 
															+
														
 
															+network = Sequential([layers.Dense(256, activation='relu'),
														
 
															+                     layers.Dense(128, activation='relu'),
														
 
															+                     layers.Dense(64, activation='relu'),
														
 
															+                     layers.Dense(32, activation='relu'),
														
 
															+                     layers.Dense(10)])
														
 
															+network.build(input_shape=(None, 28*28))
														
 
															+network.summary()
														
 
															+
														
 
															+
														
 
															+
														
 
															+
														
 
															+network.compile(optimizer=optimizers.Adam(lr=0.01),
														
 
															+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
														
 
															+		metrics=['accuracy']
														
 
															+	)
														
 
															+
														
 
															+network.fit(db_train, epochs=6, validation_data=db_val, validation_freq=2)
														
 
															+
														
 
															+print('Test performance:') 
														
 
															+network.evaluate(db_test)
														
 
															+ 
														
 
															+
														
 
															+sample = next(iter(db_test))
														
 
															+x = sample[0]
														
 
															+y = sample[1] # one-hot
														
 
															+pred = network.predict(x) # [b, 10]
														
 
															+# convert back to number 
														
 
															+y = tf.argmax(y, axis=1)
														
 
															+pred = tf.argmax(pred, axis=1)
														
 
															+
														
 
															+print(pred)
														
 
															+print(y)
														
--- a/ch09-过拟合/交叉验证.pdf
+++ b/ch09-过拟合/交叉验证.pdf
--- a/ch09-过拟合/学习率与动量.pdf
+++ b/ch09-过拟合/学习率与动量.pdf
--- a/ch09-过拟合/过拟合与欠拟合.pdf
+++ b/ch09-过拟合/过拟合与欠拟合.pdf
--- a/ch09/nb.py
+++ b/ch09/nb.py
@@ -1,36 +0,0 @@
 
															-#%%
														
 
															-import tensorflow as tf 
														
 
															-from    tensorflow.keras import layers
														
 
															-
														
 
															-pip install -U scikit-learn
														
 
															-
														
 
															-#%%
														
 
															-# 添加dropout操作
														
 
															-x = tf.nn.dropout(x, rate=0.5)
														
 
															-# 添加Dropout层
														
 
															-model.add(layers.Dropout(rate=0.5))
														
 
															-
														
 
															-# 手动计算每个张量的范数
														
 
															-loss_reg = lambda_ * tf.reduce_sum(tf.square(w))
														
 
															-# 在层方式时添加范数函数
														
 
															-Dense(256, activation='relu',
														
 
															-                    kernel_regularizer=regularizers.l2(_lambda))
														
 
															-
														
 
															-#%%
														
 
															-#                     
														
 
															-# 创建网络参数w1,w2
														
 
															-w1 = tf.random.normal([4,3])
														
 
															-w2 = tf.random.normal([4,2])
														
 
															-# 计算L1正则化项
														
 
															-loss_reg = tf.reduce_sum(tf.math.abs(w1))\
														
 
															-    + tf.reduce_sum(tf.math.abs(w2))
														
 
															-
														
 
															-
														
 
															-# 计算L2正则化项
														
 
															-loss_reg = tf.reduce_sum(tf.square(w1))\
														
 
															-    + tf.reduce_sum(tf.square(w2))
														
 
															-
														
 
															-#%%
														
 
															-loss_reg
														
 
															-
														
 
															-#%%
														
--- a/ch10-卷积神经网络/BatchNorm.pdf
+++ b/ch10-卷积神经网络/BatchNorm.pdf
--- a/ch10-卷积神经网络/CIFAR与VGG实战.pdf
+++ b/ch10-卷积神经网络/CIFAR与VGG实战.pdf
--- a/ch10-卷积神经网络/ResNet与DenseNet.pdf
+++ b/ch10-卷积神经网络/ResNet与DenseNet.pdf