Ver código fonte

完善PPT和代码

Jackie Loong 5 anos atrás
pai
commit
70f833b3f3
100 arquivos alterados com 9933 adições e 204 exclusões
  1. 0 0
      ch01-人工智能绪论/autograd.py
  2. 0 0
      ch01-人工智能绪论/gpu_accelerate.py
  3. 1 1
      ch01-人工智能绪论/tf1.py
  4. 0 0
      ch01-人工智能绪论/tf2.py
  5. 0 0
      ch02-回归问题/data.csv
  6. 0 0
      ch02-回归问题/linear_regression.py
  7. BIN
      ch02-回归问题/回归实战.pdf
  8. BIN
      ch02-回归问题/回归问题.pdf
  9. 0 0
      ch03-分类问题/forward_layer.py
  10. 2 2
      ch03-分类问题/forward_tensor.py
  11. 60 60
      ch03-分类问题/main.py
  12. BIN
      ch03-分类问题/手写数字问题.pdf
  13. BIN
      ch03-分类问题/手写数字问题体验.pdf
  14. 0 73
      ch03/readMNIST.py
  15. 109 0
      ch04-TensorFlow基础/4.10-forward-prop.py
  16. BIN
      ch04-TensorFlow基础/Broadcasting.pdf
  17. BIN
      ch04-TensorFlow基础/MNIST数据集的前向传播训练误差曲线.png
  18. 7489 0
      ch04-TensorFlow基础/ch04-TensorFlow基础.ipynb
  19. BIN
      ch04-TensorFlow基础/创建Tensor.pdf
  20. BIN
      ch04-TensorFlow基础/前向传播.pdf
  21. BIN
      ch04-TensorFlow基础/数学运算.pdf
  22. BIN
      ch04-TensorFlow基础/数据类型.pdf
  23. BIN
      ch04-TensorFlow基础/索引与切片-1.pdf
  24. BIN
      ch04-TensorFlow基础/索引与切片-2.pdf
  25. BIN
      ch04-TensorFlow基础/维度变换.pdf
  26. 37 0
      ch05-TensorFlow进阶/acc_topk.py
  27. 85 0
      ch05-TensorFlow进阶/gradient_clip.py
  28. 0 0
      ch05-TensorFlow进阶/mnist_tensor.py
  29. BIN
      ch05-TensorFlow进阶/合并与分割.pdf
  30. BIN
      ch05-TensorFlow进阶/填充与复制.pdf
  31. BIN
      ch05-TensorFlow进阶/张量排序.pdf
  32. BIN
      ch05-TensorFlow进阶/张量限幅.pdf
  33. BIN
      ch05-TensorFlow进阶/数据统计.pdf
  34. BIN
      ch05-TensorFlow进阶/高阶特性.pdf
  35. 0 21
      ch05/nb.py
  36. 0 0
      ch06-神经网络/auto_efficency_regression.py
  37. 673 0
      ch06-神经网络/ch06-神经网络.ipynb
  38. 0 0
      ch06-神经网络/forward.py
  39. 0 0
      ch06-神经网络/nb.py
  40. BIN
      ch06-神经网络/全接连层.pdf
  41. BIN
      ch06-神经网络/误差计算.pdf
  42. BIN
      ch06-神经网络/输出方式.pdf
  43. BIN
      ch07-反向传播算法/0.梯度下降-简介.pdf
  44. BIN
      ch07-反向传播算法/2.常见函数的梯度.pdf
  45. 18 0
      ch07-反向传播算法/2nd_derivative.py
  46. BIN
      ch07-反向传播算法/3.激活函数及其梯度.pdf
  47. BIN
      ch07-反向传播算法/4.损失函数及其梯度.pdf
  48. BIN
      ch07-反向传播算法/5.单输出感知机梯度.pdf
  49. BIN
      ch07-反向传播算法/6.多输出感知机梯度.pdf
  50. BIN
      ch07-反向传播算法/7.链式法则.pdf
  51. BIN
      ch07-反向传播算法/8.多层感知机梯度.pdf
  52. 64 0
      ch07-反向传播算法/ch07-反向传播算法.ipynb
  53. 0 0
      ch07-反向传播算法/chain_rule.py
  54. 24 0
      ch07-反向传播算法/crossentropy_loss.py
  55. 0 0
      ch07-反向传播算法/himmelblau.py
  56. 26 0
      ch07-反向传播算法/mse_grad.py
  57. 26 0
      ch07-反向传播算法/multi_output_perceptron.py
  58. 223 0
      ch07-反向传播算法/numpy-backward-prop.py
  59. 14 0
      ch07-反向传播算法/sigmoid_grad.py
  60. 26 0
      ch07-反向传播算法/single_output_perceptron.py
  61. 0 11
      ch07/nb.py
  62. BIN
      ch08-Keras高层接口/1.Metrics.pdf
  63. BIN
      ch08-Keras高层接口/2.Compile&Fit.pdf
  64. BIN
      ch08-Keras高层接口/3.自定义层.pdf
  65. BIN
      ch08-Keras高层接口/Keras实战CIFAR10.pdf
  66. 60 0
      ch08-Keras高层接口/compile_fit.py
  67. 107 0
      ch08-Keras高层接口/keras_train.py
  68. 102 0
      ch08-Keras高层接口/layer_model.py
  69. 92 0
      ch08-Keras高层接口/metrics.py
  70. 0 0
      ch08-Keras高层接口/nb.py
  71. 0 0
      ch08-Keras高层接口/pretained.py
  72. 69 0
      ch08-Keras高层接口/save_load_model.py
  73. 69 0
      ch08-Keras高层接口/save_load_weight.py
  74. BIN
      ch08-Keras高层接口/模型加载与保存.pdf
  75. 224 0
      ch09-过拟合/9.8-over-fitting-and-under-fitting.py
  76. BIN
      ch09-过拟合/Regularization.pdf
  77. 61 0
      ch09-过拟合/compile_fit.py
  78. 111 0
      ch09-过拟合/dropout.py
  79. 0 0
      ch09-过拟合/lenna.png
  80. 0 0
      ch09-过拟合/lenna_crop.png
  81. 0 0
      ch09-过拟合/lenna_crop2.png
  82. 0 0
      ch09-过拟合/lenna_eras.png
  83. 0 0
      ch09-过拟合/lenna_eras2.png
  84. 0 0
      ch09-过拟合/lenna_flip.png
  85. 0 0
      ch09-过拟合/lenna_flip2.png
  86. 0 0
      ch09-过拟合/lenna_guassian.png
  87. 0 0
      ch09-过拟合/lenna_perspective.png
  88. 0 0
      ch09-过拟合/lenna_resize.png
  89. 0 0
      ch09-过拟合/lenna_rotate.png
  90. 0 0
      ch09-过拟合/lenna_rotate2.png
  91. BIN
      ch09-过拟合/misc.pdf
  92. 88 0
      ch09-过拟合/regularization.py
  93. 73 0
      ch09-过拟合/train_evalute_test.py
  94. BIN
      ch09-过拟合/交叉验证.pdf
  95. BIN
      ch09-过拟合/学习率与动量.pdf
  96. BIN
      ch09-过拟合/过拟合与欠拟合.pdf
  97. 0 36
      ch09/nb.py
  98. BIN
      ch10-卷积神经网络/BatchNorm.pdf
  99. BIN
      ch10-卷积神经网络/CIFAR与VGG实战.pdf
  100. BIN
      ch10-卷积神经网络/ResNet与DenseNet.pdf

+ 0 - 0
ch01/autograd.py → ch01-人工智能绪论/autograd.py


+ 0 - 0
ch01/gpu_accelerate.py → ch01-人工智能绪论/gpu_accelerate.py


+ 1 - 1
ch01/tf1.py → ch01-人工智能绪论/tf1.py

@@ -1,5 +1,5 @@
 import tensorflow.compat.v1 as tf
-tf.disable_v2_behavior()
+tf.disable_v2_behavior() # 使用静态图模式运行以下代码
 assert tf.__version__.startswith('2.')
 
 # 1.创建计算图阶段

+ 0 - 0
ch01/tf2.py → ch01-人工智能绪论/tf2.py


+ 0 - 0
ch02/data.csv → ch02-回归问题/data.csv


+ 0 - 0
ch02/linear_regression.py → ch02-回归问题/linear_regression.py


BIN
ch02-回归问题/回归实战.pdf


BIN
ch02-回归问题/回归问题.pdf


+ 0 - 0
ch03/main.py → ch03-分类问题/forward_layer.py


+ 2 - 2
ch03/forward.py → ch03-分类问题/forward_tensor.py

@@ -1,3 +1,5 @@
+import  os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import  matplotlib
 from 	matplotlib import pyplot as plt
 # Default parameters for plots
@@ -10,9 +12,7 @@ matplotlib.rcParams['axes.unicode_minus']=False
 import  tensorflow as tf
 from    tensorflow import keras
 from    tensorflow.keras import datasets
-import  os
 
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 # x: [60k, 28, 28],
 # y: [60k]

+ 60 - 60
ch03/demo.py → ch03-分类问题/main.py

@@ -1,60 +1,60 @@
-import  tensorflow as tf
-from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
-
-
-# 设置GPU使用方式
-# 获取GPU列表
-gpus = tf.config.experimental.list_physical_devices('GPU')
-if gpus:
-  try:
-    # 设置GPU为增长式占用
-    for gpu in gpus:
-      tf.config.experimental.set_memory_growth(gpu, True) 
-  except RuntimeError as e:
-    # 打印异常
-    print(e)
-
-(xs, ys),_ = datasets.mnist.load_data()
-print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())
-
-batch_size = 32
-
-xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
-db = tf.data.Dataset.from_tensor_slices((xs,ys))
-db = db.batch(batch_size).repeat(30)
-
-
-model = Sequential([layers.Dense(256, activation='relu'), 
-                     layers.Dense(128, activation='relu'),
-                     layers.Dense(10)])
-model.build(input_shape=(4, 28*28))
-model.summary()
-
-optimizer = optimizers.SGD(lr=0.01)
-acc_meter = metrics.Accuracy()
-
-for step, (x,y) in enumerate(db):
-
-    with tf.GradientTape() as tape:
-        # 打平操作,[b, 28, 28] => [b, 784]
-        x = tf.reshape(x, (-1, 28*28))
-        # Step1. 得到模型输出output [b, 784] => [b, 10]
-        out = model(x)
-        # [b] => [b, 10]
-        y_onehot = tf.one_hot(y, depth=10)
-        # 计算差的平方和,[b, 10]
-        loss = tf.square(out-y_onehot)
-        # 计算每个样本的平均误差,[b]
-        loss = tf.reduce_sum(loss) / x.shape[0]
-
-
-    acc_meter.update_state(tf.argmax(out, axis=1), y)
-
-    grads = tape.gradient(loss, model.trainable_variables)
-    optimizer.apply_gradients(zip(grads, model.trainable_variables))
-
-
-    if step % 200==0:
-
-        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())
-        acc_meter.reset_states()
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+# 设置GPU使用方式
+# 获取GPU列表
+gpus = tf.config.experimental.list_physical_devices('GPU')
+if gpus:
+  try:
+    # 设置GPU为增长式占用
+    for gpu in gpus:
+      tf.config.experimental.set_memory_growth(gpu, True) 
+  except RuntimeError as e:
+    # 打印异常
+    print(e)
+
+(xs, ys),_ = datasets.mnist.load_data()
+print('datasets:', xs.shape, ys.shape, xs.min(), xs.max())
+
+batch_size = 32
+
+xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.
+db = tf.data.Dataset.from_tensor_slices((xs,ys))
+db = db.batch(batch_size).repeat(30)
+
+
+model = Sequential([layers.Dense(256, activation='relu'), 
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(10)])
+model.build(input_shape=(4, 28*28))
+model.summary()
+
+optimizer = optimizers.SGD(lr=0.01)
+acc_meter = metrics.Accuracy()
+
+for step, (x,y) in enumerate(db):
+
+    with tf.GradientTape() as tape:
+        # 打平操作,[b, 28, 28] => [b, 784]
+        x = tf.reshape(x, (-1, 28*28))
+        # Step1. 得到模型输出output [b, 784] => [b, 10]
+        out = model(x)
+        # [b] => [b, 10]
+        y_onehot = tf.one_hot(y, depth=10)
+        # 计算差的平方和,[b, 10]
+        loss = tf.square(out-y_onehot)
+        # 计算每个样本的平均误差,[b]
+        loss = tf.reduce_sum(loss) / x.shape[0]
+
+
+    acc_meter.update_state(tf.argmax(out, axis=1), y)
+
+    grads = tape.gradient(loss, model.trainable_variables)
+    optimizer.apply_gradients(zip(grads, model.trainable_variables))
+
+
+    if step % 200==0:
+
+        print(step, 'loss:', float(loss), 'acc:', acc_meter.result().numpy())
+        acc_meter.reset_states()

BIN
ch03-分类问题/手写数字问题.pdf


BIN
ch03-分类问题/手写数字问题体验.pdf


+ 0 - 73
ch03/readMNIST.py

@@ -1,73 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-"""
-从MNIST中读取原始图片并保存、读取标签数据并保存。
-MNIST文件结构分析可以参考:https://blog.csdn.net/justidle/article/details/103149253
-"""
-"""
-使用方法:
-1、将MNIST的文件下载到本地。
-2、在py文件所在目录下,建立mnist_data目录。然后将MNIST的四个文件拷贝到mnist_data目录,并解压
-3、在py文件所在目录下,建立test目录,改目录用于存放解压出的图片文件和标签文件
-"""
-
-import struct
-import numpy as np
-import PIL.Image
-    
-def read_image(filename):
-    #打开文件
-    f = open(filename, 'rb')
-    
-    #读取文件内容
-    index = 0
-    buf = f.read()
-    
-    #关闭文件
-    f.close()
-    
-    #解析文件内容
-    #>IIII 表示使用大端规则,读取四个整型
-    magic, numImages, rows, columns = struct.unpack_from('>IIII', buf, index)
-    index += struct.calcsize('>IIII')
-    
-    for i in range(0, numImages):
-        # L代表灰度图片
-        image = PIL.Image.new('L', (columns, rows))
-        
-        for x in range(rows):
-            for y in range(columns):
-                # ‘>B' 读取一个字节
-                image.putpixel((y,x), int(struct.unpack_from('>B', buf, index)[0]))
-                index += struct.calcsize('>B')
-                
-        print('save ' + str(i) + 'image')
-        image.save('mnist_data/test/'+str(i)+'.png')
-        
-def read_label(filename, saveFilename):
-    f = open(filename, 'rb')
-    index = 0
-    buf = f.read()
-    f.close()
-    
-    magic, labels = struct.unpack_from('>II' , buf , index)
-    index += struct.calcsize('>II')
-    
-    labelArr = [0] * labels
-    
-    for x in range(labels):
-        labelArr[x] = int(struct.unpack_from('>B', buf, index)[0])
-        index += struct.calcsize('>B')
-    
-    save = open(saveFilename, 'w')
-    save.write(','.join(map(lambda x: str(x), labelArr)))
-    save.write('\n')
-    save.close()
-    print('save labels success')
-
-if __name__ == '__main__':
-    #注意t10k-images-idx3-ubyte里面一共有10,000张图片
-    read_image('mnist_data/t10k-images-idx3-ubyte')
-    read_label('mnist_data/t10k-labels-idx1-ubyte', 'mnist_data/test/label.txt')
-    

+ 109 - 0
ch04-TensorFlow基础/4.10-forward-prop.py

@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+@author: HuRuiFeng
+@file: 4.10-forward-prop.py
+@time: 2020/2/14 23:47
+@desc: 4.10 前向传播实战的示例代码
+"""
+
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import tensorflow.keras.datasets as datasets
+
+plt.rcParams['font.size'] = 16
+plt.rcParams['font.family'] = ['STKaiti']
+plt.rcParams['axes.unicode_minus'] = False
+
+
+def load_data():  # Build the batched MNIST training dataset: flattened float images, one-hot labels
+    # Load the MNIST dataset; the validation split (x_val, y_val) is unpacked but not used here
+    (x, y), (x_val, y_val) = datasets.mnist.load_data()
+    # Convert to a float32 tensor and divide by 255 (maps 0~255 pixel values to 0~1, not -1~1)
+    x = tf.convert_to_tensor(x, dtype=tf.float32) / 255.
+    # Convert the labels to an int32 tensor
+    y = tf.convert_to_tensor(y, dtype=tf.int32)
+    # One-hot encode the labels with depth 10
+    y = tf.one_hot(y, depth=10)
+
+    # Reshape the images, [b, 28, 28] => [b, 28*28]
+    x = tf.reshape(x, (-1, 28 * 28))
+
+    # Build the dataset object from (image, label) slices
+    train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
+    # Group the samples into batches of 200
+    train_dataset = train_dataset.batch(200)
+    return train_dataset
+
+
+def init_paramaters():  # Create and return (w1, b1, w2, b2, w3, b3) for a 784 => 256 => 128 => 10 network
+    # Every tensor is optimized during training, so each is a tf.Variable; weights are drawn from a truncated normal distribution (stddev=0.1)
+    # Bias vectors are simply initialized to 0
+    # Parameters of layer 1
+    w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
+    b1 = tf.Variable(tf.zeros([256]))
+    # Parameters of layer 2
+    w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
+    b2 = tf.Variable(tf.zeros([128]))
+    # Parameters of layer 3 (output layer)
+    w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
+    b3 = tf.Variable(tf.zeros([10]))
+    return w1, b1, w2, b2, w3, b3
+
+
+def train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001):  # One pass over train_dataset with manual gradient-descent updates; returns the last batch's loss
+    for step, (x, y) in enumerate(train_dataset):
+        with tf.GradientTape() as tape:
+            # Layer 1, [b, 784]@[784, 256] + [256] => [b, 256]; b1 is explicitly broadcast to [b, 256] before the add
+            h1 = x @ w1 + tf.broadcast_to(b1, (x.shape[0], 256))
+            h1 = tf.nn.relu(h1)  # apply the activation function
+
+            # Layer 2, [b, 256] => [b, 128] (bias added directly, without an explicit broadcast_to)
+            h2 = h1 @ w2 + b2
+            h2 = tf.nn.relu(h2)
+            # Output layer, [b, 128] => [b, 10]; no activation is applied
+            out = h2 @ w3 + b3
+
+            # Mean squared error between the labels and the network output: mean((y - out)^2)
+            # [b, 10]
+            loss = tf.square(y - out)
+            # Reduce over all elements to a scalar loss
+            loss = tf.reduce_mean(loss)
+
+            # Automatic differentiation w.r.t. [w1, b1, w2, b2, w3, b3]; NOTE(review): called while still inside the tape context
+            grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
+
+        # Gradient update: assign_sub subtracts its argument from the variable in place
+        w1.assign_sub(lr * grads[0])
+        b1.assign_sub(lr * grads[1])
+        w2.assign_sub(lr * grads[2])
+        b2.assign_sub(lr * grads[3])
+        w3.assign_sub(lr * grads[4])
+        b3.assign_sub(lr * grads[5])
+
+        if step % 100 == 0:  # log every 100 steps
+            print(epoch, step, 'loss:', loss.numpy())
+
+    return loss.numpy()  # loss of the final batch only, not an epoch average
+
+
+def train(epochs):  # Train for `epochs` passes and save the per-epoch loss curve as a PNG file
+    losses = []
+    train_dataset = load_data()
+    w1, b1, w2, b2, w3, b3 = init_paramaters()
+    for epoch in range(epochs):
+        loss = train_epoch(epoch, train_dataset, w1, b1, w2, b2, w3, b3, lr=0.001)
+        losses.append(loss)  # one value per epoch (the value returned by train_epoch)
+
+    x = [i for i in range(0, epochs)]  # epoch indices for the x axis
+    # Plot the training loss curve
+    plt.plot(x, losses, color='blue', marker='s', label='训练')
+    plt.xlabel('Epoch')
+    plt.ylabel('MSE')
+    plt.legend()
+    plt.savefig('MNIST数据集的前向传播训练误差曲线.png')  # written to the current working directory
+    plt.close()
+
+
+if __name__ == '__main__':
+    train(epochs=20)

BIN
ch04-TensorFlow基础/Broadcasting.pdf


BIN
ch04-TensorFlow基础/MNIST数据集的前向传播训练误差曲线.png


Diferenças do arquivo suprimidas por serem muito extensas
+ 7489 - 0
ch04-TensorFlow基础/ch04-TensorFlow基础.ipynb


BIN
ch04-TensorFlow基础/创建Tensor.pdf


BIN
ch04-TensorFlow基础/前向传播.pdf


BIN
ch04-TensorFlow基础/数学运算.pdf


BIN
ch04-TensorFlow基础/数据类型.pdf


BIN
ch04-TensorFlow基础/索引与切片-1.pdf


BIN
ch04-TensorFlow基础/索引与切片-2.pdf


BIN
ch04-TensorFlow基础/维度变换.pdf


+ 37 - 0
ch05-TensorFlow进阶/acc_topk.py

@@ -0,0 +1,37 @@
+import  tensorflow as tf
+import  os
+
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+tf.random.set_seed(2467)
+
+def accuracy(output, target, topk=(1,)):  # Top-k accuracy in percent for every k in topk; returns a list of Python floats
+    maxk = max(topk)  # only the largest requested k needs to be computed
+    batch_size = target.shape[0]
+
+    pred = tf.math.top_k(output, maxk).indices  # indices of the maxk largest entries along the last axis
+    pred = tf.transpose(pred, perm=[1, 0])  # swap the two axes so the rank axis comes first
+    target_ = tf.broadcast_to(target, pred.shape)  # repeat the labels for every rank
+    # [maxk, b] boolean mask, assuming output is [b, classes] and target is [b]
+    correct = tf.equal(pred, target_)
+
+    res = []
+    for k in topk:
+        correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)  # matches within the first k ranks, flattened
+        correct_k = tf.reduce_sum(correct_k)  # number of matches
+        acc = float(correct_k* (100.0 / batch_size) )  # convert the count to a percentage of the batch
+        res.append(acc)
+
+    return res
+
+
+
+output = tf.random.normal([10, 6])
+output = tf.math.softmax(output, axis=1)
+target = tf.random.uniform([10], maxval=6, dtype=tf.int32)
+print('prob:', output.numpy())
+pred = tf.argmax(output, axis=1)
+print('pred:', pred.numpy())
+print('label:', target.numpy())
+
+acc = accuracy(output, target, topk=(1,2,3,4,5,6))
+print('top-1-6 acc:', acc)

+ 85 - 0
ch05-TensorFlow进阶/gradient_clip.py

@@ -0,0 +1,85 @@
+import  tensorflow as tf
+from    tensorflow import keras
+from    tensorflow.keras import datasets, layers, optimizers
+import  os
+
+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
+print(tf.__version__)
+
+(x, y), _ = datasets.mnist.load_data()
+x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.
+y = tf.convert_to_tensor(y)
+y = tf.one_hot(y, depth=10)
+print('x:', x.shape, 'y:', y.shape)
+train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128).repeat(30)
+x,y = next(iter(train_db))
+print('sample:', x.shape, y.shape)
+# print(x[0], y[0])
+
+
+
+def main():  # Train a 784 => 512 => 256 => 10 network on the module-level train_db with global-norm gradient clipping
+
+    # 784 => 512
+    w1, b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)), tf.Variable(tf.zeros([512]))
+    # 512 => 256
+    w2, b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
+    # 256 => 10
+    w3, b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
+
+
+
+    optimizer = optimizers.SGD(lr=0.01)  # NOTE(review): `lr` looks like the legacy spelling of `learning_rate` - confirm against the installed TF version
+
+
+    for step, (x,y) in enumerate(train_db):
+
+        # [b, 28, 28] => [b, 784]
+        x = tf.reshape(x, (-1, 784))
+
+        with tf.GradientTape() as tape:
+
+            # layer1.
+            h1 = x @ w1 + b1
+            h1 = tf.nn.relu(h1)
+            # layer2
+            h2 = h1 @ w2 + b2
+            h2 = tf.nn.relu(h2)
+            # output
+            out = h2 @ w3 + b3
+            # out = tf.nn.relu(out)
+
+            # compute loss
+            # [b, 10] - [b, 10]
+            loss = tf.square(y-out)
+            # [b, 10] => [b]
+            loss = tf.reduce_mean(loss, axis=1)
+            # [b] => scalar
+            loss = tf.reduce_mean(loss)
+
+
+
+        # compute gradient
+        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
+        # print('==before==')
+        # for g in grads:
+        #     print(tf.norm(g))
+        
+        grads,  _ = tf.clip_by_global_norm(grads, 15)  # clip all gradients jointly with clip_norm 15; the second return value is discarded
+
+        # print('==after==')
+        # for g in grads:
+        #     print(tf.norm(g))
+        # update w' = w - lr*grad
+        optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w3, b3]))
+
+
+
+        if step % 100 == 0:  # log every 100 steps
+            print(step, 'loss:', float(loss))
+
+
+
+
+if __name__ == '__main__':
+    main()

+ 0 - 0
ch05/mnist_tensor.py → ch05-TensorFlow进阶/mnist_tensor.py


BIN
ch05-TensorFlow进阶/合并与分割.pdf


BIN
ch05-TensorFlow进阶/填充与复制.pdf


BIN
ch05-TensorFlow进阶/张量排序.pdf


BIN
ch05-TensorFlow进阶/张量限幅.pdf


BIN
ch05-TensorFlow进阶/数据统计.pdf


BIN
ch05-TensorFlow进阶/高阶特性.pdf


+ 0 - 21
ch05/nb.py

@@ -1,21 +0,0 @@
-#%%
-import  tensorflow as tf
-from    tensorflow import keras
-from    tensorflow.keras import datasets
-import  os
-
-
-#%%
-a = tf.random.normal([4,35,8]) # 模拟成绩册A
-b = tf.random.normal([6,35,8]) # 模拟成绩册B
-tf.concat([a,b],axis=0) # 合并成绩册
-
-
-#%%
-x = tf.random.normal([2,784])
-w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
-b1 = tf.Variable(tf.zeros([256]))
-o1 = tf.matmul(x,w1) + b1  #
-o1 = tf.nn.relu(o1)
-
-#%%

+ 0 - 0
ch06/auto_efficency_regression.py → ch06-神经网络/auto_efficency_regression.py


Diferenças do arquivo suprimidas por serem muito extensas
+ 673 - 0
ch06-神经网络/ch06-神经网络.ipynb


+ 0 - 0
ch06/forward.py → ch06-神经网络/forward.py


+ 0 - 0
ch06/nb.py → ch06-神经网络/nb.py


BIN
ch06-神经网络/全接连层.pdf


BIN
ch06-神经网络/误差计算.pdf


BIN
ch06-神经网络/输出方式.pdf


BIN
ch07-反向传播算法/0.梯度下降-简介.pdf


BIN
ch07-反向传播算法/2.常见函数的梯度.pdf


+ 18 - 0
ch07-反向传播算法/2nd_derivative.py

@@ -0,0 +1,18 @@
+import tensorflow as tf
+
+w = tf.Variable(1.0)
+b = tf.Variable(2.0)
+x = tf.Variable(3.0)
+
+with tf.GradientTape() as t1:  # outer tape: used afterwards to differentiate the first-order gradient
+  with tf.GradientTape() as t2:  # inner tape: records y = x * w + b
+    y = x * w + b
+  dy_dw, dy_db = t2.gradient(y, [w, b])  # first-order gradients, computed inside t1's context
+d2y_dw2 = t1.gradient(dy_dw, w)  # second-order gradient of y w.r.t. w
+
+print(dy_dw)
+print(dy_db)
+print(d2y_dw2)
+
+assert dy_dw.numpy() == 3.0  # dy/dw = x = 3.0
+assert d2y_dw2 is None  # y is linear in w, so dy_dw does not depend on w and no gradient is returned

BIN
ch07-反向传播算法/3.激活函数及其梯度.pdf


BIN
ch07-反向传播算法/4.损失函数及其梯度.pdf


BIN
ch07-反向传播算法/5.单输出感知机梯度.pdf


BIN
ch07-反向传播算法/6.多输出感知机梯度.pdf


BIN
ch07-反向传播算法/7.链式法则.pdf


BIN
ch07-反向传播算法/8.多层感知机梯度.pdf


Diferenças do arquivo suprimidas por serem muito extensas
+ 64 - 0
ch07-反向传播算法/ch07-反向传播算法.ipynb


+ 0 - 0
ch07/chain_rule.py → ch07-反向传播算法/chain_rule.py


+ 24 - 0
ch07-反向传播算法/crossentropy_loss.py

@@ -0,0 +1,24 @@
+import tensorflow as tf 
+
+
+tf.random.set_seed(4323)
+
+x=tf.random.normal([1,3])
+
+w=tf.random.normal([3,2])
+
+b=tf.random.normal([2])
+
+y = tf.constant([0, 1])
+
+
+with tf.GradientTape() as tape:
+
+	tape.watch([w, b])
+	logits = (x@w+b)
+	loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y, logits, from_logits=True))
+
+grads = tape.gradient(loss, [w, b])
+print('w grad:', grads[0])
+
+print('b grad:', grads[1])

+ 0 - 0
ch07/himmelblau.py → ch07-反向传播算法/himmelblau.py


+ 26 - 0
ch07-反向传播算法/mse_grad.py

@@ -0,0 +1,26 @@
+import tensorflow as tf 
+
+
+
+
+x=tf.random.normal([1,3])
+
+w=tf.ones([3,2])
+
+b=tf.ones([2])
+
+y = tf.constant([0, 1])
+
+
+with tf.GradientTape() as tape:
+
+	tape.watch([w, b])
+	logits = tf.sigmoid(x@w+b) 
+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
+
+grads = tape.gradient(loss, [w, b])
+print('w grad:', grads[0])
+
+print('b grad:', grads[1])
+
+

+ 26 - 0
ch07-反向传播算法/multi_output_perceptron.py

@@ -0,0 +1,26 @@
+import tensorflow as tf 
+
+
+
+
+x=tf.random.normal([1,3])
+
+w=tf.ones([3,2])
+
+b=tf.ones([2])
+
+y = tf.constant([0, 1])
+
+
+with tf.GradientTape() as tape:
+
+	tape.watch([w, b])
+	logits = tf.sigmoid(x@w+b) 
+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
+
+grads = tape.gradient(loss, [w, b])
+print('w grad:', grads[0])
+
+print('b grad:', grads[1])
+
+

+ 223 - 0
ch07-反向传播算法/numpy-backward-prop.py

@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+@author: HuRuiFeng
+@file: 7.9-backward-prop.py
+@time: 2020/2/24 17:32
+@desc: 7.9 反向传播算法实战的代码
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+from sklearn.datasets import make_moons
+from sklearn.model_selection import train_test_split
+
+plt.rcParams['font.size'] = 16
+plt.rcParams['font.family'] = ['STKaiti']
+plt.rcParams['axes.unicode_minus'] = False
+
+
+def load_dataset():  # Generate the two-moons dataset and return (X, y, X_train, X_test, y_train, y_test)
+    # Number of sample points
+    N_SAMPLES = 2000
+    # Fraction of the samples held out for testing
+    TEST_SIZE = 0.3
+    # Generate the dataset directly with the sklearn helper (fixed random_state for reproducibility)
+    X, y = make_moons(n_samples=N_SAMPLES, noise=0.2, random_state=100)
+    # Split the 2000 points 7:3 into a training set and a test set
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
+    return X, y, X_train, X_test, y_train, y_test
+
+
+def make_plot(X, y, plot_name, XX=None, YY=None, preds=None, dark=False):  # Scatter-plot the dataset (optionally over prediction contours) and save it to an SVG file
+    # Plot the distribution of the dataset; X holds the 2D coordinates, y the labels of the points
+    if (dark):
+        plt.style.use('dark_background')
+    else:
+        sns.set_style("whitegrid")
+    plt.figure(figsize=(16, 12))
+    axes = plt.gca()
+    axes.set(xlabel="$x_1$", ylabel="$x_2$")
+    plt.title(plot_name, fontsize=30)
+    plt.subplots_adjust(left=0.20)
+    plt.subplots_adjust(right=0.80)
+    if XX is not None and YY is not None and preds is not None:  # contours are drawn only when all three are supplied
+        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=1, cmap=plt.cm.Spectral)
+        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
+    # Draw the scatter plot, colouring the points by label
+    plt.scatter(X[:, 0], X[:, 1], c=y.ravel(), s=40, cmap=plt.cm.Spectral, edgecolors='none')
+    plt.savefig('数据集分布.svg')  # the output file name is fixed and does not depend on plot_name
+    plt.close()  # the figure is closed before returning
+
+
+class Layer:
+    # Fully connected network layer
+    def __init__(self, n_input, n_neurons, activation=None, weights=None,
+                 bias=None):
+        """
+        :param int n_input: number of input nodes
+        :param int n_neurons: number of output nodes
+        :param str activation: activation function type
+        :param weights: weight tensor; generated inside the class when None
+        :param bias: bias vector; generated inside the class when None
+        """
+        # Initialize the weights from a normal distribution scaled by sqrt(1 / n_neurons); initialization matters, a poor choice can keep the network from converging
+        self.weights = weights if weights is not None else np.random.randn(n_input, n_neurons) * np.sqrt(1 / n_neurons)
+        self.bias = bias if bias is not None else np.random.rand(n_neurons) * 0.1
+        self.activation = activation  # activation function type, e.g. 'sigmoid'
+        self.last_activation = None  # output value o of the activation function
+        self.error = None  # intermediate value used to compute this layer's delta
+        self.delta = None  # this layer's delta, used to compute the gradient
+
+    # Forward pass of the layer; last_activation stores the output of the current layer:
+    def activate(self, x):
+        # Forward propagation
+        r = np.dot(x, self.weights) + self.bias  # X@W+b
+        # Apply the activation function to obtain the layer output o
+        self.last_activation = self._apply_activation(r)
+        return self.last_activation
+
+    # self._apply_activation implements the forward computation of the supported activation functions,
+    # although only the Sigmoid activation is used by the script in this file:
+    def _apply_activation(self, r):
+        # Compute the output of the activation function
+        if self.activation is None:
+            return r  # no activation function, return the input unchanged
+        # ReLU activation
+        elif self.activation == 'relu':
+            return np.maximum(r, 0)
+        # tanh activation
+        elif self.activation == 'tanh':
+            return np.tanh(r)
+        # sigmoid activation
+        elif self.activation == 'sigmoid':
+            return 1 / (1 + np.exp(-r))
+        return r  # unrecognized activation name: treated as identity
+
+    # Derivatives of the supported activation functions; backpropagation passes the activation OUTPUT as r:
+    def apply_activation_derivative(self, r):
+        # Compute the derivative of the activation function
+        # No activation function: the derivative is 1
+        if self.activation is None:
+            return np.ones_like(r)
+        # Derivative of ReLU
+        elif self.activation == 'relu':
+            grad = np.array(r, copy=True)
+            grad[r > 0] = 1.
+            grad[r <= 0] = 0.
+            return grad
+        # Derivative of tanh, expressed through its output
+        elif self.activation == 'tanh':
+            return 1 - r ** 2
+        # Derivative of Sigmoid, expressed through its output
+        elif self.activation == 'sigmoid':
+            return r * (1 - r)
+        return r  # NOTE(review): unrecognized activation returns r itself, not ones like the None branch - confirm intent
+
+
+# 神经网络模型
+class NeuralNetwork:  # Sequential container of Layer objects trained one sample at a time with backpropagation
+    def __init__(self):
+        self._layers = []  # list of network layer objects
+
+    def add_layer(self, layer):
+        # Append a network layer
+        self._layers.append(layer)
+
+    # Forward propagation just calls the forward function of every layer in order:
+    # Forward propagation
+    def feed_forward(self, X):
+        for layer in self._layers:
+            # Pass through each layer in turn
+            X = layer.activate(X)
+        return X
+
+    def backpropagation(self, X, y, learning_rate):
+        # Backpropagation implementation
+        # Forward pass to obtain the output value
+        output = self.feed_forward(X)
+        for i in reversed(range(len(self._layers))):  # loop backwards over the layers
+            layer = self._layers[i]  # current layer object
+            # If this is the output layer
+            if layer == self._layers[-1]:  # output layer (NOTE(review): compared with ==; an identity check may be intended)
+                layer.error = y - output  # error term of the mean-squared-error loss (label minus output)
+                # Key step: delta of the last layer, see the output-layer gradient formula
+                layer.delta = layer.error * layer.apply_activation_derivative(output)
+            else:  # hidden layer
+                next_layer = self._layers[i + 1]  # the following layer
+                layer.error = np.dot(next_layer.weights, next_layer.delta)
+                # Key step: delta of a hidden layer, see the hidden-layer gradient formula
+                layer.delta = layer.error * layer.apply_activation_derivative(layer.last_activation)
+
+        # Update the weights layer by layer
+        for i in range(len(self._layers)):
+            layer = self._layers[i]
+            # o_i is the output of the previous layer (the input X for the first layer)
+            o_i = np.atleast_2d(X if i == 0 else self._layers[i - 1].last_activation)
+            # Gradient descent: delta already carries the negative sign (error is y - output), hence the plus; NOTE(review): layer.bias is never updated here
+            layer.weights += layer.delta * o_i.T * learning_rate
+
+    def train(self, X_train, X_test, y_train, y_test, learning_rate, max_epochs):
+        # Network training function; returns (mses, accuracys) sampled every 10 epochs
+        # One-hot encode the training labels into 2 columns
+        y_onehot = np.zeros((y_train.shape[0], 2))
+        y_onehot[np.arange(y_train.shape[0]), y_train] = 1
+
+        # Train against the one-hot labels with backpropagation, recording the mean squared error and the test accuracy along the way
+        mses = []
+        accuracys = []
+        for i in range(max_epochs + 1):  # runs max_epochs + 1 epochs (0..max_epochs inclusive)
+            for j in range(len(X_train)):  # train on one sample at a time
+                self.backpropagation(X_train[j], y_onehot[j], learning_rate)
+            if i % 10 == 0:
+                # Record and print the MSE loss on the training set
+                mse = np.mean(np.square(y_onehot - self.feed_forward(X_train)))
+                mses.append(mse)
+                accuracy = self.accuracy(self.predict(X_test), y_test.flatten())
+                accuracys.append(accuracy)
+                print('Epoch: #%s, MSE: %f' % (i, float(mse)))
+                # Print the accuracy measured on the test set
+                print('Accuracy: %.2f%%' % (accuracy * 100))
+        return mses, accuracys
+
+    def predict(self, X):  # raw network outputs for X (same as feed_forward)
+        return self.feed_forward(X)
+
+    def accuracy(self, X, y):  # X: network outputs, y: integer labels; fraction where argmax(X, axis=1) equals y
+        return np.sum(np.equal(np.argmax(X, axis=1), y)) / y.shape[0]
+
+
+def main():  # Build the dataset, train a 2 => 25 => 50 => 25 => 2 sigmoid network and save the MSE / accuracy curves
+    X, y, X_train, X_test, y_train, y_test = load_dataset()
+    # Call make_plot to draw the data distribution; X holds the 2D coordinates, y the labels
+    make_plot(X, y, "Classification Dataset Visualization ")
+    plt.show()  # NOTE(review): make_plot has already saved and closed its figure before this call
+    nn = NeuralNetwork()  # instantiate the network class
+    nn.add_layer(Layer(2, 25, 'sigmoid'))  # hidden layer 1, 2=>25
+    nn.add_layer(Layer(25, 50, 'sigmoid'))  # hidden layer 2, 25=>50
+    nn.add_layer(Layer(50, 25, 'sigmoid'))  # hidden layer 3, 50=>25
+    nn.add_layer(Layer(25, 2, 'sigmoid'))  # output layer, 25=>2
+    mses, accuracys = nn.train(X_train, X_test, y_train, y_test, 0.01, 1000)
+
+    x = [i for i in range(0, 101, 10)]  # 11 x-axis values: epochs 0, 10, ..., 100
+
+    # Plot the MSE curve (NOTE(review): "MES" in the title string below looks like a typo for "MSE")
+    plt.title("MES Loss")
+    plt.plot(x, mses[:11], color='blue')  # only the first 11 recorded values are plotted
+    plt.xlabel('Epoch')
+    plt.ylabel('MSE')
+    plt.savefig('训练误差曲线.svg')
+    plt.close()
+
+    # Plot the accuracy curve
+    plt.title("Accuracy")
+    plt.plot(x, accuracys[:11], color='blue')  # only the first 11 recorded values are plotted
+    plt.xlabel('Epoch')
+    plt.ylabel('Accuracy')
+    plt.savefig('网络测试准确率.svg')
+    plt.close()
+
+
+if __name__ == '__main__':
+    main()

+ 14 - 0
ch07-反向传播算法/sigmoid_grad.py

@@ -0,0 +1,14 @@
+import tensorflow as tf 
+
+
+a = tf.linspace(-10., 10., 10)
+
+with tf.GradientTape() as tape:
+	tape.watch(a)
+	y = tf.sigmoid(a)
+
+
+grads = tape.gradient(y, [a])
+print('x:', a.numpy())
+print('y:', y.numpy())
+print('grad:', grads[0].numpy())

+ 26 - 0
ch07-反向传播算法/single_output_perceptron.py

@@ -0,0 +1,26 @@
+import tensorflow as tf 
+
+
+
+
+x=tf.random.normal([1,3])
+
+w=tf.ones([3,1])
+
+b=tf.ones([1])
+
+y = tf.constant([1])
+
+
+with tf.GradientTape() as tape:
+
+	tape.watch([w, b])
+	logits = tf.sigmoid(x@w+b) 
+	loss = tf.reduce_mean(tf.losses.MSE(y, logits))
+
+grads = tape.gradient(loss, [w, b])
+print('w grad:', grads[0])
+
+print('b grad:', grads[1])
+
+

+ 0 - 11
ch07/nb.py

@@ -1,11 +0,0 @@
-#%%
-import  tensorflow as tf
-from    tensorflow import keras
-from    tensorflow.keras import datasets, layers
-
-#%%
-def sigmoid(x): # sigmoid函数,也可以直接使用tf.nn.sigmoid
-    return 1 / (1 + tf.math.exp(-x))
-
-def derivative(x): # sigmoid导数的计算
-    return sigmoid(x)*(1-sigmoid(x))

BIN
ch08-Keras高层接口/1.Metrics.pdf


BIN
ch08-Keras高层接口/2.Compile&Fit.pdf


BIN
ch08-Keras高层接口/3.自定义层.pdf


BIN
ch08-Keras高层接口/Keras实战CIFAR10.pdf


+ 60 - 0
ch08-Keras高层接口/compile_fit.py

@@ -0,0 +1,60 @@
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+    """
+    Scale and flatten one image x (a single sample, not a batch) and one-hot encode its label y.
+    """
+    x = tf.cast(x, dtype=tf.float32) / 255.  # cast to float32 and divide by 255
+    x = tf.reshape(x, [28*28])  # flatten to a vector of 28*28 values
+    y = tf.cast(y, dtype=tf.int32)
+    y = tf.one_hot(y, depth=10)  # one-hot encode with depth 10
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz)
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+sample = next(iter(db))
+print(sample[0].shape, sample[1].shape)
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+
+
+
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+
+network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)
+ 
+network.evaluate(ds_val)
+
+sample = next(iter(ds_val))
+x = sample[0]
+y = sample[1] # one-hot
+pred = network.predict(x) # [b, 10]
+# convert back to number 
+y = tf.argmax(y, axis=1)
+pred = tf.argmax(pred, axis=1)
+
+print(pred)
+print(y)

+ 107 - 0
ch08-Keras高层接口/keras_train.py

@@ -0,0 +1,107 @@
+import  os
+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
+
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+from 	tensorflow import keras
+
+
+
+def preprocess(x, y):
+    # [0~255] => [-1~1]
+    x = 2 * tf.cast(x, dtype=tf.float32) / 255. - 1.
+    y = tf.cast(y, dtype=tf.int32)
+    return x,y
+
+
+batchsz = 128
+# [50k, 32, 32, 3], [10k, 1]
+(x, y), (x_val, y_val) = datasets.cifar10.load_data()
+y = tf.squeeze(y)
+y_val = tf.squeeze(y_val)
+y = tf.one_hot(y, depth=10) # [50k, 10]
+y_val = tf.one_hot(y_val, depth=10) # [10k, 10]
+print('datasets:', x.shape, y.shape, x_val.shape, y_val.shape, x.min(), x.max())
+
+
+train_db = tf.data.Dataset.from_tensor_slices((x,y))
+train_db = train_db.map(preprocess).shuffle(10000).batch(batchsz)
+test_db = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+test_db = test_db.map(preprocess).batch(batchsz)
+
+
+sample = next(iter(train_db))
+print('batch:', sample[0].shape, sample[1].shape)
+
+
+class MyDense(layers.Layer):
+    # to replace standard layers.Dense()
+    def __init__(self, inp_dim, outp_dim):
+        super(MyDense, self).__init__()
+
+        self.kernel = self.add_variable('w', [inp_dim, outp_dim])
+        # self.bias = self.add_variable('b', [outp_dim])
+
+    def call(self, inputs, training=None):
+
+        x = inputs @ self.kernel
+        return x
+
+class MyNetwork(keras.Model):
+
+    def __init__(self):
+        super(MyNetwork, self).__init__()
+
+        self.fc1 = MyDense(32*32*3, 256)
+        self.fc2 = MyDense(256, 128)
+        self.fc3 = MyDense(128, 64)
+        self.fc4 = MyDense(64, 32)
+        self.fc5 = MyDense(32, 10)
+
+
+
+    def call(self, inputs, training=None):
+        """
+
+        :param inputs: [b, 32, 32, 3]
+        :param training:
+        :return:
+        """
+        x = tf.reshape(inputs, [-1, 32*32*3])
+        # [b, 32*32*3] => [b, 256]
+        x = self.fc1(x)
+        x = tf.nn.relu(x)
+        # [b, 256] => [b, 128]
+        x = self.fc2(x)
+        x = tf.nn.relu(x)
+        # [b, 128] => [b, 64]
+        x = self.fc3(x)
+        x = tf.nn.relu(x)
+        # [b, 64] => [b, 32]
+        x = self.fc4(x)
+        x = tf.nn.relu(x)
+        # [b, 32] => [b, 10]
+        x = self.fc5(x)
+
+        return x
+
+
+network = MyNetwork()
+network.compile(optimizer=optimizers.Adam(lr=1e-3),
+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+                metrics=['accuracy'])
+network.fit(train_db, epochs=15, validation_data=test_db, validation_freq=1)
+
+network.evaluate(test_db)
+network.save_weights('ckpt/weights.ckpt')
+del network
+print('saved to ckpt/weights.ckpt')
+
+
+network = MyNetwork()
+network.compile(optimizer=optimizers.Adam(lr=1e-3),
+                loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+                metrics=['accuracy'])
+network.load_weights('ckpt/weights.ckpt')
+print('loaded weights from file.')
+network.evaluate(test_db)

+ 102 - 0
ch08-Keras高层接口/layer_model.py

@@ -0,0 +1,102 @@
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+from 	tensorflow import keras
+
+def preprocess(x, y):
+    """
+    x is a simple image, not a batch
+    """
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    x = tf.reshape(x, [28*28])
+    y = tf.cast(y, dtype=tf.int32)
+    y = tf.one_hot(y, depth=10)
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz)
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+sample = next(iter(db))
+print(sample[0].shape, sample[1].shape)
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+
+class MyDense(layers.Layer):
+
+	def __init__(self, inp_dim, outp_dim):
+		super(MyDense, self).__init__()
+
+		self.kernel = self.add_weight('w', [inp_dim, outp_dim])
+		self.bias = self.add_weight('b', [outp_dim])
+
+	def call(self, inputs, training=None):
+
+		out = inputs @ self.kernel + self.bias
+
+		return out 
+
+class MyModel(keras.Model):
+
+	def __init__(self):
+		super(MyModel, self).__init__()
+
+		self.fc1 = MyDense(28*28, 256)
+		self.fc2 = MyDense(256, 128)
+		self.fc3 = MyDense(128, 64)
+		self.fc4 = MyDense(64, 32)
+		self.fc5 = MyDense(32, 10)
+
+	def call(self, inputs, training=None):
+
+		x = self.fc1(inputs)
+		x = tf.nn.relu(x)
+		x = self.fc2(x)
+		x = tf.nn.relu(x)
+		x = self.fc3(x)
+		x = tf.nn.relu(x)
+		x = self.fc4(x)
+		x = tf.nn.relu(x)
+		x = self.fc5(x) 
+
+		return x
+
+
+network = MyModel()
+
+
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+
+network.fit(db, epochs=5, validation_data=ds_val,
+              validation_freq=2)
+ 
+network.evaluate(ds_val)
+
+sample = next(iter(ds_val))
+x = sample[0]
+y = sample[1] # one-hot
+pred = network.predict(x) # [b, 10]
+# convert back to number 
+y = tf.argmax(y, axis=1)
+pred = tf.argmax(pred, axis=1)
+
+print(pred)
+print(y)

+ 92 - 0
ch08-Keras高层接口/metrics.py

@@ -0,0 +1,92 @@
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    y = tf.cast(y, dtype=tf.int32)
+
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
+
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+optimizer = optimizers.Adam(lr=0.01)
+
+acc_meter = metrics.Accuracy()
+loss_meter = metrics.Mean()
+
+
+for step, (x,y) in enumerate(db):
+
+    with tf.GradientTape() as tape:
+        # [b, 28, 28] => [b, 784]
+        x = tf.reshape(x, (-1, 28*28))
+        # [b, 784] => [b, 10]
+        out = network(x)
+        # [b] => [b, 10]
+        y_onehot = tf.one_hot(y, depth=10) 
+        # [b]
+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
+
+        loss_meter.update_state(loss)
+
+ 
+
+    grads = tape.gradient(loss, network.trainable_variables)
+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
+
+
+    if step % 100 == 0:
+
+        print(step, 'loss:', loss_meter.result().numpy()) 
+        loss_meter.reset_states()
+
+
+    # evaluate
+    if step % 500 == 0:
+        total, total_correct = 0., 0
+        acc_meter.reset_states()
+
+        for step, (x, y) in enumerate(ds_val): 
+            # [b, 28, 28] => [b, 784]
+            x = tf.reshape(x, (-1, 28*28))
+            # [b, 784] => [b, 10]
+            out = network(x) 
+
+
+            # [b, 10] => [b] 
+            pred = tf.argmax(out, axis=1) 
+            pred = tf.cast(pred, dtype=tf.int32)
+            # bool type 
+            correct = tf.equal(pred, y)
+            # bool tensor => int tensor => numpy
+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
+            total += x.shape[0]
+
+            acc_meter.update_state(y, pred)
+
+
+        print(step, 'Evaluate Acc:', total_correct/total, acc_meter.result().numpy())

+ 0 - 0
ch08/nb.py → ch08-Keras高层接口/nb.py


+ 0 - 0
ch08/pretained.py → ch08-Keras高层接口/pretained.py


+ 69 - 0
ch08-Keras高层接口/save_load_model.py

@@ -0,0 +1,69 @@
+import  os
+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
+
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+    """
+    x is a simple image, not a batch
+    """
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    x = tf.reshape(x, [28*28])
+    y = tf.cast(y, dtype=tf.int32)
+    y = tf.one_hot(y, depth=10)
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz)
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+sample = next(iter(db))
+print(sample[0].shape, sample[1].shape)
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+
+
+
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+
+network.fit(db, epochs=3, validation_data=ds_val, validation_freq=2)
+ 
+network.evaluate(ds_val)
+
+network.save('model.h5')
+print('saved total model.')
+del network
+
+print('loaded model from file.')
+network = tf.keras.models.load_model('model.h5', compile=False)
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+        loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+        metrics=['accuracy']
+    )
+x_val = tf.cast(x_val, dtype=tf.float32) / 255.
+x_val = tf.reshape(x_val, [-1, 28*28])
+y_val = tf.cast(y_val, dtype=tf.int32)
+y_val = tf.one_hot(y_val, depth=10)
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(128)
+network.evaluate(ds_val)

+ 69 - 0
ch08-Keras高层接口/save_load_weight.py

@@ -0,0 +1,69 @@
+import  os
+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
+
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+    """
+    x is a simple image, not a batch
+    """
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    x = tf.reshape(x, [28*28])
+    y = tf.cast(y, dtype=tf.int32)
+    y = tf.one_hot(y, depth=10)
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz)
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+sample = next(iter(db))
+print(sample[0].shape, sample[1].shape)
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+
+
+
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+
+network.fit(db, epochs=3, validation_data=ds_val, validation_freq=2)
+ 
+network.evaluate(ds_val)
+
+network.save_weights('weights.ckpt')
+print('saved weights.')
+del network
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+network.load_weights('weights.ckpt')
+print('loaded weights!')
+network.evaluate(ds_val)

BIN
ch08-Keras高层接口/模型加载与保存.pdf


+ 224 - 0
ch09-过拟合/9.8-over-fitting-and-under-fitting.py

@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+@author: HuRuiFeng
+@file: 9.8-over-fitting-and-under-fitting.py
+@time: 2020/2/25 21:14
+@desc: 9.8 过拟合问题实战的代码
+       from mpl_toolkits.mplot3d import Axes3D 这个必须添加,解决3d报错问题
+"""
+
+import matplotlib.pyplot as plt
+# 导入数据集生成工具
+import numpy as np
+import seaborn as sns
+from sklearn.datasets import make_moons
+from sklearn.model_selection import train_test_split
+from tensorflow.keras import layers, Sequential, regularizers
+from mpl_toolkits.mplot3d import Axes3D
+
+plt.rcParams['font.size'] = 16
+plt.rcParams['font.family'] = ['STKaiti']
+plt.rcParams['axes.unicode_minus'] = False
+
+OUTPUT_DIR = 'output_dir'
+N_EPOCHS = 500
+
+
+def load_dataset():
+    # 采样点数
+    N_SAMPLES = 1000
+    # 测试数量比率
+    TEST_SIZE = None
+
+    # 从 moon 分布中随机采样 1000 个点,并切分为训练集-测试集
+    X, y = make_moons(n_samples=N_SAMPLES, noise=0.25, random_state=100)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=42)
+    return X, y, X_train, X_test, y_train, y_test
+
+
+def make_plot(X, y, plot_name, file_name, XX=None, YY=None, preds=None, dark=False, output_dir=OUTPUT_DIR):
+    # 绘制数据集的分布, X 为 2D 坐标, y 为数据点的标签
+    if dark:
+        plt.style.use('dark_background')
+    else:
+        sns.set_style("whitegrid")
+    axes = plt.gca()
+    axes.set_xlim([-2, 3])
+    axes.set_ylim([-1.5, 2])
+    axes.set(xlabel="$x_1$", ylabel="$x_2$")
+    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
+    plt.subplots_adjust(left=0.20)
+    plt.subplots_adjust(right=0.80)
+    if XX is not None and YY is not None and preds is not None:
+        plt.contourf(XX, YY, preds.reshape(XX.shape), 25, alpha=0.08, cmap=plt.cm.Spectral)
+        plt.contour(XX, YY, preds.reshape(XX.shape), levels=[.5], cmap="Greys", vmin=0, vmax=.6)
+    # 绘制散点图,根据标签区分颜色m=markers
+    markers = ['o' if i == 1 else 's' for i in y.ravel()]
+    mscatter(X[:, 0], X[:, 1], c=y.ravel(), s=20, cmap=plt.cm.Spectral, edgecolors='none', m=markers, ax=axes)
+    # 保存矢量图
+    plt.savefig(output_dir + '/' + file_name)
+    plt.close()
+
+
+def mscatter(x, y, ax=None, m=None, **kw):
+    import matplotlib.markers as mmarkers
+    if not ax: ax = plt.gca()
+    sc = ax.scatter(x, y, **kw)
+    if (m is not None) and (len(m) == len(x)):
+        paths = []
+        for marker in m:
+            if isinstance(marker, mmarkers.MarkerStyle):
+                marker_obj = marker
+            else:
+                marker_obj = mmarkers.MarkerStyle(marker)
+            path = marker_obj.get_path().transformed(
+                marker_obj.get_transform())
+            paths.append(path)
+        sc.set_paths(paths)
+    return sc
+
+
+def network_layers_influence(X_train, y_train):
+    # 构建 5 种不同层数的网络
+    for n in range(5):
+        # 创建容器
+        model = Sequential()
+        # 创建第一层
+        model.add(layers.Dense(8, input_dim=2, activation='relu'))
+        # 添加 n 层,共 n+2 层
+        for _ in range(n):
+            model.add(layers.Dense(32, activation='relu'))
+        # 创建最末层
+        model.add(layers.Dense(1, activation='sigmoid'))
+        # 模型装配与训练
+        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
+        # 绘制不同层数的网络决策边界曲线
+        # 可视化的 x 坐标范围为[-2, 3]
+        xx = np.arange(-2, 3, 0.01)
+        # 可视化的 y 坐标范围为[-1.5, 2]
+        yy = np.arange(-1.5, 2, 0.01)
+        # 生成 x-y 平面采样网格点,方便可视化
+        XX, YY = np.meshgrid(xx, yy)
+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
+        title = "网络层数:{0}".format(2 + n)
+        file = "网络容量_%i.png" % (2 + n)
+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/network_layers')
+
+
+def dropout_influence(X_train, y_train):
+    # 构建 5 种不同数量 Dropout 层的网络
+    for n in range(5):
+        # 创建容器
+        model = Sequential()
+        # 创建第一层
+        model.add(layers.Dense(8, input_dim=2, activation='relu'))
+        counter = 0
+        # 网络层数固定为 5
+        for _ in range(5):
+            model.add(layers.Dense(64, activation='relu'))
+        # 添加 n 个 Dropout 层
+        if counter < n:
+            counter += 1
+            model.add(layers.Dropout(rate=0.5))
+
+        # 输出层
+        model.add(layers.Dense(1, activation='sigmoid'))
+        # 模型装配
+        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+        # 训练
+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
+        # 绘制不同 Dropout 层数的决策边界曲线
+        # 可视化的 x 坐标范围为[-2, 3]
+        xx = np.arange(-2, 3, 0.01)
+        # 可视化的 y 坐标范围为[-1.5, 2]
+        yy = np.arange(-1.5, 2, 0.01)
+        # 生成 x-y 平面采样网格点,方便可视化
+        XX, YY = np.meshgrid(xx, yy)
+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
+        title = "无Dropout层" if n == 0 else "{0}层 Dropout层".format(n)
+        file = "Dropout_%i.png" % n
+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/dropout')
+
+
+def build_model_with_regularization(_lambda):
+    # 创建带正则化项的神经网络
+    model = Sequential()
+    model.add(layers.Dense(8, input_dim=2, activation='relu'))  # 不带正则化项
+    # 2-4层均是带 L2 正则化项
+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
+    model.add(layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(_lambda)))
+    # 输出层
+    model.add(layers.Dense(1, activation='sigmoid'))
+    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])  # 模型装配
+    return model
+
+
+def plot_weights_matrix(model, layer_index, plot_name, file_name, output_dir=OUTPUT_DIR):
+    # 绘制权值范围函数
+    # 提取指定层的权值矩阵
+    weights = model.layers[layer_index].get_weights()[0]
+    shape = weights.shape
+    # 生成和权值矩阵等大小的网格坐标
+    X = np.array(range(shape[1]))
+    Y = np.array(range(shape[0]))
+    X, Y = np.meshgrid(X, Y)
+    # 绘制3D图
+    fig = plt.figure()
+    ax = fig.gca(projection='3d')
+    ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
+    ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
+    ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
+    plt.title(plot_name, fontsize=20, fontproperties='SimHei')
+    # 绘制权值矩阵范围
+    ax.plot_surface(X, Y, weights, cmap=plt.get_cmap('rainbow'), linewidth=0)
+    # 设置坐标轴名
+    ax.set_xlabel('网格x坐标', fontsize=16, rotation=0, fontproperties='SimHei')
+    ax.set_ylabel('网格y坐标', fontsize=16, rotation=0, fontproperties='SimHei')
+    ax.set_zlabel('权值', fontsize=16, rotation=90, fontproperties='SimHei')
+    # 保存矩阵范围图
+    plt.savefig(output_dir + "/" + file_name + ".svg")
+    plt.close(fig)
+
+
+def regularizers_influence(X_train, y_train):
+    for _lambda in [1e-5, 1e-3, 1e-1, 0.12, 0.13]:  # 设置不同的正则化系数
+        # 创建带正则化项的模型
+        model = build_model_with_regularization(_lambda)
+        # 模型训练
+        model.fit(X_train, y_train, epochs=N_EPOCHS, verbose=1)
+        # 绘制权值范围
+        layer_index = 2
+        plot_title = "正则化系数:{}".format(_lambda)
+        file_name = "正则化网络权值_" + str(_lambda)
+        # 绘制网络权值范围图
+        plot_weights_matrix(model, layer_index, plot_title, file_name, output_dir=OUTPUT_DIR + '/regularizers')
+        # 绘制不同正则化系数的决策边界线
+        # 可视化的 x 坐标范围为[-2, 3]
+        xx = np.arange(-2, 3, 0.01)
+        # 可视化的 y 坐标范围为[-1.5, 2]
+        yy = np.arange(-1.5, 2, 0.01)
+        # 生成 x-y 平面采样网格点,方便可视化
+        XX, YY = np.meshgrid(xx, yy)
+        preds = model.predict_classes(np.c_[XX.ravel(), YY.ravel()])
+        title = "正则化系数:{}".format(_lambda)
+        file = "正则化_%g.svg" % _lambda
+        make_plot(X_train, y_train, title, file, XX, YY, preds, output_dir=OUTPUT_DIR + '/regularizers')
+
+
+def main():
+    X, y, X_train, X_test, y_train, y_test = load_dataset()
+    # 绘制数据集分布
+    make_plot(X, y, None, "月牙形状二分类数据集分布.svg")
+    # 网络层数的影响
+    network_layers_influence(X_train, y_train)
+    # Dropout的影响
+    dropout_influence(X_train, y_train)
+    # 正则化的影响
+    regularizers_influence(X_train, y_train)
+
+
+if __name__ == '__main__':
+    main()

BIN
ch09-过拟合/Regularization.pdf


+ 61 - 0
ch09-过拟合/compile_fit.py

@@ -0,0 +1,61 @@
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+    """
+    x is a simple image, not a batch
+    """
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    x = tf.reshape(x, [28*28])
+    y = tf.cast(y, dtype=tf.int32)
+    y = tf.one_hot(y, depth=10)
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz)
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+sample = next(iter(db))
+print(sample[0].shape, sample[1].shape)
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+
+
+
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+
+network.fit(db, epochs=5, validation_data=ds_val,
+              validation_steps=2)
+ 
+network.evaluate(ds_val)
+
+sample = next(iter(ds_val))
+x = sample[0]
+y = sample[1] # one-hot
+pred = network.predict(x) # [b, 10]
+# convert back to number 
+y = tf.argmax(y, axis=1)
+pred = tf.argmax(pred, axis=1)
+
+print(pred)
+print(y)

+ 111 - 0
ch09-过拟合/dropout.py

@@ -0,0 +1,111 @@
+import  os
+os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
+
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    y = tf.cast(y, dtype=tf.int32)
+
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
+
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dropout(0.5), # 0.5 rate to drop
+                     layers.Dense(128, activation='relu'),
+                     layers.Dropout(0.5), # 0.5 rate to drop
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+optimizer = optimizers.Adam(lr=0.01)
+
+
+
+for step, (x,y) in enumerate(db):
+
+    with tf.GradientTape() as tape:
+        # [b, 28, 28] => [b, 784]
+        x = tf.reshape(x, (-1, 28*28))
+        # [b, 784] => [b, 10]
+        out = network(x, training=True)
+        # [b] => [b, 10]
+        y_onehot = tf.one_hot(y, depth=10) 
+        # [b]
+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
+
+
+        loss_regularization = []
+        for p in network.trainable_variables:
+            loss_regularization.append(tf.nn.l2_loss(p))
+        loss_regularization = tf.reduce_sum(tf.stack(loss_regularization))
+
+        loss = loss + 0.0001 * loss_regularization
+ 
+
+    grads = tape.gradient(loss, network.trainable_variables)
+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
+
+
+    if step % 100 == 0:
+
+        print(step, 'loss:', float(loss), 'loss_regularization:', float(loss_regularization)) 
+
+
+    # evaluate
+    if step % 500 == 0:
+        total, total_correct = 0., 0
+
+        for step, (x, y) in enumerate(ds_val): 
+            # [b, 28, 28] => [b, 784]
+            x = tf.reshape(x, (-1, 28*28))
+            # [b, 784] => [b, 10] 
+            out = network(x, training=True)  
+            # [b, 10] => [b] 
+            pred = tf.argmax(out, axis=1) 
+            pred = tf.cast(pred, dtype=tf.int32)
+            # bool type 
+            correct = tf.equal(pred, y)
+            # bool tensor => int tensor => numpy
+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
+            total += x.shape[0]
+
+        print(step, 'Evaluate Acc with drop:', total_correct/total)
+
+        total, total_correct = 0., 0
+
+        for step, (x, y) in enumerate(ds_val): 
+            # [b, 28, 28] => [b, 784]
+            x = tf.reshape(x, (-1, 28*28))
+            # [b, 784] => [b, 10] 
+            out = network(x, training=False)  
+            # [b, 10] => [b] 
+            pred = tf.argmax(out, axis=1) 
+            pred = tf.cast(pred, dtype=tf.int32)
+            # bool type 
+            correct = tf.equal(pred, y)
+            # bool tensor => int tensor => numpy
+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
+            total += x.shape[0]
+
+        print(step, 'Evaluate Acc without drop:', total_correct/total)

+ 0 - 0
ch09/lenna.png → ch09-过拟合/lenna.png


+ 0 - 0
ch09/lenna_crop.png → ch09-过拟合/lenna_crop.png


+ 0 - 0
ch09/lenna_crop2.png → ch09-过拟合/lenna_crop2.png


+ 0 - 0
ch09/lenna_eras.png → ch09-过拟合/lenna_eras.png


+ 0 - 0
ch09/lenna_eras2.png → ch09-过拟合/lenna_eras2.png


+ 0 - 0
ch09/lenna_flip.png → ch09-过拟合/lenna_flip.png


+ 0 - 0
ch09/lenna_flip2.png → ch09-过拟合/lenna_flip2.png


+ 0 - 0
ch09/lenna_guassian.png → ch09-过拟合/lenna_guassian.png


+ 0 - 0
ch09/lenna_perspective.png → ch09-过拟合/lenna_perspective.png


+ 0 - 0
ch09/lenna_resize.png → ch09-过拟合/lenna_resize.png


+ 0 - 0
ch09/lenna_rotate.png → ch09-过拟合/lenna_rotate.png


+ 0 - 0
ch09/lenna_rotate2.png → ch09-过拟合/lenna_rotate2.png


BIN
ch09-过拟合/misc.pdf


+ 88 - 0
ch09-过拟合/regularization.py

@@ -0,0 +1,88 @@
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    y = tf.cast(y, dtype=tf.int32)
+
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_val, y_val) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+db = tf.data.Dataset.from_tensor_slices((x,y))
+db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)
+
+ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+ds_val = ds_val.map(preprocess).batch(batchsz) 
+
+
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+optimizer = optimizers.Adam(lr=0.01)
+
+
+
+for step, (x,y) in enumerate(db):
+
+    with tf.GradientTape() as tape:
+        # [b, 28, 28] => [b, 784]
+        x = tf.reshape(x, (-1, 28*28))
+        # [b, 784] => [b, 10]
+        out = network(x)
+        # [b] => [b, 10]
+        y_onehot = tf.one_hot(y, depth=10) 
+        # [b]
+        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))
+
+
+        loss_regularization = []
+        for p in network.trainable_variables:
+            loss_regularization.append(tf.nn.l2_loss(p))
+        loss_regularization = tf.reduce_sum(tf.stack(loss_regularization))
+
+        loss = loss + 0.0001 * loss_regularization
+ 
+
+    grads = tape.gradient(loss, network.trainable_variables)
+    optimizer.apply_gradients(zip(grads, network.trainable_variables))
+
+
+    if step % 100 == 0:
+
+        print(step, 'loss:', float(loss), 'loss_regularization:', float(loss_regularization)) 
+
+
+    # evaluate
+    if step % 500 == 0:
+        total, total_correct = 0., 0
+
+        for step, (x, y) in enumerate(ds_val): 
+            # [b, 28, 28] => [b, 784]
+            x = tf.reshape(x, (-1, 28*28))
+            # [b, 784] => [b, 10]
+            out = network(x) 
+            # [b, 10] => [b] 
+            pred = tf.argmax(out, axis=1) 
+            pred = tf.cast(pred, dtype=tf.int32)
+            # bool type 
+            correct = tf.equal(pred, y)
+            # bool tensor => int tensor => numpy
+            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
+            total += x.shape[0]
+
+        print(step, 'Evaluate Acc:', total_correct/total)

+ 73 - 0
ch09-过拟合/train_evalute_test.py

@@ -0,0 +1,73 @@
+import  tensorflow as tf
+from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
+
+
+def preprocess(x, y):
+    """
+    x is a simple image, not a batch
+    """
+    x = tf.cast(x, dtype=tf.float32) / 255.
+    x = tf.reshape(x, [28*28])
+    y = tf.cast(y, dtype=tf.int32)
+    y = tf.one_hot(y, depth=10)
+    return x,y
+
+
+batchsz = 128
+(x, y), (x_test, y_test) = datasets.mnist.load_data()
+print('datasets:', x.shape, y.shape, x.min(), x.max())
+
+
+
+idx = tf.range(60000)
+idx = tf.random.shuffle(idx)
+x_train, y_train = tf.gather(x, idx[:50000]), tf.gather(y, idx[:50000])
+x_val, y_val = tf.gather(x, idx[-10000:]) , tf.gather(y, idx[-10000:])
+print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)
+db_train = tf.data.Dataset.from_tensor_slices((x_train,y_train))
+db_train = db_train.map(preprocess).shuffle(50000).batch(batchsz)
+
+db_val = tf.data.Dataset.from_tensor_slices((x_val,y_val))
+db_val = db_val.map(preprocess).shuffle(10000).batch(batchsz)
+
+
+
+db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
+db_test = db_test.map(preprocess).batch(batchsz) 
+
+sample = next(iter(db_train))
+print(sample[0].shape, sample[1].shape)
+
+
+network = Sequential([layers.Dense(256, activation='relu'),
+                     layers.Dense(128, activation='relu'),
+                     layers.Dense(64, activation='relu'),
+                     layers.Dense(32, activation='relu'),
+                     layers.Dense(10)])
+network.build(input_shape=(None, 28*28))
+network.summary()
+
+
+
+
+network.compile(optimizer=optimizers.Adam(lr=0.01),
+		loss=tf.losses.CategoricalCrossentropy(from_logits=True),
+		metrics=['accuracy']
+	)
+
+network.fit(db_train, epochs=6, validation_data=db_val, validation_freq=2)
+
+print('Test performance:') 
+network.evaluate(db_test)
+ 
+
+sample = next(iter(db_test))
+x = sample[0]
+y = sample[1] # one-hot
+pred = network.predict(x) # [b, 10]
+# convert back to number 
+y = tf.argmax(y, axis=1)
+pred = tf.argmax(pred, axis=1)
+
+print(pred)
+print(y)

BIN
ch09-过拟合/交叉验证.pdf


BIN
ch09-过拟合/学习率与动量.pdf


BIN
ch09-过拟合/过拟合与欠拟合.pdf


+ 0 - 36
ch09/nb.py

@@ -1,36 +0,0 @@
-#%%
-import tensorflow as tf 
-from    tensorflow.keras import layers
-
-pip install -U scikit-learn
-
-#%%
-# 添加dropout操作
-x = tf.nn.dropout(x, rate=0.5)
-# 添加Dropout层
-model.add(layers.Dropout(rate=0.5))
-
-# 手动计算每个张量的范数
-loss_reg = lambda_ * tf.reduce_sum(tf.square(w))
-# 在层方式时添加范数函数
-Dense(256, activation='relu',
-                    kernel_regularizer=regularizers.l2(_lambda))
-
-#%%
-#                     
-# 创建网络参数w1,w2
-w1 = tf.random.normal([4,3])
-w2 = tf.random.normal([4,2])
-# 计算L1正则化项
-loss_reg = tf.reduce_sum(tf.math.abs(w1))\
-    + tf.reduce_sum(tf.math.abs(w2))
-
-
-# 计算L2正则化项
-loss_reg = tf.reduce_sum(tf.square(w1))\
-    + tf.reduce_sum(tf.square(w2))
-
-#%%
-loss_reg
-
-#%%

BIN
ch10-卷积神经网络/BatchNorm.pdf


BIN
ch10-卷积神经网络/CIFAR与VGG实战.pdf


BIN
ch10-卷积神经网络/ResNet与DenseNet.pdf


Alguns arquivos não foram mostrados porque muitos arquivos mudaram nesse diff