# nb.py — Embedding / RNN / LSTM / GRU examples with TensorFlow 2 and Keras,
# plus gradient-clipping demonstrations.
  1. #%%
  2. import tensorflow as tf
  3. from tensorflow import keras
  4. from tensorflow.keras import layers
  5. import matplotlib.pyplot as plt
  6. #%%
  7. x = tf.range(10)
  8. x = tf.random.shuffle(x)
  9. # 创建共10个单词,每个单词用长度为4的向量表示的层
  10. net = layers.Embedding(10, 4)
  11. out = net(x)
  12. out
  13. #%%
  14. net.embeddings
  15. net.embeddings.trainable
  16. net.trainable = False
  17. #%%
  18. # 从预训练模型中加载词向量表
  19. embed_glove = load_embed('glove.6B.50d.txt')
  20. # 直接利用预训练的词向量表初始化Embedding层
  21. net.set_weights([embed_glove])
  22. #%%
  23. cell = layers.SimpleRNNCell(3)
  24. cell.build(input_shape=(None,4))
  25. cell.trainable_variables
  26. #%%
  27. # 初始化状态向量
  28. h0 = [tf.zeros([4, 64])]
  29. x = tf.random.normal([4, 80, 100])
  30. xt = x[:,0,:]
  31. # 构建输入特征f=100,序列长度s=80,状态长度=64的Cell
  32. cell = layers.SimpleRNNCell(64)
  33. out, h1 = cell(xt, h0) # 前向计算
  34. print(out.shape, h1[0].shape)
  35. print(id(out), id(h1[0]))
  36. #%%
  37. h = h0
  38. # 在序列长度的维度解开输入,得到xt:[b,f]
  39. for xt in tf.unstack(x, axis=1):
  40. out, h = cell(xt, h) # 前向计算
  41. # 最终输出可以聚合每个时间戳上的输出,也可以只取最后时间戳的输出
  42. out = out
  43. #%%
  44. x = tf.random.normal([4,80,100])
  45. xt = x[:,0,:] # 取第一个时间戳的输入x0
  46. # 构建2个Cell,先cell0,后cell1
  47. cell0 = layers.SimpleRNNCell(64)
  48. cell1 = layers.SimpleRNNCell(64)
  49. h0 = [tf.zeros([4,64])] # cell0的初始状态向量
  50. h1 = [tf.zeros([4,64])] # cell1的初始状态向量
  51. out0, h0 = cell0(xt, h0)
  52. out1, h1 = cell1(out0, h1)
  53. #%%
  54. for xt in tf.unstack(x, axis=1):
  55. # xtw作为输入,输出为out0
  56. out0, h0 = cell0(xt, h0)
  57. # 上一个cell的输出out0作为本cell的输入
  58. out1, h1 = cell1(out0, h1)
  59. #%%
  60. print(x.shape)
  61. # 保存上一层的所有时间戳上面的输出
  62. middle_sequences = []
  63. # 计算第一层的所有时间戳上的输出,并保存
  64. for xt in tf.unstack(x, axis=1):
  65. out0, h0 = cell0(xt, h0)
  66. middle_sequences.append(out0)
  67. # 计算第二层的所有时间戳上的输出
  68. # 如果不是末层,需要保存所有时间戳上面的输出
  69. for xt in middle_sequences:
  70. out1, h1 = cell1(xt, h1)
  71. #%%
  72. layer = layers.SimpleRNN(64)
  73. x = tf.random.normal([4, 80, 100])
  74. out = layer(x)
  75. out.shape
  76. #%%
  77. layer = layers.SimpleRNN(64,return_sequences=True)
  78. out = layer(x)
  79. out
  80. #%%
  81. net = keras.Sequential([ # 构建2层RNN网络
  82. # 除最末层外,都需要返回所有时间戳的输出
  83. layers.SimpleRNN(64, return_sequences=True),
  84. layers.SimpleRNN(64),
  85. ])
  86. out = net(x)
  87. #%%
  88. W = tf.ones([2,2]) # 任意创建某矩阵
  89. eigenvalues = tf.linalg.eigh(W)[0] # 计算特征值
  90. eigenvalues
  91. #%%
  92. val = [W]
  93. for i in range(10): # 矩阵相乘n次方
  94. val.append([val[-1]@W])
  95. # 计算L2范数
  96. norm = list(map(lambda x:tf.norm(x).numpy(),val))
  97. plt.plot(range(1,12),norm)
  98. plt.xlabel('n times')
  99. plt.ylabel('L2-norm')
  100. plt.savefig('w_n_times_1.svg')
  101. #%%
  102. W = tf.ones([2,2])*0.4 # 任意创建某矩阵
  103. eigenvalues = tf.linalg.eigh(W)[0] # 计算特征值
  104. print(eigenvalues)
  105. val = [W]
  106. for i in range(10):
  107. val.append([val[-1]@W])
  108. norm = list(map(lambda x:tf.norm(x).numpy(),val))
  109. plt.plot(range(1,12),norm)
  110. plt.xlabel('n times')
  111. plt.ylabel('L2-norm')
  112. plt.savefig('w_n_times_0.svg')
  113. #%%
  114. a=tf.random.uniform([2,2])
  115. tf.clip_by_value(a,0.4,0.6) # 梯度值裁剪
  116. #%%
  117. #%%
  118. a=tf.random.uniform([2,2]) * 5
  119. # 按范数方式裁剪
  120. b = tf.clip_by_norm(a, 5)
  121. tf.norm(a),tf.norm(b)
  122. #%%
  123. w1=tf.random.normal([3,3]) # 创建梯度张量1
  124. w2=tf.random.normal([3,3]) # 创建梯度张量2
  125. # 计算global norm
  126. global_norm=tf.math.sqrt(tf.norm(w1)**2+tf.norm(w2)**2)
  127. # 根据global norm和max norm=2裁剪
  128. (ww1,ww2),global_norm=tf.clip_by_global_norm([w1,w2],2)
  129. # 计算裁剪后的张量组的global norm
  130. global_norm2 = tf.math.sqrt(tf.norm(ww1)**2+tf.norm(ww2)**2)
  131. print(global_norm, global_norm2)
  132. #%%
  133. with tf.GradientTape() as tape:
  134. logits = model(x) # 前向传播
  135. loss = criteon(y, logits) # 误差计算
  136. # 计算梯度值
  137. grads = tape.gradient(loss, model.trainable_variables)
  138. grads, _ = tf.clip_by_global_norm(grads, 25) # 全局梯度裁剪
  139. # 利用裁剪后的梯度张量更新参数
  140. optimizer.apply_gradients(zip(grads, model.trainable_variables))
  141. #%%
  142. x = tf.random.normal([2,80,100])
  143. xt = x[:,0,:] # 得到一个时间戳的输入
  144. cell = layers.LSTMCell(64) # 创建Cell
  145. # 初始化状态和输出List,[h,c]
  146. state = [tf.zeros([2,64]),tf.zeros([2,64])]
  147. out, state = cell(xt, state) # 前向计算
  148. id(out),id(state[0]),id(state[1])
  149. #%%
  150. net = layers.LSTM(4)
  151. net.build(input_shape=(None,5,3))
  152. net.trainable_variables
  153. #%%
  154. net = layers.GRU(4)
  155. net.build(input_shape=(None,5,3))
  156. net.trainable_variables
  157. #%%
  158. # 初始化状态向量
  159. h = [tf.zeros([2,64])]
  160. cell = layers.GRUCell(64) # 新建GRU Cell
  161. for xt in tf.unstack(x, axis=1):
  162. out, h = cell(xt, h)
  163. out.shape
  164. #%%