# auto_efficency_regression.py
# Auto-MPG fuel-efficiency regression with TensorFlow 2 / Keras.
  1. #%%
  2. from __future__ import absolute_import, division, print_function, unicode_literals
  3. import pathlib
  4. import os
  5. os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
  6. import matplotlib.pyplot as plt
  7. import pandas as pd
  8. import seaborn as sns
  9. import tensorflow as tf
  10. from tensorflow import keras
  11. from tensorflow.keras import layers, losses
  12. print(tf.__version__)
  13. # 在线下载汽车效能数据集
  14. dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
  15. # 效能(公里数每加仑),气缸数,排量,马力,重量
  16. # 加速度,型号年份,产地
  17. column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
  18. 'Acceleration', 'Model Year', 'Origin']
  19. raw_dataset = pd.read_csv(dataset_path, names=column_names,
  20. na_values = "?", comment='\t',
  21. sep=" ", skipinitialspace=True)
  22. dataset = raw_dataset.copy()
  23. # 查看部分数据
  24. dataset.tail()
  25. dataset.head()
  26. dataset
  27. #%%
  28. #%%
  29. # 统计空白数据,并清除
  30. dataset.isna().sum()
  31. dataset = dataset.dropna()
  32. dataset.isna().sum()
  33. dataset
  34. #%%
  35. # 处理类别型数据,其中origin列代表了类别1,2,3,分布代表产地:美国、欧洲、日本
  36. # 其弹出这一列
  37. origin = dataset.pop('Origin')
  38. # 根据origin列来写入新列
  39. dataset['USA'] = (origin == 1)*1.0
  40. dataset['Europe'] = (origin == 2)*1.0
  41. dataset['Japan'] = (origin == 3)*1.0
  42. dataset.tail()
  43. # 切分为训练集和测试集
  44. train_dataset = dataset.sample(frac=0.8,random_state=0)
  45. test_dataset = dataset.drop(train_dataset.index)
  46. #%% 统计数据
  47. sns.pairplot(train_dataset[["Cylinders", "Displacement", "Weight", "MPG"]],
  48. diag_kind="kde")
  49. #%%
  50. # 查看训练集的输入X的统计数据
  51. train_stats = train_dataset.describe()
  52. train_stats.pop("MPG")
  53. train_stats = train_stats.transpose()
  54. train_stats
  55. # 移动MPG油耗效能这一列为真实标签Y
  56. train_labels = train_dataset.pop('MPG')
  57. test_labels = test_dataset.pop('MPG')
  58. # 标准化数据
  59. def norm(x):
  60. return (x - train_stats['mean']) / train_stats['std']
  61. normed_train_data = norm(train_dataset)
  62. normed_test_data = norm(test_dataset)
  63. #%%
  64. print(normed_train_data.shape,train_labels.shape)
  65. print(normed_test_data.shape, test_labels.shape)
  66. #%%
  67. class Network(keras.Model):
  68. # 回归网络
  69. def __init__(self):
  70. super(Network, self).__init__()
  71. # 创建3个全连接层
  72. self.fc1 = layers.Dense(64, activation='relu')
  73. self.fc2 = layers.Dense(64, activation='relu')
  74. self.fc3 = layers.Dense(1)
  75. def call(self, inputs, training=None, mask=None):
  76. # 依次通过3个全连接层
  77. x = self.fc1(inputs)
  78. x = self.fc2(x)
  79. x = self.fc3(x)
  80. return x
  81. model = Network()
  82. model.build(input_shape=(None, 9))
  83. model.summary()
  84. optimizer = tf.keras.optimizers.RMSprop(0.001)
  85. train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values, train_labels.values))
  86. train_db = train_db.shuffle(100).batch(32)
  87. # # 未训练时测试
  88. # example_batch = normed_train_data[:10]
  89. # example_result = model.predict(example_batch)
  90. # example_result
  91. train_mae_losses = []
  92. test_mae_losses = []
  93. for epoch in range(200):
  94. for step, (x,y) in enumerate(train_db):
  95. with tf.GradientTape() as tape:
  96. out = model(x)
  97. loss = tf.reduce_mean(losses.MSE(y, out))
  98. mae_loss = tf.reduce_mean(losses.MAE(y, out))
  99. if step % 10 == 0:
  100. print(epoch, step, float(loss))
  101. grads = tape.gradient(loss, model.trainable_variables)
  102. optimizer.apply_gradients(zip(grads, model.trainable_variables))
  103. train_mae_losses.append(float(mae_loss))
  104. out = model(tf.constant(normed_test_data.values))
  105. test_mae_losses.append(tf.reduce_mean(losses.MAE(test_labels, out)))
  106. plt.figure()
  107. plt.xlabel('Epoch')
  108. plt.ylabel('MAE')
  109. plt.plot(train_mae_losses, label='Train')
  110. plt.plot(test_mae_losses, label='Test')
  111. plt.legend()
  112. # plt.ylim([0,10])
  113. plt.legend()
  114. plt.savefig('auto.svg')
  115. plt.show()
  116. #%%