"""Gaussian Naive Bayes walkthrough: a toy example and a synthetic terrain classifier."""
import random

import numpy as np


def prettyPicture(clf, X_test, y_test, out_path="test.png"):
    """Draw a classifier's decision regions with the test points on top, then save the figure.

    The unit square [0, 1) x [0, 1) is sampled on a 0.01 grid, every grid
    point is passed to ``clf.predict`` and the predictions are rendered as a
    colour mesh. The test points are scattered over it: label 0 in blue
    ("fast"), label 1 in red ("slow"). Points with any other label are not
    drawn.

    Parameters
    ----------
    clf : object
        Fitted classifier; only ``clf.predict`` is called, with an (n, 2) array.
    X_test : sequence of 2-element sequences
        Test points as ``[grade, bumpiness]`` pairs (column 0 goes on the
        x-axis, column 1 on the y-axis).
    y_test : sequence
        One label per test point.
    out_path : str, optional
        Where the figure is written. Defaults to ``"test.png"``, the path that
        was previously hard-coded.
    """
    # Imported lazily so the data helper below stays usable when matplotlib is
    # not installed. pyplot replaces the discouraged ``pylab`` import; the
    # original body called ``plt`` without ever importing it.
    import matplotlib.pyplot as plt

    x_min, x_max = 0.0, 1.0
    y_min, y_max = 0.0, 1.0
    step = 0.01  # grid resolution of the decision-region mesh

    # Classify every point of the mesh [x_min, x_max) x [y_min, y_max).
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
    Z = np.asarray(clf.predict(np.c_[xx.ravel(), yy.ravel()])).reshape(xx.shape)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.pcolormesh(xx, yy, Z, cmap="seismic")

    # reshape(-1, 2) keeps the column indexing valid for an empty test set.
    points = np.asarray(X_test, dtype=float).reshape(-1, 2)
    labels = np.asarray(y_test)
    is_fast = labels == 0
    is_slow = labels == 1

    plt.scatter(points[is_fast, 0], points[is_fast, 1], color="b", label="fast")
    plt.scatter(points[is_slow, 0], points[is_slow, 1], color="r", label="slow")
    plt.legend()
    # Column 0 (grade) is on the x-axis and column 1 (bumpiness) on the y-axis;
    # the two labels used to be swapped.
    plt.xlabel("grade")
    plt.ylabel("bumpiness")

    plt.savefig(out_path)


def makeTerrainData(n_points=1000):
    """Generate a reproducible synthetic terrain dataset and split it 75/25.

    Three lists of ``n_points`` values are drawn with ``random.random()``:
    grade, bumpiness and a noise term. A point's label is
    ``round(grade * bumpiness + 0.3 + 0.1 * noise)``, except that any point
    with grade > 0.8 or bumpiness > 0.8 is always labelled 1.

    Side effect: the global ``random`` generator is re-seeded with 42 on every
    call, so repeated calls return identical data.

    Parameters
    ----------
    n_points : int, optional
        Total number of points to generate (default 1000).

    Returns
    -------
    X_train, y_train, X_test, y_test : list
        ``X_*`` are lists of ``[grade, bumpiness]`` pairs and ``y_*`` are lists
        of float labels (0.0 or 1.0). The first ``int(0.75 * n_points)`` points
        form the training set and the remainder the test set.
    """
    random.seed(42)
    grade = [random.random() for _ in range(n_points)]
    bumpy = [random.random() for _ in range(n_points)]
    error = [random.random() for _ in range(n_points)]

    # Labels are uniformly float. The original list mixed ints from round()
    # with the float override 1.0; the values compare equal either way.
    y = [
        1.0 if (g > 0.8 or b > 0.8) else float(round(g * b + 0.3 + 0.1 * e))
        for g, b, e in zip(grade, bumpy, error)
    ]
    X = [[g, b] for g, b in zip(grade, bumpy)]

    split = int(0.75 * n_points)
    return X[:split], y[:split], X[split:], y[split:]


# ``__name__`` is "__main__" inside a notebook kernel and when run as a script,
# so the walkthrough below still executes there; the guard only keeps it (and
# the scikit-learn dependency) out of the way when the helpers are imported.
if __name__ == "__main__":
    from sklearn.naive_bayes import GaussianNB  # Gaussian Naive Bayes classifier

    # Basic usage of Gaussian Naive Bayes: create the classifier, train it,
    # and use it to predict.
    data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    label = np.array([1, 1, 1, 2, 2, 2])
    toy_clf = GaussianNB()
    toy_clf.fit(data, label)
    print(toy_clf.predict([[-0.8, 3]]))

    # Train on the synthetic terrain data and measure accuracy on the test split.
    f_train, l_train, f_test, l_test = makeTerrainData()
    clf = GaussianNB()
    clf.fit(f_train, l_train)
    result = clf.predict(f_test)

    num = len(result)
    corr = sum(int(predicted == actual) for predicted, actual in zip(result, l_test))
    # The message reads "The classifier's accuracy is".
    print("分类器的正确率是", corr / num * 100, "%")
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }