{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.naive_bayes import GaussianNB#导入了高斯朴素贝叶斯函数\n",
    "import numpy as np\n",
    "import random"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "下边是高斯朴素贝叶斯的使用方法，包括创建分类器、训练分类器以及使用分类器进行预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2]\n"
     ]
    }
   ],
   "source": [
    "data = np.array([[-1,-1], [-2,-1], [-3,-2], [1,1], [2,1],[3,2]])\n",
    "label = np.array([1,1,1,2,2,2])\n",
    "clf = GaussianNB()\n",
    "clf.fit(data, label)\n",
    "print(clf.predict([[-0.8,3]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pylab as pl\n",
    "\n",
    "#import numpy as np\n",
    "#import matplotlib.pyplot as plt\n",
    "#plt.ioff()\n",
    "\n",
    "def prettyPicture(clf, X_test, y_test):\n",
    "    x_min = 0.0; x_max = 1.0\n",
    "    y_min = 0.0; y_max = 1.0\n",
    "\n",
    "    # Plot the decision boundary. For that, we will assign a color to each\n",
    "    # point in the mesh [x_min, m_max]x[y_min, y_max].\n",
    "    h = .01  # step size in the mesh\n",
    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n",
    "    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
    "\n",
    "    # Put the result into a color plot\n",
    "    Z = Z.reshape(xx.shape)\n",
    "    plt.xlim(xx.min(), xx.max())\n",
    "    plt.ylim(yy.min(), yy.max())\n",
    "\n",
    "    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)\n",
    "\n",
    "    # Plot also the test points\n",
    "    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
    "    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
    "    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
    "    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
    "\n",
    "    plt.scatter(grade_sig, bumpy_sig, color = \"b\", label=\"fast\")\n",
    "    plt.scatter(grade_bkg, bumpy_bkg, color = \"r\", label=\"slow\")\n",
    "    plt.legend()\n",
    "    plt.xlabel(\"bumpiness\")\n",
    "    plt.ylabel(\"grade\")\n",
    "\n",
    "    plt.savefig(\"test.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 此处产生一个训练数据集\n",
    "def makeTerrainData(n_points=1000):\n",
    "###############################################################################\n",
    "### make the toy dataset\n",
    "    random.seed(42)\n",
    "    grade = [random.random() for ii in range(0,n_points)]\n",
    "    bumpy = [random.random() for ii in range(0,n_points)]\n",
    "    error = [random.random() for ii in range(0,n_points)]\n",
    "    y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)]\n",
    "    for ii in range(0, len(y)):\n",
    "        if grade[ii]>0.8 or bumpy[ii]>0.8:\n",
    "            y[ii] = 1.0\n",
    "\n",
    "### split into train/test sets\n",
    "    X = [[gg, ss] for gg, ss in zip(grade, bumpy)]\n",
    "    split = int(0.75*n_points)\n",
    "    X_train = X[0:split]\n",
    "    X_test  = X[split:]\n",
    "    y_train = y[0:split]\n",
    "    y_test  = y[split:]\n",
    "\n",
    "    grade_sig = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==0]\n",
    "    bumpy_sig = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==0]\n",
    "    grade_bkg = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==1]\n",
    "    bumpy_bkg = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==1]\n",
    "\n",
    "#    training_data = {\"fast\":{\"grade\":grade_sig, \"bumpiness\":bumpy_sig}\n",
    "#            , \"slow\":{\"grade\":grade_bkg, \"bumpiness\":bumpy_bkg}}\n",
    "\n",
    "\n",
    "    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
    "    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
    "    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
    "    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
    "\n",
    "    test_data = {\"fast\":{\"grade\":grade_sig, \"bumpiness\":bumpy_sig}\n",
    "            , \"slow\":{\"grade\":grade_bkg, \"bumpiness\":bumpy_bkg}}\n",
    "\n",
    "    return X_train, y_train, X_test, y_test\n",
    "#    return training_data, test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "分类器的正确率是 88.4 %\n"
     ]
    }
   ],
   "source": [
    "f_train,l_train,f_test,l_test = makeTerrainData()\n",
    "clf = GaussianNB()\n",
    "clf.fit(f_train, l_train)\n",
    "result = clf.predict(f_test)\n",
    "#print(result)\n",
    "\n",
    "num = 0\n",
    "corr = 0\n",
    "#print(len(result),len(l_test))\n",
    "\n",
    "for x in range(0,len(result)):\n",
    "    if result[x] == l_test[x]:\n",
    "        corr+=1\n",
    "    num += 1\n",
    "print(\"分类器的正确率是\",corr/num*100,\"%\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}