ソースを参照

Add files via upload

L-zl 5 年 前
コミット
ff90bbaff9
13 ファイル変更1823 行追加2 行削除
  1. 264 0
      Adaboost.ipynb
  2. 194 0
      Guassian Naive Bayes.ipynb
  3. 227 0
      KNneighbor.ipynb
  4. 344 0
      Linear_regression.ipynb
  5. BIN
      Linear_regression.png
  6. 0 2
      README.md
  7. 256 0
      Random_Forest.ipynb
  8. BIN
      Random_Forest_acc.png
  9. 123 0
      SVM.ipynb
  10. 102 0
      feature scaling.ipynb
  11. 85 0
      ipynb_importer.py
  12. 178 0
      naïve bayes.ipynb
  13. 50 0
      prettyPicture.py

ファイルの差分が大きいため隠しています
+ 264 - 0
Adaboost.ipynb


+ 194 - 0
Guassian Naive Bayes.ipynb

@@ -0,0 +1,194 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.naive_bayes import GaussianNB#导入了高斯朴素贝叶斯函数\n",
+    "import numpy as np\n",
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "下边是高斯朴素贝叶斯的使用方法,包括创建分类器、训练分类器以及使用分类器进行预测"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2]\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = np.array([[-1,-1], [-2,-1], [-3,-2], [1,1], [2,1],[3,2]])\n",
+    "label = np.array([1,1,1,2,2,2])\n",
+    "clf = GaussianNB()\n",
+    "clf.fit(data, label)\n",
+    "print(clf.predict([[-0.8,3]]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pylab as pl\n",
+    "\n",
+    "#import numpy as np\n",
+    "#import matplotlib.pyplot as plt\n",
+    "#plt.ioff()\n",
+    "\n",
+    "def prettyPicture(clf, X_test, y_test):\n",
+    "    x_min = 0.0; x_max = 1.0\n",
+    "    y_min = 0.0; y_max = 1.0\n",
+    "\n",
+    "    # Plot the decision boundary. For that, we will assign a color to each\n",
+    "    # point in the mesh [x_min, m_max]x[y_min, y_max].\n",
+    "    h = .01  # step size in the mesh\n",
+    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\n",
+    "    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
+    "\n",
+    "    # Put the result into a color plot\n",
+    "    Z = Z.reshape(xx.shape)\n",
+    "    plt.xlim(xx.min(), xx.max())\n",
+    "    plt.ylim(yy.min(), yy.max())\n",
+    "\n",
+    "    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)\n",
+    "\n",
+    "    # Plot also the test points\n",
+    "    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
+    "    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
+    "    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
+    "    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
+    "\n",
+    "    plt.scatter(grade_sig, bumpy_sig, color = \"b\", label=\"fast\")\n",
+    "    plt.scatter(grade_bkg, bumpy_bkg, color = \"r\", label=\"slow\")\n",
+    "    plt.legend()\n",
+    "    plt.xlabel(\"bumpiness\")\n",
+    "    plt.ylabel(\"grade\")\n",
+    "\n",
+    "    plt.savefig(\"test.png\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 此处产生一个训练数据集\n",
+    "def makeTerrainData(n_points=1000):\n",
+    "###############################################################################\n",
+    "### make the toy dataset\n",
+    "    random.seed(42)\n",
+    "    grade = [random.random() for ii in range(0,n_points)]\n",
+    "    bumpy = [random.random() for ii in range(0,n_points)]\n",
+    "    error = [random.random() for ii in range(0,n_points)]\n",
+    "    y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii]) for ii in range(0,n_points)]\n",
+    "    for ii in range(0, len(y)):\n",
+    "        if grade[ii]>0.8 or bumpy[ii]>0.8:\n",
+    "            y[ii] = 1.0\n",
+    "\n",
+    "### split into train/test sets\n",
+    "    X = [[gg, ss] for gg, ss in zip(grade, bumpy)]\n",
+    "    split = int(0.75*n_points)\n",
+    "    X_train = X[0:split]\n",
+    "    X_test  = X[split:]\n",
+    "    y_train = y[0:split]\n",
+    "    y_test  = y[split:]\n",
+    "\n",
+    "    grade_sig = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==0]\n",
+    "    bumpy_sig = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==0]\n",
+    "    grade_bkg = [X_train[ii][0] for ii in range(0, len(X_train)) if y_train[ii]==1]\n",
+    "    bumpy_bkg = [X_train[ii][1] for ii in range(0, len(X_train)) if y_train[ii]==1]\n",
+    "\n",
+    "#    training_data = {\"fast\":{\"grade\":grade_sig, \"bumpiness\":bumpy_sig}\n",
+    "#            , \"slow\":{\"grade\":grade_bkg, \"bumpiness\":bumpy_bkg}}\n",
+    "\n",
+    "\n",
+    "    grade_sig = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
+    "    bumpy_sig = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==0]\n",
+    "    grade_bkg = [X_test[ii][0] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
+    "    bumpy_bkg = [X_test[ii][1] for ii in range(0, len(X_test)) if y_test[ii]==1]\n",
+    "\n",
+    "    test_data = {\"fast\":{\"grade\":grade_sig, \"bumpiness\":bumpy_sig}\n",
+    "            , \"slow\":{\"grade\":grade_bkg, \"bumpiness\":bumpy_bkg}}\n",
+    "\n",
+    "    return X_train, y_train, X_test, y_test\n",
+    "#    return training_data, test_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "分类器的正确率是 88.4 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "f_train,l_train,f_test,l_test = makeTerrainData()\n",
+    "clf = GaussianNB()\n",
+    "clf.fit(f_train, l_train)\n",
+    "result = clf.predict(f_test)\n",
+    "#print(result)\n",
+    "\n",
+    "num = 0\n",
+    "corr = 0\n",
+    "#print(len(result),len(l_test))\n",
+    "\n",
+    "for x in range(0,len(result)):\n",
+    "    if result[x] == l_test[x]:\n",
+    "        corr+=1\n",
+    "    num += 1\n",
+    "print(\"分类器的正确率是\",corr/num*100,\"%\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 227 - 0
KNneighbor.ipynb

@@ -0,0 +1,227 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-08-11T07:24:24.912061Z",
+     "start_time": "2019-08-11T07:24:24.894997Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "#!/usr/bin/python\n",
+    "import random\n",
+    "\n",
+    "\n",
+    "def makeTerrainData(n_points=100000):\n",
+    "    ###############################################################################\n",
+    "    # make the toy dataset\n",
+    "    random.seed(42)\n",
+    "    grade = [random.random() for ii in range(0, n_points)]\n",
+    "    bumpy = [random.random() for ii in range(0, n_points)]\n",
+    "    error = [random.random() for ii in range(0, n_points)]\n",
+    "    y = [round(grade[ii]*bumpy[ii]+0.3+0.1*error[ii])\n",
+    "         for ii in range(0, n_points)]\n",
+    "    for ii in range(0, len(y)):\n",
+    "        if grade[ii] > 0.8 or bumpy[ii] > 0.8:\n",
+    "            y[ii] = 1.0\n",
+    "\n",
+    "# split into train/test sets\n",
+    "    X = [[gg, ss] for gg, ss in zip(grade, bumpy)]\n",
+    "    split = int(0.75*n_points)\n",
+    "    X_train = X[0:split]\n",
+    "    X_test = X[split:]\n",
+    "    y_train = y[0:split]\n",
+    "    y_test = y[split:]\n",
+    "\n",
+    "    grade_sig = [X_train[ii][0]\n",
+    "                 for ii in range(0, len(X_train)) if y_train[ii] == 0]\n",
+    "    bumpy_sig = [X_train[ii][1]\n",
+    "                 for ii in range(0, len(X_train)) if y_train[ii] == 0]\n",
+    "    grade_bkg = [X_train[ii][0]\n",
+    "                 for ii in range(0, len(X_train)) if y_train[ii] == 1]\n",
+    "    bumpy_bkg = [X_train[ii][1]\n",
+    "                 for ii in range(0, len(X_train)) if y_train[ii] == 1]\n",
+    "\n",
+    "    training_data = {\"fast\": {\"grade\": grade_sig, \"bumpiness\": bumpy_sig}, \"slow\": {\n",
+    "        \"grade\": grade_bkg, \"bumpiness\": bumpy_bkg}}\n",
+    "\n",
+    "    grade_sig = [X_test[ii][0]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 0]\n",
+    "    bumpy_sig = [X_test[ii][1]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 0]\n",
+    "    grade_bkg = [X_test[ii][0]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 1]\n",
+    "    bumpy_bkg = [X_test[ii][1]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 1]\n",
+    "\n",
+    "    test_data = {\"fast\": {\"grade\": grade_sig, \"bumpiness\": bumpy_sig},\n",
+    "                 \"slow\": {\"grade\": grade_bkg, \"bumpiness\": bumpy_bkg}}\n",
+    "\n",
+    "    return X_train, y_train, X_test, y_test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-08-11T07:24:25.888057Z",
+     "start_time": "2019-08-11T07:24:25.874599Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "#!/usr/bin/python\n",
+    "\n",
+    "import base64\n",
+    "import json\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pylab as pl\n",
+    "\n",
+    "\n",
+    "def prettyPicture(clf, X_test, y_test):\n",
+    "    x_min = 0.0\n",
+    "    x_max = 1.0\n",
+    "    y_min = 0.0\n",
+    "    y_max = 1.0\n",
+    "\n",
+    "    # Plot the decision boundary. For that, we will assign a color to each\n",
+    "    # point in the mesh [x_min, m_max]x[y_min, y_max].\n",
+    "    h = .01  # step size in the mesh\n",
+    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
+    "                         np.arange(y_min, y_max, h))\n",
+    "    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
+    "\n",
+    "    # Put the result into a color plot\n",
+    "    Z = Z.reshape(xx.shape)\n",
+    "    plt.xlim(xx.min(), xx.max())\n",
+    "    plt.ylim(yy.min(), yy.max())\n",
+    "\n",
+    "    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)\n",
+    "\n",
+    "    # Plot also the test points\n",
+    "    grade_sig = [X_test[ii][0]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 0]\n",
+    "    bumpy_sig = [X_test[ii][1]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 0]\n",
+    "    grade_bkg = [X_test[ii][0]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 1]\n",
+    "    bumpy_bkg = [X_test[ii][1]\n",
+    "                 for ii in range(0, len(X_test)) if y_test[ii] == 1]\n",
+    "\n",
+    "    plt.scatter(grade_sig, bumpy_sig, color=\"b\", label=\"fast\")\n",
+    "    plt.scatter(grade_bkg, bumpy_bkg, color=\"r\", label=\"slow\")\n",
+    "    plt.legend()\n",
+    "    plt.xlabel(\"bumpiness\")\n",
+    "    plt.ylabel(\"grade\")\n",
+    "\n",
+    "    #plt.savefig(\"test.png\",dpi=300)\n",
+    "\n",
+    "\n",
+    "def output_image(name, format, bytes):\n",
+    "    image_start = \"BEGIN_IMAGE_f9825uweof8jw9fj4r8\"\n",
+    "    image_end = \"END_IMAGE_0238jfw08fjsiufhw8frs\"\n",
+    "    data = {}\n",
+    "    data['name'] = name\n",
+    "    data['format'] = format\n",
+    "    data['bytes'] = base64.encodestring(bytes)\n",
+    "    print(image_start+json.dumps(data)+image_end)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-08-11T07:24:27.027564Z",
+     "start_time": "2019-08-11T07:24:27.022499Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-08-11T07:24:29.714375Z",
+     "start_time": "2019-08-11T07:24:28.724489Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training data lenth is 75000\n",
+      "training time: 0.072 s\n",
+      "predicting time: 0.679 s\n",
+      "23902 25000\n",
+      "The accuracy is 95.608 %\n"
+     ]
+    }
+   ],
+   "source": [
+    "from time import time\n",
+    "from sklearn import neighbors\n",
+    "\n",
+    "features_train, labels_train, features_test, labels_test = makeTerrainData()\n",
+    "print(\"Training data lenth is\", len(features_train))\n",
+    "\n",
+    "\n",
+    "clf = neighbors.KNeighborsClassifier()\n",
+    "t0 = time()\n",
+    "clf = clf.fit(features_train, labels_train)\n",
+    "print(\"training time:\", round(time() - t0, 3), \"s\")\n",
+    "\n",
+    "t1 = time()\n",
+    "result = clf.predict(features_test)\n",
+    "print(\"predicting time:\", round(time() - t1, 3), \"s\")\n",
+    "corr = 0\n",
+    "for x in range(0, len(result)):\n",
+    "    if result[x] == labels_test[x]:\n",
+    "        corr = corr + 1\n",
+    "print(corr, len(result))\n",
+    "num = float(len(result))\n",
+    "acc = round(corr/num*100, 3)\n",
+    "print(\"The accuracy is\", acc, \"%\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

ファイルの差分が大きいため隠しています
+ 344 - 0
Linear_regression.ipynb


BIN
Linear_regression.png


+ 0 - 2
README.md

@@ -1,3 +1 @@
 # Intro-to-Mechaine-Learning
-
-Udacity course.

ファイルの差分が大きいため隠しています
+ 256 - 0
Random_Forest.ipynb


BIN
Random_Forest_acc.png


ファイルの差分が大きいため隠しています
+ 123 - 0
SVM.ipynb


+ 102 - 0
feature scaling.ipynb

@@ -0,0 +1,102 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 使用 Sklearn 进行特征缩放操作"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.preprocessing import MinMaxScaler\n",
+    "import numpy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0.        ]\n",
+      " [0.41666667]\n",
+      " [1.        ]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "weights = numpy.array([[115],[140],[175]])\n",
+    "scaler = MinMaxScaler()\n",
+    "# fit 会找到 min 值和 max 值,而 transform 会把数据转换到这个范围内\n",
+    "# 所以使用 fit_transform 相当于同时做了两件事\n",
+    "rescaled_weights = scaler.fit_transform(weights)\n",
+    "print(rescaled_weights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  },
+  "varInspector": {
+   "cols": {
+    "lenName": 16,
+    "lenType": 16,
+    "lenVar": 40
+   },
+   "kernels_config": {
+    "python": {
+     "delete_cmd_postfix": "",
+     "delete_cmd_prefix": "del ",
+     "library": "var_list.py",
+     "varRefreshCmd": "print(var_dic_list())"
+    },
+    "r": {
+     "delete_cmd_postfix": ") ",
+     "delete_cmd_prefix": "rm(",
+     "library": "var_list.r",
+     "varRefreshCmd": "cat(var_dic_list()) "
+    }
+   },
+   "types_to_exclude": [
+    "module",
+    "function",
+    "builtin_function_or_method",
+    "instance",
+    "_Feature"
+   ],
+   "window_display": false
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 85 - 0
ipynb_importer.py

@@ -0,0 +1,85 @@
+import io, os,sys,types
+from IPython import get_ipython
+from nbformat import read
+from IPython.core.interactiveshell import InteractiveShell
+
class NotebookFinder(object):
    """Meta-path finder that resolves imports to Jupyter notebooks."""

    def __init__(self):
        # Cache of NotebookLoader instances, keyed by search path.
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return a NotebookLoader if *fullname* maps to a notebook file."""
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return

        # Lists are not hashable, so collapse a path list into a string key.
        key = os.path.sep.join(path) if path else path

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]
+
def find_notebook(fullname, path=None):
    """Find a notebook, given its fully qualified name and an optional path.

    Turns "foo.bar" into a search for "bar.ipynb" in each directory of
    *path* (the current directory when *path* is falsy). Also tries
    turning "Foo_Bar" into "Foo Bar" so that ``import Foo_Bar`` can find
    "Foo Bar.ipynb".

    Returns the path to the notebook file, or None if nothing matches.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    for d in path:
        # Exact match first: <dir>/<name>.ipynb
        nb_path = os.path.join(d, name + ".ipynb")
        if os.path.isfile(nb_path):
            return nb_path
        # Fallback: let "import Notebook_Name" find "Notebook Name.ipynb".
        # Fix: only the file name is rewritten here — the original recipe
        # applied .replace("_", " ") to the whole joined path, which broke
        # lookups inside any directory whose name contains an underscore.
        nb_path = os.path.join(d, name.replace("_", " ") + ".ipynb")
        if os.path.isfile(nb_path):
            return nb_path
+
class NotebookLoader(object):
    """Module loader that executes a Jupyter notebook as a Python module."""

    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import the notebook named *fullname* and return it as a module."""
        path = find_notebook(fullname, self.path)

        print("importing Jupyter notebook from %s" % path)

        # Parse the notebook file (nbformat version 4).
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # Create the module object and register it in sys.modules up front,
        # so that recursive imports from inside the notebook can see it.
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # Temporarily point the IPython shell at the module's namespace so
        # magics that would affect user_ns affect the notebook module instead.
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__
        try:
            for cell in nb.cells:
                if cell.cell_type != 'code':
                    continue
                # Transform the cell (magics etc.) into executable Python.
                code = self.shell.input_transformer_manager.transform_cell(cell.source)
                # Run the code in the module's namespace.
                exec(code, mod.__dict__)
        finally:
            self.shell.user_ns = save_user_ns
        return mod


# Register the finder so "import some_notebook" works for .ipynb files.
sys.meta_path.append(NotebookFinder())

ファイルの差分が大きいため隠しています
+ 178 - 0
naïve bayes.ipynb


+ 50 - 0
prettyPicture.py

@@ -0,0 +1,50 @@
+#!/usr/bin/python
+  
+import matplotlib.pyplot as plt
+import numpy as np
+import pylab as pl
+
+
def prettyPicture(clf, X_test, y_test):
    """Plot clf's decision regions on [0,1]x[0,1] plus the test points.

    Points with label 0 are drawn blue ("fast"), label 1 red ("slow").
    """
    x_min, x_max = 0.0, 1.0
    y_min, y_max = 0.0, 1.0

    # Evaluate the classifier on a fine mesh over [x_min, x_max] x
    # [y_min, y_max] to paint the decision regions.
    h = .01  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)

    # Overlay the test points, split by class label.
    fast_pts = [pt for pt, lab in zip(X_test, y_test) if lab == 0]
    slow_pts = [pt for pt, lab in zip(X_test, y_test) if lab == 1]
    plt.scatter([p[0] for p in fast_pts], [p[1] for p in fast_pts],
                color="b", label="fast")
    plt.scatter([p[0] for p in slow_pts], [p[1] for p in slow_pts],
                color="r", label="slow")
    plt.legend()
    plt.xlabel("bumpiness")
    plt.ylabel("grade")

    #plt.savefig("test.png")
+
+import base64
+import json
+
+
def output_image(name, format, bytes):
    """Print an image payload as base64 JSON between sentinel markers.

    The BEGIN/END sentinels let a downstream viewer locate the payload in
    mixed stdout. Parameters keep their original (builtin-shadowing) names
    for backward compatibility with existing callers:
    name   -- display name of the image (e.g. "test.png")
    format -- image format string (e.g. "png")
    bytes  -- raw image bytes
    """
    image_start = "BEGIN_IMAGE_f9825uweof8jw9fj4r8"
    image_end = "END_IMAGE_0238jfw08fjsiufhw8frs"
    data = {}
    data['name'] = name
    data['format'] = format
    # Fix: base64.encodestring() was removed in Python 3.9, and json.dumps
    # cannot serialize bytes anyway -- use encodebytes and decode to str.
    data['bytes'] = base64.encodebytes(bytes).decode('ascii')
    print(image_start + json.dumps(data) + image_end)

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません