{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Terrain classification with k-nearest neighbours\n",
    "\n",
    "Generates a synthetic two-feature terrain dataset (`grade`, `bumpiness`), fits scikit-learn's `KNeighborsClassifier` with its default settings, and reports training time, prediction time and test-set accuracy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:24.912061Z",
     "start_time": "2019-08-11T07:24:24.894997Z"
    }
   },
   "outputs": [],
   "source": [
    "#!/usr/bin/python\n",
    "import random\n",
    "\n",
    "\n",
    "def makeTerrainData(n_points=100000):\n",
    "    \"\"\"Build a reproducible toy terrain dataset and split it 75/25.\n",
    "\n",
    "    Every point has two features, grade and bumpy, each drawn with\n",
    "    random.random().  Its label is round(grade*bumpy + 0.3 + 0.1*error),\n",
    "    where error is a third random.random() draw, and is then forced to 1.0\n",
    "    whenever either feature exceeds 0.8.\n",
    "\n",
    "    The global random generator is reseeded with 42 on every call, so the\n",
    "    same n_points always produces the same data.\n",
    "\n",
    "    Args:\n",
    "        n_points: Total number of points to generate.\n",
    "\n",
    "    Returns:\n",
    "        (X_train, y_train, X_test, y_test).  Each X_* is a list of\n",
    "        [grade, bumpy] pairs and each y_* the matching list of labels.\n",
    "        The first int(0.75 * n_points) points form the training split and\n",
    "        the remainder the test split.\n",
    "    \"\"\"\n",
    "    # Make the toy dataset.  The three lists are drawn one after another\n",
    "    # (not interleaved); that order fixes which random value each point gets.\n",
    "    random.seed(42)\n",
    "    grade = [random.random() for _ in range(n_points)]\n",
    "    bumpy = [random.random() for _ in range(n_points)]\n",
    "    error = [random.random() for _ in range(n_points)]\n",
    "\n",
    "    y = [round(g * b + 0.3 + 0.1 * e)\n",
    "         for g, b, e in zip(grade, bumpy, error)]\n",
    "    for ii, (g, b) in enumerate(zip(grade, bumpy)):\n",
    "        if g > 0.8 or b > 0.8:\n",
    "            # Stored as the float 1.0 (not int 1), as in earlier versions,\n",
    "            # so the label list is unchanged for existing callers.\n",
    "            y[ii] = 1.0\n",
    "\n",
    "    # Split into train/test sets.\n",
    "    X = [[g, b] for g, b in zip(grade, bumpy)]\n",
    "    split = int(0.75 * n_points)\n",
    "    return X[:split], y[:split], X[split:], y[split:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:25.888057Z",
     "start_time": "2019-08-11T07:24:25.874599Z"
    }
   },
   "outputs": [],
   "source": [
    "#!/usr/bin/python\n",
    "\n",
    "import base64\n",
    "import json\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pylab as pl\n",
    "\n",
    "\n",
    "def prettyPicture(clf, X_test, y_test):\n",
    "    \"\"\"Draw a classifier's decision regions with the test points on top.\n",
    "\n",
    "    clf.predict is evaluated on a regular grid over the unit square (step\n",
    "    0.01) and the predictions are drawn as a colour mesh.  Test points with\n",
    "    label 0 are scattered in blue ('fast') and points with label 1 in red\n",
    "    ('slow').  Everything is drawn on the current pyplot figure; nothing is\n",
    "    returned or saved.\n",
    "\n",
    "    Args:\n",
    "        clf: Fitted classifier whose predict() accepts two-column input.\n",
    "        X_test: Sequence of [grade, bumpiness] pairs.\n",
    "        y_test: Labels aligned with X_test.\n",
    "    \"\"\"\n",
    "    x_min, x_max = 0.0, 1.0\n",
    "    y_min, y_max = 0.0, 1.0\n",
    "\n",
    "    # Plot the decision boundary. For that, we will assign a color to each\n",
    "    # point in the mesh [x_min, x_max]x[y_min, y_max].\n",
    "    h = .01  # step size in the mesh\n",
    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
    "                         np.arange(y_min, y_max, h))\n",
    "    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
    "\n",
    "    # Put the result into a color plot\n",
    "    Z = Z.reshape(xx.shape)\n",
    "    plt.xlim(xx.min(), xx.max())\n",
    "    plt.ylim(yy.min(), yy.max())\n",
    "\n",
    "    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)\n",
    "\n",
    "    # Plot also the test points\n",
    "    grade_sig = [pt[0] for pt, label in zip(X_test, y_test) if label == 0]\n",
    "    bumpy_sig = [pt[1] for pt, label in zip(X_test, y_test) if label == 0]\n",
    "    grade_bkg = [pt[0] for pt, label in zip(X_test, y_test) if label == 1]\n",
    "    bumpy_bkg = [pt[1] for pt, label in zip(X_test, y_test) if label == 1]\n",
    "\n",
    "    plt.scatter(grade_sig, bumpy_sig, color=\"b\", label=\"fast\")\n",
    "    plt.scatter(grade_bkg, bumpy_bkg, color=\"r\", label=\"slow\")\n",
    "    plt.legend()\n",
    "    # The scatter calls put grade (column 0) on x and bumpiness (column 1)\n",
    "    # on y, so the axis labels have to follow that order.\n",
    "    plt.xlabel(\"grade\")\n",
    "    plt.ylabel(\"bumpiness\")\n",
    "\n",
    "\n",
    "def output_image(name, format, bytes):\n",
    "    \"\"\"Print an image as a JSON record wrapped in sentinel markers.\n",
    "\n",
    "    The record has the keys 'name', 'format' and 'bytes'; 'bytes' holds the\n",
    "    base64 text of the raw image data.\n",
    "\n",
    "    Args:\n",
    "        name: Value stored under 'name'.\n",
    "        format: Value stored under 'format'.\n",
    "        bytes: Raw image content as a bytes-like object.\n",
    "    \"\"\"\n",
    "    image_start = \"BEGIN_IMAGE_f9825uweof8jw9fj4r8\"\n",
    "    image_end = \"END_IMAGE_0238jfw08fjsiufhw8frs\"\n",
    "    data = {\n",
    "        \"name\": name,\n",
    "        \"format\": format,\n",
    "        # encodebytes replaces base64.encodestring, which was removed in\n",
    "        # Python 3.9; the result is decoded because json.dumps cannot\n",
    "        # serialise a bytes object.\n",
    "        \"bytes\": base64.encodebytes(bytes).decode(\"ascii\"),\n",
    "    }\n",
    "    print(image_start + json.dumps(data) + image_end)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:27.027564Z",
     "start_time": "2019-08-11T07:24:27.022499Z"
    }
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:29.714375Z",
     "start_time": "2019-08-11T07:24:28.724489Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data lenth is 75000\n",
      "training time: 0.072 s\n",
      "predicting time: 0.679 s\n",
      "23902 25000\n",
      "The accuracy is 95.608 %\n"
     ]
    }
   ],
   "source": [
    "from time import time\n",
    "from sklearn import neighbors\n",
    "\n",
    "features_train, labels_train, features_test, labels_test = makeTerrainData()\n",
    "print(\"Training data lenth is\", len(features_train))\n",
    "\n",
    "\n",
    "clf = neighbors.KNeighborsClassifier()\n",
    "t0 = time()\n",
    "clf = clf.fit(features_train, labels_train)\n",
    "print(\"training time:\", round(time() - t0, 3), \"s\")\n",
    "\n",
    "t1 = time()\n",
    "result = clf.predict(features_test)\n",
    "print(\"predicting time:\", round(time() - t1, 3), \"s\")\n",
    "\n",
    "# Count the test points whose predicted label equals the true label.\n",
    "corr = sum(1 for predicted, actual in zip(result, labels_test)\n",
    "           if predicted == actual)\n",
    "print(corr, len(result))\n",
    "num = float(len(result))\n",
    "acc = round(corr/num*100, 3)\n",
    "print(\"The accuracy is\", acc, \"%\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}