{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:24.912061Z",
     "start_time": "2019-08-11T07:24:24.894997Z"
    }
   },
   "outputs": [],
   "source": [
    "#!/usr/bin/python\n",
    "import random\n",
    "\n",
    "\n",
    "def makeTerrainData(n_points=100000):\n",
    "    \"\"\"Generate a reproducible toy terrain dataset and split it 75/25.\n",
    "\n",
    "    Every sample has two features drawn uniformly from [0, 1): ``grade`` and\n",
    "    ``bumpy``. Its label is ``round(grade * bumpy + 0.3 + 0.1 * noise)`` and\n",
    "    is forced to ``1.0`` whenever either feature exceeds 0.8.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_points : int\n",
    "        Total number of samples to generate.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    X_train, y_train, X_test, y_test : list\n",
    "        The first ``int(0.75 * n_points)`` samples form the training split,\n",
    "        the remainder the test split. Each ``X`` row is ``[grade, bumpy]``.\n",
    "        Labels are 0 or 1; forced labels are stored as the float ``1.0``,\n",
    "        which compares equal to 1.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    Reseeds the global ``random`` module with 42, so repeated calls return\n",
    "    identical data.\n",
    "    \"\"\"\n",
    "    # Fixed seed: every call (and every kernel restart) yields the same data.\n",
    "    random.seed(42)\n",
    "    grade = [random.random() for _ in range(n_points)]\n",
    "    bumpy = [random.random() for _ in range(n_points)]\n",
    "    error = [random.random() for _ in range(n_points)]\n",
    "\n",
    "    # Base label first, then force 1.0 wherever either feature exceeds 0.8.\n",
    "    y = [round(g * b + 0.3 + 0.1 * e) for g, b, e in zip(grade, bumpy, error)]\n",
    "    for ii, (g, b) in enumerate(zip(grade, bumpy)):\n",
    "        if g > 0.8 or b > 0.8:\n",
    "            y[ii] = 1.0\n",
    "\n",
    "    # Split into train/test sets: first 75% for training, the rest for testing.\n",
    "    X = [[g, b] for g, b in zip(grade, bumpy)]\n",
    "    split = int(0.75 * n_points)\n",
    "    return X[:split], y[:split], X[split:], y[split:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:25.888057Z",
     "start_time": "2019-08-11T07:24:25.874599Z"
    }
   },
   "outputs": [],
   "source": [
    "#!/usr/bin/python\n",
    "\n",
    "import base64\n",
    "import json\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pylab as pl\n",
    "\n",
    "\n",
    "def prettyPicture(clf, X_test, y_test):\n",
    "    \"\"\"Draw a classifier's decision regions on the unit square with test points.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    clf : object\n",
    "        Fitted classifier. Its ``predict`` is called with an ``(n, 2)`` array\n",
    "        of mesh coordinates and must return an array that can be reshaped to\n",
    "        the mesh shape.\n",
    "    X_test : sequence\n",
    "        Test samples; element 0 of each row is plotted on the x axis and\n",
    "        element 1 on the y axis.\n",
    "    y_test : sequence\n",
    "        Labels aligned with ``X_test``. Label 0 is drawn in blue as \"fast\",\n",
    "        label 1 in red as \"slow\"; any other label is not drawn.\n",
    "    \"\"\"\n",
    "    x_min, x_max = 0.0, 1.0\n",
    "    y_min, y_max = 0.0, 1.0\n",
    "\n",
    "    # Plot the decision boundary. For that, we will assign a color to each\n",
    "    # point in the mesh [x_min, x_max]x[y_min, y_max].\n",
    "    h = .01  # step size in the mesh\n",
    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
    "                         np.arange(y_min, y_max, h))\n",
    "    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])\n",
    "\n",
    "    # Put the result into a color plot\n",
    "    Z = Z.reshape(xx.shape)\n",
    "    plt.xlim(xx.min(), xx.max())\n",
    "    plt.ylim(yy.min(), yy.max())\n",
    "\n",
    "    plt.pcolormesh(xx, yy, Z, cmap=pl.cm.seismic)\n",
    "\n",
    "    # Plot also the test points, split by label in a single pass.\n",
    "    grade_sig, bumpy_sig = [], []\n",
    "    grade_bkg, bumpy_bkg = [], []\n",
    "    for point, label in zip(X_test, y_test):\n",
    "        if label == 0:\n",
    "            grade_sig.append(point[0])\n",
    "            bumpy_sig.append(point[1])\n",
    "        elif label == 1:\n",
    "            grade_bkg.append(point[0])\n",
    "            bumpy_bkg.append(point[1])\n",
    "\n",
    "    plt.scatter(grade_sig, bumpy_sig, color=\"b\", label=\"fast\")\n",
    "    plt.scatter(grade_bkg, bumpy_bkg, color=\"r\", label=\"slow\")\n",
    "    plt.legend()\n",
    "    # Feature 0 (grade) is on the x axis and feature 1 (bumpiness) on the y\n",
    "    # axis, so the axis labels must follow that order.\n",
    "    plt.xlabel(\"grade\")\n",
    "    plt.ylabel(\"bumpiness\")\n",
    "\n",
    "    # To write the figure to disk instead: plt.savefig(\"test.png\", dpi=300)\n",
    "\n",
    "\n",
    "def output_image(name, format, bytes):\n",
    "    \"\"\"Print an image as a JSON payload wrapped in begin/end sentinel markers.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Image name, stored under the ``\"name\"`` key.\n",
    "    format : str\n",
    "        Image format, stored under the ``\"format\"`` key.\n",
    "    bytes : bytes\n",
    "        Raw image content, stored base64-encoded under the ``\"bytes\"`` key.\n",
    "    \"\"\"\n",
    "    image_start = \"BEGIN_IMAGE_f9825uweof8jw9fj4r8\"\n",
    "    image_end = \"END_IMAGE_0238jfw08fjsiufhw8frs\"\n",
    "    data = {\n",
    "        \"name\": name,\n",
    "        \"format\": format,\n",
    "        # base64.encodestring was removed in Python 3.9 and, on Python 3,\n",
    "        # returned bytes that json.dumps cannot serialise. encodebytes gives\n",
    "        # the same line-wrapped encoding; decode it to text for JSON.\n",
    "        \"bytes\": base64.encodebytes(bytes).decode(\"ascii\"),\n",
    "    }\n",
    "    print(image_start + json.dumps(data) + image_end)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:27.027564Z",
     "start_time": "2019-08-11T07:24:27.022499Z"
    }
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-11T07:24:29.714375Z",
     "start_time": "2019-08-11T07:24:28.724489Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training data lenth is 75000\n",
      "training time: 0.072 s\n",
      "predicting time: 0.679 s\n",
      "23902 25000\n",
      "The accuracy is 95.608 %\n"
     ]
    }
   ],
   "source": [
    "from time import time\n",
    "from sklearn import neighbors\n",
    "\n",
    "# Build the toy terrain dataset (75% train / 25% test).\n",
    "features_train, labels_train, features_test, labels_test = makeTerrainData()\n",
    "print(\"Training data length is\", len(features_train))\n",
    "\n",
    "# Fit a k-nearest-neighbours classifier with scikit-learn's default settings\n",
    "# and time the fit.\n",
    "clf = neighbors.KNeighborsClassifier()\n",
    "t0 = time()\n",
    "clf = clf.fit(features_train, labels_train)\n",
    "print(\"training time:\", round(time() - t0, 3), \"s\")\n",
    "\n",
    "# Time prediction on the held-out test split.\n",
    "t1 = time()\n",
    "result = clf.predict(features_test)\n",
    "print(\"predicting time:\", round(time() - t1, 3), \"s\")\n",
    "\n",
    "# Accuracy = share of test points whose predicted label equals the true one.\n",
    "corr = sum(1 for predicted, actual in zip(result, labels_test)\n",
    "           if predicted == actual)\n",
    "print(corr, len(result))\n",
    "num = float(len(result))\n",
    "acc = round(corr/num*100, 3)\n",
    "print(\"The accuracy is\", acc, \"%\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}