|
|
@@ -0,0 +1,191 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {
|
|
|
+ "id": "RlHTqK9CeZlQ"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import numpy as np\n",
|
|
|
+ "import tensorflow as tf\n",
|
|
|
+ "from tensorflow import keras\n",
|
|
|
+ "%matplotlib inline \n",
|
|
|
+ "import matplotlib.pyplot as plt"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {
|
|
|
+ "id": "GbiiGCT2epyL"
|
|
|
+ },
|
|
|
+ "source": [
|
|
|
+    "## Higher derivatives wrt multi-dimensional tensors\n",
|
|
|
+ "Recall that if $ x \\in \\mathbb{R}^d$ and $y \\in \\mathbb{R}^D$, then $g := grad(y,x) \\in \\mathbb{R}^d$ with $g_i = \\sum_{j=1}^D \\frac{\\partial y_j}{\\partial x_i}$\n",
|
|
|
+ "\n",
|
|
|
+ "Let us consider the following example. Define two tensors\n",
|
|
|
+ "$$x = \\begin{bmatrix}x_1\\\\x_2\\\\x_3\\end{bmatrix}= \\begin{bmatrix}2.0\\\\3.0\\\\4.0\\end{bmatrix}, \\quad y = \\begin{bmatrix}y_1\\\\y_2\\\\y_3\\end{bmatrix}= \\begin{bmatrix}-1.0\\\\2.0\\\\-4.0\\end{bmatrix}$$\n",
|
|
|
+ "and the concatenated tensor\n",
|
|
|
+ "$$\n",
|
|
|
+ "z = \\begin{bmatrix}x_1 & y_1\\\\x_2 & y_2\\\\x_3 & y_3\\end{bmatrix} = \\begin{bmatrix}2.0 & -1.0 \\\\3.0 &2.0 \\\\4.0 & -4.0\\end{bmatrix}\n",
|
|
|
+ "$$\n",
|
|
|
+    "Let us define a scalar tensor (the code below computes its summands elementwise as a vector; TensorFlow sums gradients over all entries of a non-scalar target, so the results coincide)\n",
|
|
|
+ "$$u = 3 x \\cdot x + 2 x\\cdot y + y \\cdot y = \\sum_{i=1}^3(3x_i^2 + 2x_i y_i + y_i^2)$$\n",
|
|
|
+ "Then the following is true for the gradient operations on $u$\n",
|
|
|
+ "$$\n",
|
|
|
+ "a := grad(u,x) = \\begin{bmatrix} \\frac{\\partial u}{\\partial x_1} \\\\ \\frac{\\partial u}{\\partial x_2} \\\\ \\frac{\\partial u}{\\partial x_3}\\end{bmatrix} =\n",
|
|
|
+ "\\begin{bmatrix} 6x_1 + 2y_1 \\\\ 6x_2 + 2y_2 \\\\ 6x_3 + 2y_3\\end{bmatrix} = \\begin{bmatrix} 10 \\\\ 22 \\\\ 16\\end{bmatrix}\n",
|
|
|
+ "$$ \n",
|
|
|
+ "$$\n",
|
|
|
+ "b := grad(a,x) = \\begin{bmatrix} \\sum_{j=1}^3 \\frac{\\partial a_j}{\\partial x_1} \\\\ \\sum_{j=1}^3 \\frac{\\partial a_j}{\\partial x_2} \\\\ \\sum_{j=1}^3 \\frac{\\partial a_j}{\\partial x_3}\\end{bmatrix} =\n",
|
|
|
+ "\\begin{bmatrix} 6 \\\\ 6 \\\\ 6\\end{bmatrix}\n",
|
|
|
+ "$$ \n",
|
|
|
+ "$$\n",
|
|
|
+ "c := grad(u,y) = \\begin{bmatrix} \\frac{\\partial u}{\\partial y_1} \\\\ \\frac{\\partial u}{\\partial y_2} \\\\ \\frac{\\partial u}{\\partial y_3}\\end{bmatrix} =\n",
|
|
|
+ "\\begin{bmatrix} 2x_1 + 2y_1 \\\\ 2x_2 + 2y_2 \\\\ 2x_3 + 2y_3\\end{bmatrix} = \\begin{bmatrix} 2 \\\\ 10 \\\\ 0\\end{bmatrix}\n",
|
|
|
+ "$$ \n",
|
|
|
+ "$$\n",
|
|
|
+ "d := grad(c,y) = \\begin{bmatrix} \\sum_{j=1}^3 \\frac{\\partial c_j}{\\partial y_1} \\\\ \\sum_{j=1}^3 \\frac{\\partial c_j}{\\partial y_2} \\\\ \\sum_{j=1}^3 \\frac{\\partial c_j}{\\partial y_3}\\end{bmatrix} =\n",
|
|
|
+ "\\begin{bmatrix} 2 \\\\ 2 \\\\ 2\\end{bmatrix}\n",
|
|
|
+ "$$ \n",
|
|
|
+ "$$\n",
|
|
|
+ "e := grad(u,z) = \\begin{bmatrix} \\frac{\\partial u}{\\partial z_{11}} & \\frac{\\partial u}{\\partial z_{12}} \\\\ \\frac{\\partial u}{\\partial z_{21}} & \\frac{\\partial u}{\\partial z_{22}} \\\\ \\frac{\\partial u}{\\partial z_{31}} & \\frac{\\partial u}{\\partial z_{32}}\\end{bmatrix} =\n",
|
|
|
+ "\\begin{bmatrix} 6x_1 + 2y_1 & 2x_1 + 2y_1 \\\\ 6x_2 + 2y_2 & 2x_2 + 2y_2 \\\\ 6x_3 + 2y_3 & 2x_3 + 2y_3\\end{bmatrix} = \\begin{bmatrix} 10 &2 \\\\ 22 & 10 \\\\ 16 & 0\\end{bmatrix}\n",
|
|
|
+ "$$ \n",
|
|
|
+ "$$\n",
|
|
|
+ "f := grad(e,z) = \\begin{bmatrix} \\sum_{i=1}^3\\sum_{j=1}^2\\frac{\\partial e_{ij}}{\\partial z_{11}} & \\sum_{i=1}^3\\sum_{j=1}^2\\frac{\\partial e_{ij}}{\\partial z_{12}} \\\\ \\sum_{i=1}^3\\sum_{j=1}^2\\frac{\\partial e_{ij}}{\\partial z_{21}} & \\sum_{i=1}^3\\sum_{j=1}^2\\frac{\\partial e_{ij}}{\\partial z_{22}} \\\\ \\sum_{i=1}^3\\sum_{j=1}^2\\frac{\\partial e_{ij}}{\\partial z_{31}} & \\sum_{i=1}^3\\sum_{j=1}^2\\frac{\\partial e_{ij}}{\\partial z_{32}}\\end{bmatrix} =\n",
|
|
|
+ "\\begin{bmatrix} 8 & 4 \\\\ 8 & 4 \\\\8 & 4\\end{bmatrix}\n",
|
|
|
+ "$$ \n",
|
|
|
+    "**Note that the columns of $f$ are not equal to $b$ or $d$. This is because the computation of $f$ also involves cross-derivative terms. This is demonstrated below**"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "base_uri": "https://localhost:8080/"
|
|
|
+ },
|
|
|
+ "id": "xWpCZ0Tlejz_",
|
|
|
+ "outputId": "f658c4e3-4de7-4cf0-d26a-ac19dfb91189"
|
|
|
+ },
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\n",
|
|
|
+ "x: [[2.]\n",
|
|
|
+ " [3.]\n",
|
|
|
+ " [4.]]\n",
|
|
|
+ "\n",
|
|
|
+ "y: [[-1.]\n",
|
|
|
+ " [ 2.]\n",
|
|
|
+ " [-4.]]\n",
|
|
|
+ "\n",
|
|
|
+ "z: [[ 2. -1.]\n",
|
|
|
+ " [ 3. 2.]\n",
|
|
|
+ " [ 4. -4.]]\n",
|
|
|
+ "\n",
|
|
|
+ "u: [[ 9.]\n",
|
|
|
+ " [43.]\n",
|
|
|
+ " [32.]]\n",
|
|
|
+ "\n",
|
|
|
+ "grad(u,x): [[10.]\n",
|
|
|
+ " [22.]\n",
|
|
|
+ " [16.]]\n",
|
|
|
+ "\n",
|
|
|
+ "grad(dux,x): [[6.]\n",
|
|
|
+ " [6.]\n",
|
|
|
+ " [6.]]\n",
|
|
|
+ "\n",
|
|
|
+ "grad(u,y): [[ 2.]\n",
|
|
|
+ " [10.]\n",
|
|
|
+ " [ 0.]]\n",
|
|
|
+ "\n",
|
|
|
+ "grad(duy,y): [[2.]\n",
|
|
|
+ " [2.]\n",
|
|
|
+ " [2.]]\n",
|
|
|
+ "\n",
|
|
|
+ "grad(u,z): [[10. 2.]\n",
|
|
|
+ " [22. 10.]\n",
|
|
|
+ " [16. 0.]]\n",
|
|
|
+ "\n",
|
|
|
+ "grad(du,z): [[8. 4.]\n",
|
|
|
+ " [8. 4.]\n",
|
|
|
+ " [8. 4.]]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "x = tf.Variable([[2.0],[3.0],[4.0]])\n",
|
|
|
+ "y = tf.Variable([[-1.0],[2.0],[-4.0]])\n",
|
|
|
+ "z = tf.Variable(tf.concat((x,y),axis=1))\n",
|
|
|
+ "\n",
|
|
|
+ "print('\\nx: ',x.numpy())\n",
|
|
|
+ "print('\\ny: ',y.numpy())\n",
|
|
|
+ "print('\\nz: ',z.numpy())\n",
|
|
|
+ "\n",
|
|
|
+ "with tf.GradientTape(persistent=True) as t1:\n",
|
|
|
+ " with tf.GradientTape(persistent=True) as t2:\n",
|
|
|
+ " u = 3*x*x + 2*x*y + y*y\n",
|
|
|
+ " dux = t2.gradient(u,x)\n",
|
|
|
+ " duy = t2.gradient(u,y)\n",
|
|
|
+ "d2ux = t1.gradient(dux,x) \n",
|
|
|
+ "d2uy = t1.gradient(duy,y) \n",
|
|
|
+ "\n",
|
|
|
+ "del t1,t2\n",
|
|
|
+ "\n",
|
|
|
+ "print('\\nu: ',u.numpy())\n",
|
|
|
+ "print('\\ngrad(u,x): ',dux.numpy())\n",
|
|
|
+ "print('\\ngrad(dux,x): ',d2ux.numpy())\n",
|
|
|
+ "print('\\ngrad(u,y): ',duy.numpy())\n",
|
|
|
+ "print('\\ngrad(duy,y): ',d2uy.numpy())\n",
|
|
|
+ "\n",
|
|
|
+ "with tf.GradientTape() as t1:\n",
|
|
|
+ " with tf.GradientTape() as t2:\n",
|
|
|
+ " u = 3*z[:,0]*z[:,0] + 2*z[:,0]*z[:,1] + z[:,1]*z[:,1]\n",
|
|
|
+ " duz = t2.gradient(u,z)\n",
|
|
|
+ "d2uz = t1.gradient(duz,z) \n",
|
|
|
+ "\n",
|
|
|
+ "print('\\ngrad(u,z): ',duz.numpy())\n",
|
|
|
+ "print('\\ngrad(du,z): ',d2uz.numpy())"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {
|
|
|
+ "id": "Bd3KqR3aobdz"
|
|
|
+ },
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "colab": {
|
|
|
+ "collapsed_sections": [],
|
|
|
+   "name": "MultiDimGradient.ipynb",
|
|
|
+ "provenance": []
|
|
|
+ },
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.8.12"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 0
|
|
|
+}
|