|
@@ -21,8 +21,13 @@ $$\Delta \theta_j = -\eta \cfrac{\partial E_k}{\partial \theta_j}$$
|
|
|
$$
|
|
$$
|
|
|
\begin{aligned}
|
|
\begin{aligned}
|
|
|
\cfrac{\partial E_k}{\partial \theta_j} &= \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot\cfrac{\partial \hat{y}_j^k}{\partial \theta_j} \\
|
|
\cfrac{\partial E_k}{\partial \theta_j} &= \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot\cfrac{\partial \hat{y}_j^k}{\partial \theta_j} \\
|
|
|
-&= (\hat{y}_j^k-y_j^k) \cdot f^{\prime}(\beta_j-\theta_j) \cdot (-1) \\
|
|
|
|
|
-&= -(\hat{y}_j^k-y_j^k)f^{\prime}(\beta_j-\theta_j) \\
|
|
|
|
|
|
|
+&= \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot\cfrac{\partial [f(\beta_j-\theta_j)]}{\partial \theta_j} \\
|
|
|
|
|
+&=\cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot f^{\prime}(\beta_j-\theta_j) \times (-1) \\
|
|
|
|
|
+&=\cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot f\left(\beta_{j}-\theta_{j}\right)\times\left[1-f\left(\beta_{j}-\theta_{j}\right)\right] \times (-1) \\
|
|
|
|
|
+&=\cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \hat{y}_j^k\left(1-\hat{y}_j^k\right) \times (-1) \\
|
|
|
|
|
+&=\cfrac{\partial\left[ \cfrac{1}{2} \sum\limits_{m=1}^{l}\left(\hat{y}_{m}^{k}-y_{m}^{k}\right)^{2}\right]}{\partial \hat{y}_{j}^{k}} \cdot \hat{y}_j^k\left(1-\hat{y}_j^k\right) \times (-1) \\
|
|
|
|
|
+&=\cfrac{1}{2}\times 2(\hat{y}_j^k-y_j^k)\times 1 \cdot\hat{y}_j^k\left(1-\hat{y}_j^k\right) \times (-1) \\
|
|
|
|
|
+&=(y_j^k-\hat{y}_j^k)\hat{y}_j^k\left(1-\hat{y}_j^k\right) \\
|
|
|
&= g_j
|
|
&= g_j
|
|
|
\end{aligned}
|
|
\end{aligned}
|
|
|
$$
|
|
$$
|
|
@@ -46,7 +51,7 @@ $$
|
|
|
\end{aligned}
|
|
\end{aligned}
|
|
|
$$
|
|
$$
|
|
|
所以
|
|
所以
|
|
|
-$$\Delta v_{ih} = -\eta \cdot -e_h \cdot x_i=\eta e_h x_i$$
|
|
|
|
|
|
|
+$$\Delta v_{ih} =-\eta \cfrac{\partial E_k}{\partial v_{ih}} =\eta e_h x_i$$
|
|
|
## 5.14
|
|
## 5.14
|
|
|
$$\Delta \gamma_h= -\eta e_h$$
|
|
$$\Delta \gamma_h= -\eta e_h$$
|
|
|
[推导]:因为
|
|
[推导]:因为
|
|
@@ -57,8 +62,10 @@ $$
|
|
|
\cfrac{\partial E_k}{\partial \gamma_h} &= \sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot \cfrac{\partial \beta_j}{\partial b_h} \cdot \cfrac{\partial b_h}{\partial \gamma_h} \\
|
|
\cfrac{\partial E_k}{\partial \gamma_h} &= \sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot \cfrac{\partial \beta_j}{\partial b_h} \cdot \cfrac{\partial b_h}{\partial \gamma_h} \\
|
|
|
&= \sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot \cfrac{\partial \beta_j}{\partial b_h} \cdot f^{\prime}(\alpha_h-\gamma_h) \cdot (-1) \\
|
|
&= \sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot \cfrac{\partial \beta_j}{\partial b_h} \cdot f^{\prime}(\alpha_h-\gamma_h) \cdot (-1) \\
|
|
|
&= -\sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot w_{hj} \cdot f^{\prime}(\alpha_h-\gamma_h)\\
|
|
&= -\sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot w_{hj} \cdot f^{\prime}(\alpha_h-\gamma_h)\\
|
|
|
|
|
+&= -\sum_{j=1}^{l} \cfrac{\partial E_k}{\partial \hat{y}_j^k} \cdot \cfrac{\partial \hat{y}_j^k}{\partial \beta_j} \cdot w_{hj} \cdot b_h(1-b_h)\\
|
|
|
|
|
+&= \sum_{j=1}^{l}g_j\cdot w_{hj} \cdot b_h(1-b_h)\\
|
|
|
&=e_h
|
|
&=e_h
|
|
|
\end{aligned}
|
|
\end{aligned}
|
|
|
$$
|
|
$$
|
|
|
所以
|
|
所以
|
|
|
-$$\Delta \gamma_h= -\eta e_h$$
|
|
|
|
|
|
|
+$$\Delta \gamma_h=-\eta\cfrac{\partial E_k}{\partial \gamma_h} = -\eta e_h$$
|