import numpy as np
# Training inputs: four samples, each with two binary features followed by a
# constant 1 in the third column.
x = np.array(
    [
        [0, 0, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
    ]
)
# Target outputs, one row per sample (the XOR of the first two input columns).
y = np.array([[0], [1], [1], [0]])
print("x=", x)
print("y=", y)

# Number of full-batch training iterations.
num_epochs = 600000

# Weight matrices, drawn uniformly from [-1, 1):
#   syn0 maps the 3 input columns to 4 hidden units,
#   syn1 maps the 4 hidden units to the single output.
syn0 = np.random.random((3, 4)) * 2 - 1
syn1 = np.random.random((4, 1)) * 2 - 1
print("syn0=", syn0)
print("syn1=", syn1)
# Sigmoid activation function and its derivative.
def nonlin(x, deriv=False):
    """Evaluate the logistic sigmoid, or its derivative given a sigmoid output.

    Args:
        x: Scalar or NumPy array. With ``deriv=False`` this is the raw input
            to the sigmoid. With ``deriv=True`` it must already be a sigmoid
            *output* (a value previously returned by this function), not the
            raw input.
        deriv: When true, return ``x * (1 - x)`` instead of the sigmoid.

    Returns:
        ``1 / (1 + exp(-x))`` element-wise when ``deriv`` is false, otherwise
        ``x * (1 - x)`` element-wise.
    """
    if deriv:
        # sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)); the caller passes in
        # sigmoid(z), so the derivative needs no further exponentials.
        return x * (1 - x)
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))). np.logaddexp evaluates
    # log(exp(0) + exp(-x)) without ever forming exp(-x) on its own, so large
    # negative inputs no longer overflow (and no RuntimeWarning is emitted).
    return np.exp(-np.logaddexp(0, -x))
import matplotlib.pyplot as plt

# Sample the sigmoid on [-10, 10) in steps of 0.1, then derive
# sigmoid * (1 - sigmoid) from the values already computed.
xx = np.arange(-10, 10, 0.1)
ys = nonlin(xx)
yd = ys * (1 - ys)

# Show each curve in its own figure: first the sigmoid, then its derivative.
for curve, title in ((ys, "sigmoid"), (yd, "derivative of sigmoid")):
    plt.plot(xx, curve)
    plt.title(title)
    plt.show()
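# As the plots above show, the derivative of the sigmoid never exceeds 0.25,
# so each sigmoid layer scales the back-propagated gradient by at most 0.25
# (i.e. at least 75% of it is lost per layer). This is why deep sigmoid
# networks are prone to vanishing gradients.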
for j in range(num_epochs):
    # Forward pass through the three layers: input -> hidden -> output.
    k0 = x
    k1 = nonlin(k0.dot(syn0))
    k2 = nonlin(k1.dot(syn1))

    # Output error: difference between the targets and the prediction.
    k2_error = y - k2
    if j % 100000 == 0:
        # Periodic progress report: mean absolute error and raw predictions.
        print("Error:", str(np.mean(np.abs(k2_error))))
        print("k2:", k2)

    # Output-layer delta: error scaled by the sigmoid slope at k2.
    k2_delta = nonlin(k2, deriv=True) * k2_error

    # Hidden-layer error: the output delta propagated back through syn1
    # (computed before syn1 is updated below).
    k1_error = np.dot(k2_delta, syn1.T)

    # Hidden-layer delta: error scaled by the sigmoid slope at k1.
    k1_delta = nonlin(k1, deriv=True) * k1_error

    # Update both weight matrices in place.
    syn1 += np.dot(k1.T, k2_delta)
    syn0 += np.dot(k0.T, k1_delta)