网盘地址:
链接: https://pan.baidu.com/s/1Pn5jnZBO_X-VGeNk0W72Pg
提取码: db54
一般的卷积神经网络由多个卷积层构成,每个卷积层中通常会进行如下几个操作:
图像通过多个不同的卷积核的滤波,并加偏置(bias),提取出局部特征,每个卷积核会映射出一个新的2D图像。
将前面卷积核的滤波输出结果,进行非线性的激活函数处理。目前最常见的是使用ReLU函数,而以前Sigmoid函数用得比较多。
对激活函数的结果再进行池化操作(即降采样,比如将2*2的图片降为1*1的图片),目前一般是使用最大池化,保留最显著的特征,并提升模型的畸变容忍能力。
总结一下,CNN的要点是局部连接(Local Connection)、权值共享(Weight Sharing)和池化层(Pooling)中的降采样(Down-Sampling)。
#Conv.py
import numpy as np
from scipy import signal


def Conv(x: np.ndarray, W: np.ndarray) -> np.ndarray:
    """Cross-correlate a 2-D image with every kernel of a filter bank.

    Each kernel is rotated by 180 degrees before being handed to
    ``signal.convolve2d``, so the net operation is a 'valid'
    cross-correlation (the kernel is slid over the image without flipping).

    Args:
        x: 2-D input image of shape (xrow, xcol).
        W: Filter bank of shape (wrow, wcol, numFilters).

    Returns:
        Array of shape (xrow - wrow + 1, xcol - wcol + 1, numFilters) holding
        one feature map per kernel.
    """
    (wrow, wcol, numFilters) = W.shape
    (xrow, xcol) = x.shape

    y = np.zeros((xrow - wrow + 1, xcol - wcol + 1, numFilters))
    for k in range(numFilters):
        # W[:, :, k] is already 2-D.  Squeezing it would collapse 1xN / Nx1
        # kernels to 1-D, which np.rot90 rejects, so it is used as-is.
        kernel = np.rot90(W[:, :, k], 2)
        y[:, :, k] = signal.convolve2d(x, kernel, 'valid')
    return y
#LoadMnistData.py
"""
Originally written by Martin Thoma
https://martin-thoma.com/classify-mnist-with-pybrain/
"""
from struct import unpack
import gzip
from numpy import uint8, zeros, float32, frombuffer


def LoadMnistData(imagefile, labelfile):
    """Read gzip-compressed MNIST (IDX) image and label files.

    Args:
        imagefile: Path to the gzip-compressed image file.
        labelfile: Path to the gzip-compressed label file.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float32 array of shape
        (N, rows, cols) holding the raw pixel values and ``y`` is a uint8
        array of shape (N, 1) holding the labels.

    Raises:
        ValueError: If the image and label counts in the two headers differ,
            or if either file holds less data than its header announces.
    """
    # Context managers guarantee both handles are closed, even on error.
    with gzip.open(imagefile, 'rb') as images, \
            gzip.open(labelfile, 'rb') as labels:
        # Header fields are big-endian unsigned 32-bit integers ('>I').
        images.read(4)  # skip the magic_number
        number_of_images, rows, cols = unpack('>III', images.read(12))

        labels.read(4)  # skip the magic_number
        (N,) = unpack('>I', labels.read(4))

        if number_of_images != N:
            raise ValueError('number of labels did not match the number of images')

        # Read each payload in one call instead of one byte at a time.
        pixel_bytes = images.read(N * rows * cols)
        label_bytes = labels.read(N)

    if len(pixel_bytes) != N * rows * cols:
        raise ValueError('image file is truncated')
    if len(label_bytes) != N:
        raise ValueError('label file is truncated')

    # frombuffer yields read-only views; astype/copy produce writable arrays.
    x = frombuffer(pixel_bytes, dtype=uint8).astype(float32).reshape(N, rows, cols)
    y = frombuffer(label_bytes, dtype=uint8).reshape(N, 1).copy()
    return (x, y)
#MnistConv.py
import numpy as np
from scipy import signal
from Softmax import *
from ReLU import *
from Conv import *
from Pool import *


def MnistConv(W1, W5, Wo, X, D):
    """Run one pass of mini-batch momentum training over the data set.

    The network is: Conv(W1) -> ReLU -> 2x2 mean Pool -> flatten ->
    dense(W5) -> ReLU -> dense(Wo) -> Softmax.

    Args:
        W1: Convolution filter bank, shape (rows, cols, numFilters).
        W5: Hidden-layer weight matrix applied to the flattened pooled maps.
        Wo: Output-layer weight matrix; its row count is the class count.
        X: Images, indexed as ``X[k, :, :]``.
        D: Labels, indexed as ``D[k][0]`` (integer class index).

    Returns:
        The updated ``(W1, W5, Wo)``.
    """
    alpha = 0.01  # learning rate
    beta = 0.95   # momentum coefficient

    momentum1 = np.zeros_like(W1)
    momentum5 = np.zeros_like(W5)
    momentumo = np.zeros_like(Wo)

    N = len(D)
    bsize = 100
    # Derive layer sizes from the weights instead of hard-coding 20 / 10.
    numFilters = W1.shape[2]
    numClasses = Wo.shape[0]
    pool_ones = np.ones((2, 2))

    for begin in range(0, N, bsize):
        # Clamp the final batch so a data set whose size is not a multiple
        # of bsize does not index past the end of X.
        end = min(begin + bsize, N)

        dW1 = np.zeros_like(W1)
        dW5 = np.zeros_like(W5)
        dWo = np.zeros_like(Wo)

        for k in range(begin, end):
            # Forward pass = inference
            x = X[k, :, :]
            y1 = Conv(x, W1)
            y2 = ReLU(y1)
            y3 = Pool(y2)
            y4 = np.reshape(y3, (-1, 1))
            v5 = np.matmul(W5, y4)
            y5 = ReLU(v5)
            v = np.matmul(Wo, y5)
            y = Softmax(v)

            # one-hot encoding
            d = np.zeros((numClasses, 1))
            d[D[k][0]][0] = 1

            # Backpropagation: the output delta is the raw error d - y.
            e = d - y
            delta = e

            e5 = np.matmul(Wo.T, delta)   # Hidden(ReLU)
            delta5 = (y5 > 0) * e5

            e4 = np.matmul(W5.T, delta5)  # Pooling layer
            e3 = np.reshape(e4, y3.shape)

            # Undo the 2x2 mean pooling: spread every pooled error evenly
            # over the four inputs that were averaged.
            e2 = np.zeros_like(y2)
            for c in range(numFilters):
                e2[:, :, c] = np.kron(e3[:, :, c], pool_ones) / (2*2)

            delta2 = (y2 > 0) * e2

            delta1_x = np.zeros_like(W1)
            for c in range(numFilters):
                delta1_x[:, :, c] = signal.convolve2d(
                    x[:, :], np.rot90(delta2[:, :, c], 2), 'valid')

            dW1 = dW1 + delta1_x
            dW5 = dW5 + np.matmul(delta5, y4.T)
            dWo = dWo + np.matmul(delta, y5.T)

        # Average over the samples actually seen in this batch.
        count = end - begin
        dW1 = dW1 / count
        dW5 = dW5 / count
        dWo = dWo / count

        momentum1 = alpha*dW1 + beta*momentum1
        W1 = W1 + momentum1

        momentum5 = alpha*dW5 + beta*momentum5
        W5 = W5 + momentum5

        momentumo = alpha*dWo + beta*momentumo
        Wo = Wo + momentumo

    return W1, W5, Wo
#Pool.py
import numpy as np
from scipy import signal


def Pool(x: np.ndarray) -> np.ndarray:
    """Apply 2x2 mean pooling with stride 2 to every channel.

    A trailing odd row or column is ignored, matching the floor division
    used for the output size.

    Args:
        x: Feature maps of shape (xrow, xcol, numFilters).

    Returns:
        Float array of shape (xrow // 2, xcol // 2, numFilters) where each
        entry is the mean of one non-overlapping 2x2 window of ``x``.
    """
    (xrow, xcol, numFilters) = x.shape
    rows, cols = xrow // 2, xcol // 2

    # Drop the unpaired last row/column, then expose each 2x2 window on its
    # own pair of axes so one mean() call pools all channels at once.
    trimmed = x[:2 * rows, :2 * cols, :]
    windows = trimmed.reshape(rows, 2, cols, 2, numFilters)
    return windows.mean(axis=(1, 3), dtype=float)
#ReLU.py
import numpy as np


def ReLU(x):
    """Return the element-wise rectified linear unit, ``max(0, x)``.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        ``x`` with every negative entry replaced by 0.
    """
    return np.maximum(0, x)
#Sigmoid.py
import numpy as np


def Sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
    inputs of large magnitude do not trigger a floating-point overflow.

    Args:
        x: Scalar or array of pre-activation values.

    Returns:
        Sigmoid of ``x``, with the same shape as ``x``.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x).  Both share 1 + z.
    return np.where(x >= 0, 1.0, z) / (1.0 + z)
#Softmax.py
import numpy as np


def Softmax(x, axis=None):
    """Return the softmax of ``x``.

    Args:
        x: Scalar scores as an array (e.g. a column vector of logits).
        axis: Axis along which to normalise.  The default ``None``
            normalises over the whole array, which is the right choice for
            a single score vector; pass an axis to handle a batch of
            vectors independently.

    Returns:
        Array of the same shape as ``x`` whose entries are positive and sum
        to 1 along ``axis`` (or over the whole array when ``axis`` is None).
    """
    # Shifting by the maximum leaves the result unchanged but keeps exp()
    # from overflowing on large scores.
    shifted = np.subtract(x, np.max(x, axis=axis, keepdims=True))
    ex = np.exp(shifted)
    return ex / np.sum(ex, axis=axis, keepdims=True)
#TestMnistConv.py
import os

import numpy as np
from scipy import signal
from LoadMnistData import *
from Softmax import *
from ReLU import *
from Conv import *
from Pool import *
from MnistConv import *

# Learn
#
# os.path.join keeps the data paths valid on every platform; a literal
# backslash separator only resolves on Windows.
Images, Labels = LoadMnistData(
    os.path.join('MNIST', 't10k-images-idx3-ubyte.gz'),
    os.path.join('MNIST', 't10k-labels-idx1-ubyte.gz'))
Images = np.divide(Images, 255)  # scale pixel values by 1/255

# Convolution filters: twenty 9x9 kernels with small random values.
W1 = 1e-2 * np.random.randn(9, 9, 20)
# Dense layers: uniform initialisation scaled per layer.
W5 = np.random.uniform(-1, 1, (100, 2000)) * np.sqrt(6) / np.sqrt(360 + 2000)
Wo = np.random.uniform(-1, 1, ( 10,  100)) * np.sqrt(6) / np.sqrt( 10 +  100)

# The first 8000 samples are used for training.
X = Images[0:8000, :, :]
D = Labels[0:8000]

for _epoch in range(3):
    print(_epoch)
    W1, W5, Wo = MnistConv(W1, W5, Wo, X, D)

# Test
#
# The remaining 2000 samples are held out for evaluation.
X = Images[8000:10000, :, :]
D = Labels[8000:10000]

acc = 0
N = len(D)
for k in range(N):
    # Same forward pass as in MnistConv, without the backward step.
    x = X[k, :, :]
    y1 = Conv(x, W1)
    y2 = ReLU(y1)
    y3 = Pool(y2)
    y4 = np.reshape(y3, (-1, 1))
    v5 = np.matmul(W5, y4)
    y5 = ReLU(v5)
    v = np.matmul(Wo, y5)
    y = Softmax(v)

    # The predicted class is the index of the largest output.
    i = np.argmax(y)
    if i == D[k][0]:
        acc = acc + 1

acc = acc / N
print("Accuracy is : ", acc)