卷积神经网络python实例

CNN最大的特点在于卷积的权值共享结构,可以大幅减少神经网络的参数量,防止过拟合的同时又降低了神经网络模型的复杂度。在CNN中,第一个卷积层会直接接收图像像素级的输入,每一个卷积操作只处理一小块图像,进行卷积变换后再传到后面的网络,每一层卷积都会提取数据中最有效的特征。这种方法可以提取到图像中最基础的特征,比如不同方向的边或者拐角,而后再进行组合和抽象形成更高阶的特征。

网盘地址:
链接: https://pan.baidu.com/s/1Pn5jnZBO_X-VGeNk0W72Pg
提取码: db54

一般的卷积神经网络由多个卷积层构成,每个卷积层中通常会进行如下几个操作:

图像通过多个不同的卷积核的滤波,并加偏置(bias),提取出局部特征,每个卷积核会映射出一个新的2D图像。

将前面卷积核的滤波输出结果,进行非线性的激活函数处理。目前最常见的是使用ReLU函数,而以前Sigmoid函数用得比较多。

对激活函数的结果再进行池化操作(即降采样,比如将2*2的图片降为1*1的图片),目前一般是使用最大池化,保留最显著的特征,并提升模型的畸变容忍能力。

总结一下,CNN的要点是局部连接(Local Connection)、权值共享(Weight Sharing)和池化层(Pooling)中的降采样(Down-Sampling)。

#Conv.py
import numpy as np
from scipy import signal


def Conv(x, W):
    """Apply a bank of 2-D filters to one single-channel image.

    Each filter is rotated by 180 degrees before being passed to
    ``signal.convolve2d``, so the net operation is a 'valid'
    cross-correlation of ``x`` with ``W[:, :, k]``.

    Args:
        x: 2-D array of shape (xrow, xcol).
        W: 3-D array of shape (wrow, wcol, numFilters); one filter per
            slice along the last axis.

    Returns:
        Array of shape (xrow - wrow + 1, xcol - wcol + 1, numFilters)
        holding one feature map per filter.
    """
    (wrow, wcol, numFilters) = W.shape
    (xrow, xcol) = x.shape

    yrow = xrow - wrow + 1
    ycol = xcol - wcol + 1

    y = np.zeros((yrow, ycol, numFilters))

    for k in range(numFilters):
        # W[:, :, k] is already 2-D. It must not be squeezed: squeezing a
        # kernel with a size-1 dimension (1x1, 1xN, Nx1) drops it below 2-D
        # and np.rot90 then raises ValueError.
        kernel = np.rot90(W[:, :, k], 2)
        y[:, :, k] = signal.convolve2d(x, kernel, 'valid')

    return y
#LoadMnistData.py
"""
  Originally written by Martin Thoma
    https://martin-thoma.com/classify-mnist-with-pybrain/
"""

from struct import unpack
import gzip
from numpy import uint8, zeros, float32


def LoadMnistData(imagefile, labelfile):
    """Load gzip-compressed MNIST IDX image and label files.

    Args:
        imagefile: Path to the gzipped IDX3 image file.
        labelfile: Path to the gzipped IDX1 label file.

    Returns:
        A tuple ``(x, y)``: ``x`` is a float32 array of shape
        (N, rows, cols) holding the raw pixel bytes as numbers, and ``y``
        is a uint8 array of shape (N, 1) holding the labels.

    Raises:
        Exception: If the image count and label count in the two headers
            differ.
        ValueError: If either file ends before all announced data was read.
    """
    # Function-scope import: the module header only imports uint8, zeros
    # and float32 from numpy.
    from numpy import frombuffer

    # Context managers guarantee both gzip handles are closed, also on error.
    with gzip.open(imagefile, 'rb') as images, \
            gzip.open(labelfile, 'rb') as labels:
        # Header fields are big-endian unsigned 32-bit ints, hence '>I'.

        # Get metadata for images
        images.read(4)  # skip the magic_number
        number_of_images = unpack('>I', images.read(4))[0]
        rows = unpack('>I', images.read(4))[0]
        cols = unpack('>I', images.read(4))[0]

        # Get metadata for labels
        labels.read(4)  # skip the magic_number
        N = unpack('>I', labels.read(4))[0]

        if number_of_images != N:
            raise Exception('number of labels did not match the number of images')

        # Get the data
        x = zeros((N, rows, cols), dtype=float32)
        y = zeros((N, 1), dtype=uint8)

        # All labels are one byte each; read them in a single call.
        label_bytes = labels.read(N)
        if len(label_bytes) != N:
            raise ValueError('label file ended before %i labels were read' % N)

        image_size = rows * cols
        for i in range(N):
            if i % 1000 == 0:
                print("i: %i" % i)

            # Read one whole image at once instead of one byte per pixel.
            pixels = images.read(image_size)
            if len(pixels) != image_size:
                raise ValueError('image file ended early at image %i' % i)

            x[i] = frombuffer(pixels, dtype=uint8).reshape(rows, cols)
            y[i] = label_bytes[i]

    return (x, y)
#MnistConv.py
import numpy as np
from scipy import signal
from Softmax import *
from ReLU import *
from Conv import *
from Pool import *


def MnistConv(W1, W5, Wo, X, D):
    """Run one training pass (epoch) over X and return the updated weights.

    The network is Conv -> ReLU -> Pool -> fully connected (ReLU) ->
    fully connected (Softmax). Weights are updated once per mini-batch of
    up to 100 samples, using the batch-averaged update and momentum.

    Args:
        W1: Convolution filters, shape (rows, cols, numFilters).
        W5: Hidden-layer weight matrix; multiplied with the flattened
            pooling output.
        Wo: Output-layer weight matrix; this function builds a 10-element
            one-hot target, so it must have 10 rows.
        X: Images indexed as X[k, :, :].
        D: Labels indexed as D[k][0]; each is used as an index into the
            10-element one-hot vector.

    Returns:
        Tuple (W1, W5, Wo) of updated weights. The arrays passed in are
        not modified in place.
    """
    alpha = 0.01   # learning rate
    beta  = 0.95   # momentum coefficient

    momentum1 = np.zeros_like(W1)
    momentum5 = np.zeros_like(W5)
    momentumo = np.zeros_like(Wo)

    N = len(D)
    numFilters = W1.shape[2]   # taken from W1 rather than hard-coded

    bsize = 100
    blist = np.arange(0, N, bsize)

    for begin in blist:
        # Clamp the last batch so a sample count that is not a multiple of
        # bsize does not index past the end of X and D.
        end = min(begin + bsize, N)

        dW1 = np.zeros_like(W1)
        dW5 = np.zeros_like(W5)
        dWo = np.zeros_like(Wo)

        for k in range(begin, end):
            # Forward pass = inference
            x  = X[k, :, :]
            y1 = Conv(x, W1)
            y2 = ReLU(y1)
            y3 = Pool(y2)
            y4 = np.reshape(y3, (-1, 1))
            v5 = np.matmul(W5, y4)
            y5 = ReLU(v5)
            v  = np.matmul(Wo, y5)
            y  = Softmax(v)

            # one-hot encoding
            d = np.zeros((10, 1))
            d[D[k][0]][0] = 1

            # Backpropagation
            # The output error is used directly as the output delta.
            e     = d - y
            delta = e

            e5     = np.matmul(Wo.T, delta)    # Hidden(ReLU)
            delta5 = (y5 > 0) * e5

            e4 = np.matmul(W5.T, delta5)       # Pooling layer

            e3 = np.reshape(e4, y3.shape)

            # Undo the 2x2 pooling: copy each pooled error into its 2x2
            # block (np.kron) and scale by 1/4.
            e2 = np.zeros_like(y2)
            W3 = np.ones_like(y2) / (2*2)
            for c in range(numFilters):
                e2[:, :, c] = np.kron(e3[:, :, c], np.ones((2, 2))) * W3[:, :, c]

            delta2 = (y2 > 0) * e2

            # Filter update: 'valid' cross-correlation of the input image
            # with each delta map (rot90 by 180 degrees + convolve2d).
            delta1_x = np.zeros_like(W1)
            for c in range(numFilters):
                delta1_x[:, :, c] = signal.convolve2d(x[:, :], np.rot90(delta2[:, :, c], 2), 'valid')

            dW1 = dW1 + delta1_x
            dW5 = dW5 + np.matmul(delta5, y4.T)
            dWo = dWo + np.matmul(delta, y5.T)

        # Average over the samples actually processed in this batch.
        count = end - begin
        dW1 = dW1 / count
        dW5 = dW5 / count
        dWo = dWo / count

        # The update is added because e = d - y already points in the
        # direction that reduces the error.
        momentum1 = alpha*dW1 + beta*momentum1
        W1        = W1 + momentum1

        momentum5 = alpha*dW5 + beta*momentum5
        W5        = W5 + momentum5

        momentumo = alpha*dWo + beta*momentumo
        Wo        = Wo + momentumo

    return W1, W5, Wo
#Pool.py
import numpy as np
from scipy import signal

    
def Pool(x):
    """Downsample every channel of ``x`` by 2x2 mean pooling with stride 2.

    Args:
        x: 3-D array of shape (rows, cols, channels).

    Returns:
        Array of shape (int(rows/2), int(cols/2), channels) in which each
        entry is the mean of one 2x2 window of the input channel.
    """
    (height, width, channels) = x.shape
    pooled = np.zeros((int(height / 2), int(width / 2), channels))

    # Convolving with a constant 2x2 kernel of 1/4 gives the mean of every
    # 2x2 window; the kernel is the same for all channels.
    mean_kernel = np.ones((2, 2)) / (2 * 2)

    for c in range(channels):
        window_means = signal.convolve2d(x[:, :, c], mean_kernel, 'valid')
        # Keep every second window in both directions (stride 2).
        pooled[:, :, c] = window_means[::2, ::2]

    return pooled

#ReLU.py
import numpy as np


def ReLU(x):
    """Return the element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified
#Sigmoid.py
import numpy as np


def Sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator
#Softmax.py
import numpy as np


def Softmax(x):
    """Return exp(x) normalised so that all entries of the result sum to 1.

    The maximum and the sum are both taken over the whole array, not
    along an axis.
    """
    # Shifting by the largest entry keeps np.exp from overflowing and does
    # not change the normalised result.
    peak = np.max(x)
    exponentials = np.exp(np.subtract(x, peak))
    total = np.sum(exponentials)

    return exponentials / total
#TestMnistConv.py
import numpy as np
from scipy import signal
from LoadMnistData import *
from Softmax import *
from ReLU import *
from Conv import *
from Pool import *
from MnistConv import *


# Learn
#
import os  # script-local import, used only to build portable data paths

# os.path.join picks the platform's separator; on Windows this yields the
# same 'MNIST\\...' strings that were previously hard-coded.
Images, Labels = LoadMnistData(os.path.join('MNIST', 't10k-images-idx3-ubyte.gz'),
                               os.path.join('MNIST', 't10k-labels-idx1-ubyte.gz'))
Images = np.divide(Images, 255)   # scale the raw byte values by 1/255

# 20 convolution filters of size 9x9 with small random values.
W1 = 1e-2 * np.random.randn(9, 9, 20)
# Hidden layer with 100 nodes and 2000 inputs (presumably 10*10*20 for
# 28x28 images - confirm).
# NOTE(review): 360 in the scaling term does not match either dimension
# of W5 (100, 2000); kept unchanged - confirm whether it is intended.
W5 = np.random.uniform(-1, 1, (100, 2000)) * np.sqrt(6) / np.sqrt(360 + 2000)
Wo = np.random.uniform(-1, 1, ( 10,  100)) * np.sqrt(6) / np.sqrt( 10 +  100)

# The first 8000 samples are used for training.
X = Images[0:8000, :, :]
D = Labels[0:8000]

for _epoch in range(3):
    print(_epoch)
    W1, W5, Wo = MnistConv(W1, W5, Wo, X, D)


# Test
#
# Samples 8000..9999 are held out for evaluation.
X = Images[8000:10000, :, :]
D = Labels[8000:10000]

acc = 0
N   = len(D)
for k in range(N):
    x  = X[k, :, :]

    # Same forward pass as the one inside MnistConv.
    y1 = Conv(x, W1)
    y2 = ReLU(y1)
    y3 = Pool(y2)
    y4 = np.reshape(y3, (-1, 1))
    v5 = np.matmul(W5, y4)
    y5 = ReLU(v5)
    v  = np.matmul(Wo, y5)
    y  = Softmax(v)

    # The predicted class is the index of the largest output.
    i = np.argmax(y)
    if i == D[k][0]:
        acc = acc + 1

acc = acc / N
print("Accuracy is : ", acc)


发表评论

邮箱地址不会被公开。 必填项已用*标注