
An Ugly Neural Network Implementation

Michael Nielsen is a physicist, science writer, and computer programming researcher. He wrote a book:

Neural Networks and Deep Learning: A free online book explaining the core ideas behind artificial neural networks and deep learning

It explains core machine-learning ideas (neural networks, convolutional neural networks, the backpropagation algorithm, and several optimization tricks) in plain language from a beginner's perspective, with plenty of figures, and it comes with code implementations. Beginners who read English comfortably are strongly encouraged to work through all six chapters of this free online book.

This post is an ugly neural network implementation; the code comes from http://neuralnetworksanddeeplearning.com/chap1.html. It trains a fully connected 784-30-10 sigmoid network on MNIST with mini-batch stochastic gradient descent.

import random
import numpy as np
import os
import pickle
import sys
import matplotlib.pyplot as plt
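# The TensorFlow 1.x MNIST helper below is used only to download and load the data.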
from tensorflow.examples.tutorials.mnist import input_data
mnist_path = os.path.join(
    os.path.dirname(__file__), os.pardir, os.pardir, 'MNIST_data')

class Network(object):
    path = os.path.join(os.path.dirname(__file__), 'network.pickle')

    def __init__(self, sizes):
        self.num_layers = len(sizes)
        self.sizes = sizes

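        # Resume from a previously saved checkpoint if one exists;
        # otherwise initialize weights and biases with Gaussian noise.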
        if os.path.exists(self.path):
            with open(self.path, 'rb') as f:
                save = pickle.load(f)
                self.biases = save['biases']
                self.weights = save['weights']
                self.epochs = save['epochs']
                self.accuracies = save['accuracies']
        else:
            self.biases = [np.random.randn(x, 1) for x in sizes[1:]]
            self.weights = [
                np.random.randn(x, y) for x, y in zip(sizes[1:], sizes[:-1])
            ]
            self.accuracies = []
            self.epochs = 0

    def save(self, epochs):
        if os.path.exists(self.path):
            os.remove(self.path)
        with open(self.path, 'wb') as f:
            save = {
                'weights': self.weights,
                'biases': self.biases,
                'epochs': epochs + self.epochs,
                'accuracies': self.accuracies
            }
            pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)

    def feedforward(self, a):
        # Forward pass: apply a = sigmoid(w·a + b) layer by layer.
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.matmul(w, a) + b)
        return a

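    # Mini-batch stochastic gradient descent: shuffle the training data each
    # epoch, split it into mini-batches, and take one gradient step per batch.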
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        print('Start from epochs {}'.format(self.epochs))
        if test_data is not None:
            n_test = len(test_data)
        n = len(training_data)
        for i in range(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[j:j + mini_batch_size]
                for j in range(0, n, mini_batch_size)
            ]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data is not None:
                print("Epochs {}: {}".format(i + 1, self.evaluate(test_data)))
            else:
                print("Epochs {} completed".format(i))
            sys.stdout.flush()
        self.save(epochs)
        print('Training ended. Saved!')

    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(x.shape) for x in self.biases]
        nabla_w = [np.zeros(x.shape) for x in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [
            w - (eta / len(mini_batch)) * nw
            for w, nw in zip(self.weights, nabla_w)
        ]
        self.biases = [
            b - (eta / len(mini_batch)) * nb
            for b, nb in zip(self.biases, nabla_b)
        ]

    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # Forward pass: store every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)
        # Output error: delta = (a - y) * sigma'(z).
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # Propagate the error backwards through the remaining layers.
        for num in range(2, self.num_layers):
            z = zs[-num]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-num + 1].transpose(), delta) * sp
            nabla_b[-num] = delta
            nabla_w[-num] = np.dot(delta, activations[-num - 1].transpose())
        return nabla_b, nabla_w

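    # Derivative of the quadratic cost C = 0.5 * ||a - y||^2 w.r.t. the output a.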
    def cost_derivative(self, output_activations, y):
        return output_activations - y

    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                        for x, y in test_data]
        accuracy = sum(int(x == y)
                       for x, y in test_results) * 1.0 / len(test_data)
        self.accuracies.append(accuracy)
        return accuracy

    def prediction(self, x):
        return self.feedforward(x)

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_prime(z):
    # Derivative of the sigmoid: sigma'(z) = sigma(z) * (1 - sigma(z)).
    return sigmoid(z) * (1 - sigmoid(z))

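# Reshape each flat 784-pixel image into a column vector and pair it with
# its one-hot label as a (10, 1) column vector.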
def data(samples, labels):
    return [(np.reshape(x, (784, 1)), np.reshape(y, (10, 1)))
            for x, y in zip(samples, labels)]

if __name__ == '__main__':
    mnist = input_data.read_data_sets(mnist_path, one_hot=True)
    train_data = data(mnist.train.images, mnist.train.labels)
    test_data = data(mnist.test.images, mnist.test.labels)

    print('Train ', len(train_data))
    print('Test  ', len(test_data))
    net = Network([784, 30, 10])
    net.SGD(
        training_data=train_data,
        epochs=30,
        mini_batch_size=10,
        eta=3.0,
        test_data=test_data)

    plt.figure(1)
    for i in range(9):
        sample = test_data[np.random.randint(0, len(test_data))]
        image = sample[0]
        number = np.argmax(sample[1])
        prediction = net.prediction(image)
        number_predicted = np.argmax(prediction)
        plt.subplot(3, 3, i + 1)
        plt.title('{} (Real: {})'.format(number_predicted, number))
        plt.imshow(np.reshape(image, [28, 28]))
    plt.tight_layout()
    plt.figure(2)
    plt.plot(net.accuracies)
    plt.title('accuracies')
    plt.show()
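
A quick way to sanity-check a backprop implementation like this is a finite-difference gradient check on a tiny network. Below is a minimal sketch (the gradient_check helper is mine, not part of the book's code); delete network.pickle first, otherwise Network would load the saved 784-30-10 weights instead of a fresh small net:

def gradient_check(net, eps=1e-5):
    # Compare one entry of backprop's gradient with a numerical estimate
    # of dC/dw obtained by central finite differences.
    x = np.random.randn(net.sizes[0], 1)
    y = np.zeros((net.sizes[-1], 1))
    y[0] = 1.0
    nabla_b, nabla_w = net.backprop(x, y)

    def cost():
        # Quadratic cost matching cost_derivative: C = 0.5 * ||a - y||^2
        return 0.5 * np.sum((net.feedforward(x) - y) ** 2)

    w = net.weights[0]
    old = w[0, 0]
    w[0, 0] = old + eps
    c_plus = cost()
    w[0, 0] = old - eps
    c_minus = cost()
    w[0, 0] = old
    numeric = (c_plus - c_minus) / (2 * eps)
    print('backprop: {:.8f}  numeric: {:.8f}'.format(nabla_w[0][0, 0], numeric))

gradient_check(Network([4, 3, 2]))

The two printed numbers should agree to several decimal places.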

Evaluation

To evaluate the code, I used the MNIST dataset (loaded through TensorFlow's helpers). The results:

Train  55000
Test   10000
Start from epochs 0
Epochs 1: 0.7315
Epochs 2: 0.7454
Epochs 3: 0.7488
Epochs 4: 0.7524
Epochs 5: 0.7628
Epochs 6: 0.9333
Epochs 7: 0.9396
Epochs 8: 0.9413
Epochs 9: 0.9364
Epochs 10: 0.9393
Epochs 11: 0.9449
Epochs 12: 0.9461
Epochs 13: 0.9451
Epochs 14: 0.9495
Epochs 15: 0.9446
Epochs 16: 0.947
Epochs 17: 0.9452
Epochs 18: 0.9469
Epochs 19: 0.9506
Epochs 20: 0.95
Epochs 21: 0.9504
Epochs 22: 0.9459
Epochs 23: 0.9507
Epochs 24: 0.9505
Epochs 25: 0.9504
Epochs 26: 0.9469
Epochs 27: 0.9504
Epochs 28: 0.9508
Epochs 29: 0.9491
Epochs 30: 0.9492
Training ended. Saved!

After training, the accuracy reached about 0.95 (the log above shows it plateauing there within 30 epochs). A deeper network with well-tuned hyperparameters should improve it further.
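
For example, adding a second hidden layer only takes a longer sizes list. A sketch (the layer widths are illustrative, not tuned; delete network.pickle first so the old 784-30-10 checkpoint is not loaded into a network of a different shape):

net = Network([784, 100, 30, 10])
net.SGD(
    training_data=train_data,
    epochs=30,
    mini_batch_size=10,
    eta=3.0,
    test_data=test_data)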

[Figure 1: nine random test samples, each titled with the predicted digit and the real label]

[Figure 2: test accuracy after each epoch]
