Okay, but what about this deep-learning stuff?

Imports

From Python

from typing import List

From PyPI

from graphviz import Digraph
import numpy

Typing

These are type aliases to make the hinting a little easier to read.

Vector = List[float]
Matrix = List[Vector]

What is this about?

I previously looked at a model with multiple inputs and outputs to predict whether a team would win or lose and how the fans would feel in response to the outcome. Now I'm going to stack another network on top of that one to 'deepen' it.

graph = Digraph(comment="Hidden Layers", format="png")
# input layer
graph.node("A", "Toes")
graph.node("B", "Wins")
graph.node("C", "Fans")

# Hidden Layer
graph.node("D", "H1")
graph.node("E", "H2")
graph.node("F", "H3")

# Output Layer
graph.node("G", "Hurt")
graph.node("H", "Win")
graph.node("I", "Sad")

# Input to hidden edges
graph.edges(["AD", "AE", "AF",
             "BD", "BE", "BF",
             "CD", "CE", "CF",
])

# Hidden to output edges
graph.edges(["DG", "DH", "DI",
             "EG", "EH", "EI",
             "FG", "FH", "FI",
])
graph.render("graphs/hidden_layer.dot")
graph

hidden_layer.dot.png

The layers that sit between the input and output layers like this are called hidden layers.

Okay, so how do you implement that?

It works like our previous model except that you insert an extra vector-matrix multiplication between the inputs and the outputs, so the hidden layer's output becomes the input to the output layer (sketched below). For this example I'm going to implement it as a class so that I can inspect the hidden layer's values more easily, but otherwise it's the same vector and matrix functions chained together.

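In other words, the forward pass is just two chained multiplications. Here's a rough sketch of the idea, using the vector_matrix_multiplication call that's defined as a method in the class below:

# sketch only: vector_matrix_multiplication is defined in the class that follows
hidden = vector_matrix_multiplication(inputs, input_to_hidden_weights)
predictions = vector_matrix_multiplication(hidden, hidden_to_output_weights)
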
class HiddenLayer:
    """Implements a neural network with one hidden layer

    Args:
     inputs: vector of input values
     input_to_hidden_weights: matrix of weights for the input-to-hidden layer
     hidden_to_output_weights: matrix of weights for the hidden-to-output layer
    """
    def __init__(self, inputs: Vector, input_to_hidden_weights: Matrix,
                 hidden_to_output_weights: Matrix) -> None:
        self.inputs = inputs
        self.input_to_hidden_weights = input_to_hidden_weights
        self.hidden_to_output_weights = hidden_to_output_weights
        self._hidden_output = None
        self._predictions = None
        return

    @property
    def hidden_output(self) -> Vector:
        """the output of the hidden layer"""
        if self._hidden_output is None:
            self._hidden_output = self.vector_matrix_multiplication(
                self.inputs,
                self.input_to_hidden_weights)
        return self._hidden_output

    @property
    def predictions(self) -> Vector:
        """Predictions for the inputs"""
        if self._predictions is None:
            self._predictions = self.vector_matrix_multiplication(
                self.hidden_output,
                self.hidden_to_output_weights,
            )
        return self._predictions

    def vector_matrix_multiplication(self, vector: Vector,
                                     matrix: Matrix) -> Vector:
        """calculates the dot-product for each row of the matrix

       Args:
        vector: input with one cell for each row in the matrix
        matrix: input with rows of the same length as the vector

       Returns:
        vector: dot-products for the vector and matrix rows
       """
        vector_length = len(vector)
        assert vector_length == len(matrix)
        rows = range(len(matrix))
        return [self.dot_product(vector, matrix[row]) for row in rows]

    def dot_product(self, vector_1: Vector, vector_2: Vector) -> float:
        """Calculate the dot-product of the two vectors

        Args:
         vector_1: vector with the same length as vector_2
         vector_2: vector with the same length as vector_1

        Returns:
         dot-product: the dot product of the two vectors
        """
        vector_length = len(vector_1)
        assert vector_length == len(vector_2)
        entries = range(vector_length)
        return sum(vector_1[entry] * vector_2[entry] for entry in entries)

Let's try it out

The Input Layer To Hidden Layer Weights

|    | Toes | Wins | Fans |
|----+------+------+------|
| h0 |  0.1 |  0.2 | -0.1 |
| h1 | -0.1 |  0.1 |  0.9 |
| h2 |  0.1 |  0.4 |  0.1 |
input_to_hidden_weights = [[0.1, 0.2, -0.1],
                           [-0.1, 0.1, 0.9],
                           [0.1, 0.4, 0.1]]

The Weights From the Hidden Layer to the Outputs

|      |  h0 |  h1 |   h2 |
|------+-----+-----+------|
| hurt | 0.3 | 1.1 | -0.3 |
| won  | 0.1 | 0.2 |  0.0 |
| sad  | 0.0 | 1.3 |  0.1 |
hidden_layer_to_output_weights = [[0.3, 1.1, -0.3],
                                  [0.1, 0.2, 0.0],
                                  [0.0, 1.3, 0.1]]

Testing it Out

toes = [8.5, 9.5, 9.9, 9.0]
wins = [0.65, 0.8, 0.8, 0.9]
fans = [1.2, 1.3, 0.5, 1.0]
expected_hiddens = [0.86, 0.295, 1.23]
expected_outputs = [0.214, 0.145, 0.507]
first_input = [toes[0], wins[0], fans[0]]
network = HiddenLayer(first_input,
                       input_to_hidden_weights,
                       hidden_layer_to_output_weights)
hidden_outputs = network.hidden_output
expected_actual = zip(expected_hiddens, hidden_outputs)
tolerance = 0.1**5
labels = "h0 h1 h2".split()
print("|Node | Expected | Actual")
print("|-+-+-|")
for index, (expected, actual) in enumerate(expected_actual):
    print("|{} | {}| {:.2f}".format(labels[index], expected, actual))
    assert abs(expected - actual) < tolerance
| Node | Expected | Actual |
|------+----------+--------|
| h0   |     0.86 |   0.86 |
| h1   |    0.295 |   0.29 |
| h2   |     1.23 |   1.23 |
outputs = network.predictions
expected_actual = zip(expected_outputs, outputs)
tolerance = 0.1**3
labels = "Hurt Won Sad".split()
print("|Node | Expected | Actual")
print("|-+-+-|")
for index, (expected, actual) in enumerate(expected_actual):
    print("|{} | {}| {:.3f}".format(labels[index], expected, actual))
    assert abs(expected - actual) < tolerance
| Node | Expected | Actual |
|------+----------+--------|
| Hurt |    0.214 |  0.214 |
| Won  |    0.145 |  0.145 |
| Sad  |    0.507 |  0.506 |

Okay, but can we do that with numpy?

def one_hidden_layer(inputs: Vector, weights: List[Matrix]) -> numpy.ndarray:
    """Converts arguments to numpy and calculates predictions

    Args:
     inputs: array of inputs
     weights: list holding the input-to-hidden and hidden-to-output weight matrices

    Returns:
     predictions: predicted values for each output node
    """
    inputs, weights = numpy.array(inputs), numpy.array(weights)
    hidden = inputs.dot(weights[0].T)
    return hidden.dot(weights[1].T)

One thing to watch out for here: because the weight matrices are square, the dot product won't raise an error if you forget to transpose the weights, but you will silently get the wrong values (see the check below).

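Here's a quick check of that claim (a hypothetical addition, not part of the original test): without the transpose the inputs get dotted with the columns of the weight matrix instead of the rows, so the hidden values come out different even though no exception is raised.

# hypothetical check: compare the multiplication with and without the transpose
weights_array = numpy.array(input_to_hidden_weights)
inputs_array = numpy.array(first_input)
with_transpose = inputs_array.dot(weights_array.T)
without_transpose = inputs_array.dot(weights_array)
# both calls succeed, but the values don't match for these weights
assert not numpy.allclose(with_transpose, without_transpose)
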
weights = [input_to_hidden_weights,
           hidden_layer_to_output_weights]
outputs = one_hidden_layer(first_input, weights)
expected_actual = zip(expected_outputs, outputs)
tolerance = 0.1**3
labels = "Hurt Won Sad".split()
print("|Node | Expected | Actual")
print("|-+-+-|")
for index, (expected, actual) in enumerate(expected_actual):
    print("|{} | {}| {:.3f}".format(labels[index], expected, actual))
    assert abs(expected - actual) < tolerance
| Node | Expected | Actual |
|------+----------+--------|
| Hurt |    0.214 |  0.214 |
| Won  |    0.145 |  0.145 |
| Sad  |    0.507 |  0.506 |

Okay, so what was this about again?

This showed that you can stack networks up and have the outputs of one layer feed into the next until you reach the output layer. This is called Forward Propagation. Although I mentioned deep-learning in the title this really isn't an example yet, it's more like a multilayer perceptron, but it's deeper than two-layers, anyway.