Chatbot Tutorial

Introduction

This is a walk-through of the PyTorch Chatbot Tutorial, which builds a chatbot using a recurrent sequence-to-sequence model trained on the Cornell Movie-Dialogs Corpus.

Set Up

Imports

Python

from collections import defaultdict, namedtuple
import codecs
from pathlib import Path
from typing import Dict, List, Union
from zipfile import ZipFile
import csv
import os
import subprocess

PyPi

from dotenv import load_dotenv
import requests
import torch

This Project

from neurotic.tangles.timer import Timer

Setup the Timer

TIMER = Timer()

Load Dotenv

load_dotenv("../../.env")

Check CUDA

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using {}".format(device))
Using cuda

Some Type Hints

OptionalList = Union[list, None]

Some Constants

ENCODING = "iso-8859-1"

The Data

Download

class MovieData:
    """Dowload and ready the movie data
    Args:
     download_path: Path to the folder to store the data
     url: download url for the zip file
     chunk_size: bytes to read from stream during download
     clean_up: remove the extra downloaded files
    """
    def __init__(self,
                 download_path: Path,
                 url: str=("http://www.cs.cornell.edu/~cristian/data/"
                           "cornell_movie_dialogs_corpus.zip"),
                 chunk_size=1024,
                 clean_up: bool=True) -> None:
        self.download_path = download_path
        self.url = url
        self.chunk_size = chunk_size
        self.clean_up = clean_up
        self._zip_path = None
        self._data_path = None
        self._zip_file = None
        return

    @property
    def zip_path(self) -> Path:
        """Path to the downloaded zip file"""
        if self._zip_path is None:
            self._zip_path = self.download_path.joinpath(Path(self.url).name)
        return self._zip_path

    @property
    def data_path(self) -> Path:
        """Path to the unzipped file"""
        if self._data_path is None:
            self._data_path = self.download_path.joinpath(
                Path(self.zip_path).stem)
        return self._data_path

    @property
    def zip_file(self) -> ZipFile:
        """the Zip file for the zipped data"""
        if self._zip_file is None:
            self._zip_file = ZipFile(self.zip_path)
        return self._zip_file

    def clean(self) -> None:
        """remove the extra downloaded files"""
        os.remove(self.zip_path)
        return

    def __call__(self) -> None:
        """downloads and prepares the file if needed"""
        if not self.data_path.is_dir():
            if not self.zip_path.is_file():
                response = requests.get(self.url, stream=True)
                with self.zip_path.open("wb") as writer:
                    for chunk in response.iter_content(chunk_size=self.chunk_size):
                        if chunk:
                            writer.write(chunk)
            unpacked = []
            for name in self.zip_file.namelist():
                name = Path(name)
                # there are extra folders and hidden files in there that I'll avoid
                if name.suffix in (".pdf", ".txt") and not name.name.startswith("."):
                    self.zip_file.extract(str(name), path=self.data_path)
                    unpacked.append(name)
            assert self.data_path.is_dir()
            if self.clean_up:
                # there is a sub-folder in the unzipped folder so move
                # the files up one
                for to_move in unpacked:
                    self.data_path.joinpath(to_move).rename(
                        self.data_path.joinpath(to_move.name))

                # now delete the temporary file
                os.remove(self.zip_path)
                if unpacked:
                    # now remove the sub-folder
                    self.data_path.joinpath(unpacked[0].parent).rmdir()
        return

Now let's download and unpack the data.

datasets = Path(os.environ.get("DATASETS")).expanduser()
assert datasets.is_dir()
movie_data = MovieData(datasets, clean_up=True)
movie_data()
for name in movie_data.data_path.iterdir():
    print(" - {}".format(name.name))
 - chameleons.pdf
 - conversation_line_pairs.tsv
 - movie_conversations.txt
 - movie_characters_metadata.txt
 - movie_lines.txt
 - movie_titles_metadata.txt
 - raw_script_urls.txt
 - README.txt
class MovieFile:
    urls = "raw_script_urls.txt"
    readme = "README.txt"
    lines = "movie_lines.txt"
    characters = "movie_characters_metadata.txt"
    conversations = "movie_conversations.txt"
    titles = "movie_titles_metadata.txt"

Movie Lines

Here's an excerpt from the README.txt file:

In all files the field separator is " +++$+++ "

  • movie_lines.txt
    • contains the actual text of each utterance
    • fields:
      • lineID
      • characterID (who uttered this phrase)
      • movieID
      • character name
      • text of the utterance

Movie Line Data

To load the lines I'm going to make a namedtuple.

MovieLine = namedtuple("MovieLine", ["line_id",
                                     "character_id",
                                     "movie_id",
                                     "character_name",
                                     "text"])

LineData = Dict[str, MovieLine]
LineFields = MovieLine(**{field: index
                          for index, field in enumerate(MovieLine._fields)})

A Line Loader

class MovieLines:
    """loads the movie dialog lines

    Args:
     path: path to the source file
     separator: column-separator
     encoding: the file encoding type (e.g. UTF-8)
    """
    def __init__(self, path: Path, separator: str=" +++$+++ ",
                 encoding="UTF-8") -> None:
        self.path = path
        self.separator = separator
        self.encoding = encoding
        self._lines = None
        return

    @property
    def lines(self) -> LineData:
        """Dictionary Of Lines in the Data"""
        if self._lines is None:
            self._lines = {}
            with self.path.open(encoding=self.encoding) as reader:
                for line in reader:
                    tokens = line.strip().split(self.separator)

                    text = tokens[LineFields.text] if len(tokens) == len(LineFields) else ""
                    movie_line = MovieLine(line_id=tokens[LineFields.line_id],
                                           character_id=tokens[LineFields.character_id],
                                           movie_id=tokens[LineFields.movie_id],
                                           character_name=tokens[LineFields.character_name],
                                           text=text,
                    )
                    self._lines[movie_line.line_id] = movie_line
        return self._lines

    def head(self, lines: int=5, get: bool=False) -> OptionalList:
        """show the first lines

        Args:
         lines: number of lines to read
         get: if true, return the lines
        """
        output = [] if get else None
        with self.path.open() as reader:
            for index, line in enumerate(reader):
                line = line.rstrip()
                print(line)
                if get:
                    output.append(line)
                if index + 1 >= lines:
                    break
        return output
movie_lines = MovieLines(movie_data.data_path.joinpath(MovieFile.lines), encoding=ENCODING)
output_lines = movie_lines.head(10)
L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!
L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!
L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.
L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?
L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.
L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow
L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.
L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No
L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I'm kidding.  You know how sometimes you just become this "persona"?  And you don't know how to quit?
L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?

As noted in the README.txt, those strange characters are how the columns are separated (I guess so that the commas could be kept in the text). The line IDs seem to be in reverse order, and not all the lines seem to be there - unless they're just out of order and looking at the head is misleading. For reference, the movie the lines I showed come from (the dialog between Bianca and Cameron) is 10 Things I Hate About You. For some reason they both encode the characters and give their names - u0 is BIANCA.

If you poke around in the file you'll find that there's something peculiar about the characters in it.

output = subprocess.run(["file", "-i", str(movie_lines.path)], stdout=subprocess.PIPE)
print(output.stdout)
b'/home/athena/data/datasets/cornell_movie_dialogs_corpus/movie_lines.txt: text/plain; charset=unknown-8bit\n'

It doesn't look like standard ASCII, but I wonder if it matters. The PyTorch tutorial gives the encoding as iso-8859-1; I couldn't find any documentation confirming that, but since the tutorial specifies it, we'll use it.

ENCODING = "iso-8859-1"

I'm using it in MovieLines too, so I defined ENCODING at the top of the notebook; this cell is just to show where it came from.
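
If you'd rather not take the encoding entirely on faith, a library like chardet can make a guess from the raw bytes. This is just an optional aside (the tutorial doesn't use chardet, so treat its availability as an assumption), not part of the original walkthrough.

# optional aside: guess the file's encoding (assumes the chardet package is installed)
import chardet

with movie_lines.path.open("rb") as reader:
    guess = chardet.detect(reader.read(100000))
print(guess)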

Conversations

The movie-lines file lumps all the lines together, but we want conversations between characters. To get those you need to group the lines using the movie_conversations.txt file.

  • movie_conversations.txt
    • the structure of the conversations
    • fields
      • characterID of the first character involved in the conversation
      • characterID of the second character involved in the conversation
      • movieID of the movie in which the conversation occurred
      • list of the utterances that make the conversation, in chronological order: ['lineID1','lineID2',É,'lineIDN'] has to be matched with movie_lines.txt to reconstruct the actual content

You can see that the README has some kind of funky character in it (in the list-of-utterances field). Weird.

A Conversation Holder

A conversation is a list of lines said by characters to each other. Although the dialog file is presumably in order, we want to be able to partition lines that are part of a single conversation - a verbal interaction between two characters.

ConversationIDs = namedtuple("ConversationIDs", ["character_id_1",
                                                 "character_id_2",
                                                 "movie_id",
                                                 "lines"])
ConversationFields = ConversationIDs(
    **{field: index
       for index, field in enumerate(ConversationIDs._fields)})
ConversationData = List[ConversationIDs]

A Conversations Builder

This is code to pull the lines out and group them by conversation.

class Conversations:
    """Holds the conversations

    Args:
     path: path to the conversations file
     movies: object with the movie lines
     encoding: the encoding for the file
     separator: the column separator
    """
    def __init__(self,
                 path: Path,
                 movies: MovieLines,
                 separator: str=" +++$+++ ",
                 encoding:str="UTF-8") -> None:
        self.path = path
        self.movies = movies
        self.separator = separator
        self.encoding = encoding
        self._conversations = None
        self._sentence_pairs = None
        return

    @property
    def conversations(self) -> ConversationData:
        """The list of conversation line data
        """
        if self._conversations is None:
            self._conversations = []
            with self.path.open(encoding=self.encoding) as reader:
                for line in reader:
                    tokens = line.strip().split(self.separator)
                    line_ids = eval(tokens[ConversationFields.lines])
                    lines = [self.movies.lines[line_id] for line_id in line_ids]
                    self._conversations.append(
                        ConversationIDs(
                            character_id_1=tokens[ConversationFields.character_id_1],
                            character_id_2=tokens[ConversationFields.character_id_2],
                            movie_id=tokens[ConversationFields.movie_id],
                            lines = lines,
                        ))
        return self._conversations

    @property
    def sentence_pairs(self) -> list:
        """paired-sentences from the conversations"""
        if self._sentence_pairs is None:
            self._sentence_pairs = []
            for conversation in self.conversations:
                for index in range(len(conversation.lines) - 1):
                    utterance = conversation.lines[index].text
                    response = conversation.lines[index + 1].text
                    # you might not always have pairs
                    if utterance and response:
                        self._sentence_pairs.append([utterance, response])
        return self._sentence_pairs

    def head(self, count: int=5) -> None:
        """Print the first lines

        Args:
         count: how many lines to print
        """
        with self.path.open(encoding=self.encoding) as reader:
            so_far = 0
            for line in reader:
                print(line.rstrip())
                so_far += 1
                if so_far >= count:
                    break
        return

Now I'll build the conversations from the file.

conversations_path = movie_data.data_path.joinpath(MovieFile.conversations)
conversations = Conversations(conversations_path, movie_lines, encoding=ENCODING)
conversations.head()
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L204', 'L205', 'L206']
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L207', 'L208']

Store the Processed Lines

Since we've transformed the data we should store it to avoid needing to transform it again later.

with TIMER:
    processed_path = movie_data.data_path.joinpath("conversation_line_pairs.tsv")
    delimiter = str(codecs.decode("\t", "unicode_escape"))
    NEWLINE = "\n"
    with processed_path.open("w", encoding="utf-8") as outputfile:
        writer = csv.writer(outputfile, delimiter=delimiter)
        for pair in conversations.sentence_pairs:
            writer.writerow(pair)
Started: 2019-02-18 18:44:01.624014
Ended: 2019-02-18 18:44:04.127445
Elapsed: 0:00:02.503431

Check Our Stored File

with processed_path.open() as reader:
    count = 0
    for line in reader:
        print(repr(line))
        count += 1
        if count == 5:
            break
"Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.\tWell, I thought we'd start with pronunciation, if that's okay with you.\n"
"Well, I thought we'd start with pronunciation, if that's okay with you.\tNot the hacking and gagging and spitting part.  Please.\n"
"Not the hacking and gagging and spitting part.  Please.\tOkay... then how 'bout we try out some French cuisine.  Saturday?  Night?\n"
"You're asking me out.  That's so cute. What's your name again?\tForget it.\n"
"No, no, it's my fault -- we didn't have a proper introduction ---\tCameron.\n"

A Vocabulary

PADDING, START_OF_SENTENCE, END_OF_SENTENCE = 0, 1, 2

class Vocabulary:
    """A class to hold words and sentences

    Args:
     name: name of the vocabulary
     token_delimiter: what to split sentences on
    """
    def __init__(self, name: str, token_delimiter: str=" ") -> None:
        self.name = name
        self.trimmed = False
        self.token_delimiter = token_delimiter
        self.word_to_index = {}
        self._word_to_count = None
        self._index_to_word = None
        return

    @property
    def word_to_count(self) -> defaultdict:
        """map of word to word count"""
        if self._word_to_count is None:
            self._word_to_count = defaultdict(lambda: 1)
        return self._word_to_count

    @property
    def index_to_word(self) -> dict:
        """map of word-index back to the word"""
        if self._index_to_word is None:
            # the keys need to be the integer constants, not the constant names
            self._index_to_word = {
                PADDING: "PAD",
                START_OF_SENTENCE: "SOS",
                END_OF_SENTENCE: "EOS",
            }
        return self._index_to_word

    @property
    def word_count(self) -> int:
        """the number of words in our vocabulary"""
        return len(self.index_to_word)

    def add_sentence(self, sentence: str) -> None:
        """Adds the words in the sentence to our dictionary

        Args:
         sentence: string of words
        """
        for word in sentence.split(self.token_delimiter):
            self.add_word(word)
        return

    def add_word(self, word: str) -> None:
        """add the word to our vocabulary

        Args:
         word: word to add
        """
        if word not in self.word_to_index:
            self.word_to_index[word] = self.word_count
            self.index_to_word[self.word_count] = word
        else:
            self.word_to_count[word] += 1
        return

    def trim(self, minimum: int) -> None:
        """Trim words below the minimum

        .. warning:: This will only work once, even if you change the
          minimum. set self.trimmed to False if you want to do it again

        Args:
         minimum: lowest acceptible count for a word
        """
        if self.trimmed:
            return
        self.trimmed = True
        keepers = []
        for word, count in self.word_to_count.items():
            if count >= minimum:
                keepers.append(word)
        print("Keep: {}/{} = {:.2f}".format(len(keepers),
                                            len(self.word_count),
                                            len(keepers)/len(self.word_count)))
        self.reset()
        for word in keepers:
            self.add_word(word)
        return

    def reset(self) -> None:
        """Resets the dictionaries"""
        self.word_to_index = {}
        self._word_to_count = None
        self._index_to_word = None
        return

Preparing the Data For Model-Training
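
The post stops short of actually wiring these pieces together, but as a rough sketch (not the tutorial's exact code), the saved sentence pairs and the Vocabulary class above might be combined something like this. The normalize function here is a simplified, hypothetical stand-in for the tutorial's normalizeString.

import re
import unicodedata

def normalize(sentence: str) -> str:
    """lower-case, strip accents, and isolate basic punctuation"""
    sentence = "".join(character for character
                       in unicodedata.normalize("NFD", sentence.lower().strip())
                       if unicodedata.category(character) != "Mn")
    sentence = re.sub(r"([.!?])", r" \1", sentence)
    sentence = re.sub(r"[^a-zA-Z.!?]+", r" ", sentence)
    return re.sub(r"\s+", " ", sentence).strip()

vocabulary = Vocabulary("cornell movie-dialogs")
pairs = [[normalize(line) for line in pair]
         for pair in conversations.sentence_pairs]
for utterance, response in pairs:
    vocabulary.add_sentence(utterance)
    vocabulary.add_sentence(response)
print("Words in the vocabulary: {:,}".format(vocabulary.word_count))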

TV Script Generation

Act I - The Call To Adventure

What is this about, then?

We want to create a model that can generate scripts for you. To do this I'll use part of the Seinfeld dataset of scripts hosted on Kaggle to train an RNN that generates "fake" TV scripts emulating the Seinfeld ones.

Set Up

Imports

  • Python
    from collections import Counter
    from functools import partial
    from pathlib import Path
    from typing import Collection
    import os
    import pickle
    
  • PyPi
    from dotenv import load_dotenv
    from tabulate import tabulate
    from torch import nn
    from torch.utils.data import TensorDataset, DataLoader
    import hvplot.pandas
    import numpy
    import pandas
    import torch
    
  • This Project
    from bartleby_the_penguin.tangles.embed_bokeh import EmbedBokeh
    
  • Support Code
    from udacity.project_tv_script_generation import helper
    import udacity.project_tv_script_generation.problem_unittests as unittests
    

Load Dotenv

load_dotenv()

The Folder Path

This is the path for saving files for this post.

FOLDER_PATH = Path("../../../files/posts/nano/tv-script-generation/"
                   "tv-script-generation/")
if not FOLDER_PATH.is_dir():
    FOLDER_PATH.mkdir(parents=True)

The Bokeh Embedder

This sets up the bokeh files and HTML.

Embed = partial(EmbedBokeh, folder_path=FOLDER_PATH)

Check CUDA

Make sure that we can use CUDA.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
assert device.type == "cuda", 'No GPU found. Please use a GPU to train your neural network.'
print("Using {}".format(device))

Some Types

WordIndices = Collection[int]

Get the Data

Scripts

class Scripts:
    """Seinfeld Scripts

    Args:
     environment_key: environment variable with the source location
     dialog_only: remove descriptive columns
    """
    def __init__(self, environment_key: str="SCRIPTS", dialog_only: bool=True) -> None:
        self.environment_key = environment_key
        self.dialog_only = dialog_only
        self._script_blob = None
        self._path = None
        self._lines = None
        self._tokens = None
        self._line_tokens = None
        return

    @property
    def path(self) -> Path:
        """The path to the file"""
        if self._path is None:
            load_dotenv(".env")
            self._path = Path(os.environ.get(self.environment_key)).expanduser()
            assert self._path.is_file()
        return self._path

    @property
    def script_blob(self) -> str:
        """The input file as a string"""
        if self._script_blob is None:
            with open(self.path) as reader:
                self._script_blob = reader.read()
        return self._script_blob

    @property
    def line_tokens(self) -> list:
        """list of tokens for each line"""
        if self._line_tokens is None:
            self._line_tokens = [line.split(" ") for line in self.lines]
        return self._line_tokens

    @property
    def lines(self) -> list:
        """The lines of the script"""
        if self._lines is None:
            lines = self.script_blob.split("\n")
            if self.dialog_only:
                lines = lines[1:]
                lines = [(",").join(line.split(",")[2:-3]) for line in lines]
            self._lines = lines
        return self._lines

    @property
    def tokens(self) -> Counter:
        """The tokens and their counts"""
        if self._tokens is None:
            self._tokens = Counter()
            for token in self.script_blob.split():
                self._tokens[token] += 1
        return self._tokens

Script Inspector

This is just to help with some preliminary exploratory data analysis.

class ScriptInspector:
    """gets some basic counts

    Args:
     scripts: object with the scripts
    """
    def __init__(self, scripts: Scripts=None) -> None:
        self._scripts = scripts
        self._line_count = None
        self._count_per_line = None
        self._mean_words_per_line = None
        self._median_words_per_line = None
        self._max_words_per_line = None
        self._min_words_per_line = None
        self._token_count = None
        return

    @property
    def scripts(self) -> Scripts:
        """The scripts object"""
        if self._scripts is None:
            self._scripts = Scripts()
        return self._scripts

    @property
    def line_count(self) -> int:
        """Number of lines in the source"""
        if self._line_count is None:
            self._line_count = len(self.scripts.lines)
        return self._line_count

    @property
    def count_per_line(self) -> list:
        """tokens per line"""
        if self._count_per_line is None:
            self._count_per_line = [len(tokens)
                                    for tokens in self.scripts.line_tokens]
        return self._count_per_line

    @property
    def mean_words_per_line(self) -> float:
        """Average number of words per line"""
        if self._mean_words_per_line is None:
            self._mean_words_per_line = (sum(self.count_per_line)
                                         /self.line_count)
        return self._mean_words_per_line

    @property
    def median_words_per_line(self) -> float:
        """Median words per line in the scripts"""
        if self._median_words_per_line is None:
            self._median_words_per_line = numpy.median(self.count_per_line)
        return self._median_words_per_line

    @property
    def max_words_per_line(self) -> int:
        """Count of words in longest line"""
        if self._max_words_per_line is None:
            self._max_words_per_line = max(self.count_per_line)
        return self._max_words_per_line

    @property
    def min_words_per_line(self) -> int:
        """Count of words in shortest line"""
        if self._min_words_per_line is None:
            self._min_words_per_line = min(self.count_per_line)
        return self._min_words_per_line

    @property
    def token_count(self) -> int:
        """Number of tokens in the text"""
        if self._token_count is None:
            self._token_count = sum(self.scripts.tokens.values())
        return self._token_count

    def most_common_tokens(self, count: int=10) -> list:
        """token, count tuples in descending rank

        Args:
         count: number of tuples to return in the list
        """
        if count > 0:
            return self.scripts.tokens.most_common(count)
        return self.scripts.tokens.most_common()[count:]

    def line_range(self, start: int=0, stop: int=10) -> list:
        """lines within range

        Args:
         start: index of first line
         stop: upper bound for last line
        """
        return self.scripts.lines[start:stop]

The scripts aren't really in a format that is optimized for pandas, at least not for this initial look, so we'll just load it as text.

inspector = ScriptInspector()

Explore the Data

view_line_range = (0, 10)
words_per_line = pandas.DataFrame(inspector.count_per_line,
                                  columns=["line_counts"])
print(words_per_line.shape)
(54617, 1)

Dataset Statistics

lines = (("Number of unique tokens", "{:,}".format(inspector.token_count)),
         ("Number of lines", "{:,}".format(inspector.line_count)),
         ("Words in longest line", "{:,}".format(inspector.max_words_per_line)),
         ("Average number of words in each line", "{:.2f}".format(
             inspector.mean_words_per_line)),
         ("Median Words Per Line", "{:.2f}".format(
             inspector.median_words_per_line)),
         ("Words in shortest line", "{}".format(inspector.min_words_per_line))
)
print(tabulate(lines, headers="Statistic Value".split(), tablefmt="orgtbl"))
| Statistic                            | Value   |
|--------------------------------------+---------|
| Number of tokens                     | 550,996 |
| Number of lines                      | 54,617  |
| Words in longest line                | 363     |
| Average number of words in each line | 10.01   |
| Median Words Per Line                | 7.00    |
| Words in shortest line               | 1       |

Why would a line have 363 words?

index = words_per_line.line_counts.idxmax()
print(inspector.count_per_line[index])
print(inspector.scripts.lines[index])
363
"The dating world is not a fun world...its a pressure world, its a world of tension, its a world of pain...and you know, if a woman comes over to my house, I gotta get that bathroom ready, cause she needs things. Women need equipment. I dont know what they need. I know I dont have it, I know that- You know what they need, women seem to need a lot of cotton-balls. This is the one Im- always has been one of the amazing things to me...I have no cotton-balls, were all human beings, what is the story? Ive never had one...I never bought one, I never needed one, Ive never been in a situation, when I thought to myself I could use a cotton-ball right now. I can certainly get out of this mess. Women need them and they dont need one or two, they need thousands of them, they need bags, theyre like peat moss bags, have you ever seen these giant bags? Theyre huge and two days later, theyre out, theyre gone, the, the bag is empty, where are the cotton-balls, ladies? What are you doin with them? The only time I ever see em is in the bottom of your little waste basket, theres two or three, that look like theyve been through some horrible experience... tortured, interrogated, I dont know what happened to them. I once went out with a girl whos left a little zip-lock-baggy of cotton-balls over at my house. I dont know what to do with them, I took them out, I put them on my kitchen floor like little tumbleweeds. I thought maybe the cockroaches would see it, figure this is a dead town. Lets move on. The dating world is a world of pressure. Lets face it a date is a job interview that lasts all night. The only difference between a date and a job interview is not many job interviews is there a chance youll end up naked at the end of it. You know? Well, Bill, the boss thinks youre the man for the position, why dont you strip down and meet some of the people youll be workin with?"

This is one of Seinfeld's stand up routines, so I don't think it's, strictly speaking, a line, or at least not a line of dialog.

What about one word?

print(inspector.scripts.lines[words_per_line.line_counts.idxmin()])
Ha.

There's probably a lot of one word lines ("Yes", "No", etc.).

Plot the Words Per Line

plot = words_per_line.line_counts.hvplot.kde(title="Word Counts Per Line Distribution")
plotter = plot.opts(width=600, height=600, tools=["hover"])
Embed(plotter, "line_counts.js")()
plot = words_per_line.line_counts.hvplot.box(title="Words Per Line")
plot = plot.opts(tools=["hover"])
Embed(plot, "line_counts_boxplot.js")()

Most Used Words


lines = ((token, "{:,}".format(count))
         for token, count in inspector.most_common_tokens())
print(tabulate(lines,
               tablefmt="orgtbl", headers=["Token", "Count"]))
| Token | Count  |
|-------+--------|
| the   | 16,373 |
| I     | 13,911 |
| you   | 12,831 |
| a     | 12,096 |
| to    | 11,594 |
| of    | 5,490  |
| and   | 5,210  |
| in    | 4,741  |
| is    | 4,283  |
| that  | 4,047  |

So it looks like the stop words are the most common, as you might expect.

words, counts = zip(*inspector.most_common_tokens(20))
top_twenty = pandas.DataFrame([counts], columns=words).T.reset_index()
top_twenty.columns = ["Word", "Count"]
layout = top_twenty.hvplot.bar(x="Word", y="Count",
                               title="Twenty Most Used Words",
                               colormap="Category20")
layout.opts(height=500, width=600)
Embed(layout, "top_twenty.js")()

The First Five Lines

for line in inspector.line_range(stop=5):
    print(line)
"Do you know what this is all about? Do you know, why were here? To be out, this is out...and out is one of the single most enjoyable experiences of life. People...did you ever hear people talking about We should go out? This is what theyre talking about...this whole thing, were all out now, no one is home. Not one person here is home, were all out! There are people tryin to find us, they dont know where we are. (on an imaginary phone) Did you ring?, I cant find him. Where did he go? He didnt tell me where he was going. He must have gone out. You wanna go out you get ready, you pick out the clothes, right? You take the shower, you get all ready, get the cash, get your friends, the car, the spot, the reservation...Then youre standing around, whatta you do? You go We gotta be getting back. Once youre out, you wanna get back! You wanna go to sleep, you wanna get up, you wanna go out again tomorrow, right? Where ever you are in life, its my feeling, youve gotta go."
"(pointing at Georges shirt) See, to me, that button is in the worst possible spot. The second button literally makes or breaks the shirt, look at it. Its too high! Its in no-mans-land. You look like you live with your mother."
Are you through?
"You do of course try on, when you buy?"
"Yes, it was purple, I liked it, I dont actually recall considering the buttons."

I took out the header and the identifying columns so this is just the dialog part of the data. It looks like they left in all the punctuation except for apostrophes for some reason.

Pre-Processing the Text

The first thing to do to any dataset is pre-processing. Implement the following pre-processing functions below:

  • Lookup Table
  • Tokenize Punctuation

Lookup Table

To create a word embedding, you first need to transform the words to ids. In this function, create two dictionaries:

  • Dictionary to go from the words to an ID, we'll call it vocab_to_int
  • Dictionary to go from the ID to word, we'll call it int_to_vocab

Return these dictionaries in the following tuple (vocab_to_int, int_to_vocab)

def create_lookup_tables(text: list) -> tuple:
    """
    Create lookup tables for vocabulary

    Args:
     text: The text of the TV scripts split into words

    Returns: 
     A tuple of dicts (vocab_to_int, int_to_vocab)
    """
    text = set(text)
    vocabulary_to_index = {token: index for index, token in enumerate(text)}
    index_to_vocabulary = {index: token for index, token in enumerate(text)}
    return vocabulary_to_index, index_to_vocabulary
test_text = '''
Moe_Szyslak Moe's Tavern Where the elite meet to drink
Bart_Simpson Eh yeah hello is Mike there Last name Rotch
Moe_Szyslak Hold on I'll check Mike Rotch Mike Rotch Hey has anybody seen Mike Rotch lately
Moe_Szyslak Listen you little puke One of these days I'm gonna catch you and I'm gonna carve my name on your back with an ice pick
Moe_Szyslak Whats the matter Homer You're not your normal effervescent self
Homer_Simpson I got my problems Moe Give me another one
Moe_Szyslak Homer hey you should not drink to forget your problems
Barney_Gumble Yeah you should only drink to enhance your social skills'''
unittests.test_create_lookup_tables(create_lookup_tables)
Tests Passed

Tokenize Punctuation

We'll be splitting the script into a word array using spaces as delimiters. However, punctuation like periods and exclamation marks can create multiple ids for the same word. For example, "bye" and "bye!" would generate two different word ids.

Implement the function token_lookup to return a dict that will be used to tokenize symbols like "!" into "||Exclamation_Mark||". Create a dictionary for the following symbols where the symbol is the key and value is the token:

  • Period ( . )
  • Comma ( , )
  • Quotation Mark ( " )
  • Semicolon ( ; )
  • Exclamation mark ( ! )
  • Question mark ( ? )
  • Left Parentheses ( ( )
  • Right Parentheses ( ) )
  • Dash ( - )
  • Return ( \n )

This dictionary will be used to tokenize the symbols and add the delimiter (space) around them. This separates each symbol into its own word, making it easier for the neural network to predict the next word. Make sure you don't use a value that could be confused with a word; for example, instead of using the value "dash", try using something like "||dash||".

def token_lookup():
    """
    Generate a dict to turn punctuation into a token.

    Returns:
     Tokenized dictionary where the key is the punctuation and the value is the token
    """
    tokens = {'.': "period",
              ',': 'comma',
              '"': 'quotation',
              ';': 'semicolon',
              '!': 'exclamation',
              '?': 'question',
              '(': 'leftparenthesis',
              ')': 'rightparenthesis',
              '-': 'dash',
              '\n': 'newline'}
    return {token: '**{}**'.format(coded) for token,coded in tokens.items()}
unittests.test_tokenize(token_lookup)

Pre-process all the data and save it

Running the code cell below will pre-process all the data and save it to file. You're encouraged to look at the code for preprocess_and_save_data in the helper.py file to see what it's doing in detail, but you do not need to change this code.

text = helper.load_data(inspector.scripts.path)
text = text[81:]
token_dict = token_lookup()
for key, token in token_dict.items():
    text = text.replace(key, ' {} '.format(token))
text = text.lower()
text = text.split()
vocab_to_int, int_to_vocab = create_lookup_tables(text + list(helper.SPECIAL_WORDS.values()))
int_text = [vocab_to_int[word] for word in text]
pre_processed = inspector.scripts.path.parent.joinpath('preprocess.pkl')
with pre_processed.open("wb") as writer:
    pickle.dump((int_text, vocab_to_int, int_to_vocab, token_dict), writer)

Check Point

This is your first checkpoint. If you ever decide to come back to this notebook or have to restart the notebook, you can start from here. The preprocessed data has been saved to disk.

pre_processed = inspector.scripts.path.parent.joinpath('preprocess.pkl')
with pre_processed.open("rb") as reader:
    int_text, vocab_to_int, int_to_vocab, token_dict = pickle.load(reader)

Act II - The Departure

Build the Neural Network

In this section, you'll build the components necessary to build an RNN by implementing the RNN Module and forward and backpropagation functions.

Input

Let's start with the preprocessed input data. We'll use TensorDataset to provide a known format to our dataset; in combination with DataLoader, it will handle batching, shuffling, and other dataset iteration functions.

You can create data with TensorDataset by passing in feature and target tensors. Then create a DataLoader as usual.

data = TensorDataset(feature_tensors, target_tensors)
data_loader = torch.utils.data.DataLoader(data, 
                                          batch_size=batch_size)

Batching

Implement the batch_data function to batch the word data into chunks of size batch_size using the TensorDataset and DataLoader classes.

You can batch words using the DataLoader, but it will be up to you to create feature_tensors and target_tensors of the correct size and content for a given sequence_length.

For example, say we have these as input:

words = [1, 2, 3, 4, 5, 6, 7]
sequence_length = 4

Your first feature_tensor should contain the values:

[1, 2, 3, 4]

And the corresponding target_tensor should just be the next "word"/tokenized word value:

5

This should continue with the second feature_tensor, target_tensor being:

[2, 3, 4, 5]  # features
6             # target
def train_test_split(words: WordIndices, sequence_length: int) -> tuple:
    """Breaks the words into feature sequences and their target words

    Args:
     words: the IDs of the TV scripts
     sequence_length: the sequence length of each training instance

    Returns:
     tuple of (feature tensors, target tensors)
    """
    # accept any iterable (e.g. a range) by making a list first
    words = list(words)
    features, targets = [], []
    for start in range(len(words) - sequence_length):
        features.append(words[start:start + sequence_length])
        targets.append(words[start + sequence_length])
    # LongTensors because these are word ids (what the embedding and loss expect)
    return torch.LongTensor(features), torch.LongTensor(targets)
words = list(range(1, 8))
sequence_length = 4
training, testing = train_test_split(words, sequence_length)
assert torch.equal(training[0], torch.LongTensor([1, 2, 3, 4]))
assert testing[0].item() == 5
assert torch.equal(training[1], torch.LongTensor([2, 3, 4, 5]))
assert testing[1].item() == 6
assert torch.equal(training[2], torch.LongTensor([3, 4, 5, 6]))
assert testing[2].item() == 7
assert len(training) == 3
assert len(testing) == 3
def batch_data(words: WordIndices, sequence_length: int, batch_size: int) -> DataLoader:
    """
    Batch the neural network data using DataLoader

    Args:
     - words: The word ids of the TV scripts
     - sequence_length: The sequence length of each batch
     - batch_size: The size of each batch; the number of sequences in a batch
    Returns: 
     DataLoader with batched data
    """
    features, targets = train_test_split(words, sequence_length)
    data = TensorDataset(features, targets)
    # shuffle the sequences and hand the batch size through to the loader
    return DataLoader(data, shuffle=True, batch_size=batch_size)

There is no test for this function, but you are encouraged to create tests of your own.

Test your dataloader

You'll have to modify this code to test a batching function, but it should look fairly similar.

Below, we're generating some test text data and defining a dataloader using the function you defined, above. Then, we are getting some sample batch of inputs `sample_x` and targets `sample_y` from our dataloader.

Your code should return something like the following (likely in a different order, if you shuffled your data):

torch.Size([10, 5])
tensor([[ 28,  29,  30,  31,  32],
        [ 21,  22,  23,  24,  25],
        [ 17,  18,  19,  20,  21],
        [ 34,  35,  36,  37,  38],
        [ 11,  12,  13,  14,  15],
        [ 23,  24,  25,  26,  27],
        [  6,   7,   8,   9,  10],
        [ 38,  39,  40,  41,  42],
        [ 25,  26,  27,  28,  29],
        [  7,   8,   9,  10,  11]])

torch.Size([10])
tensor([ 33,  26,  22,  39,  16,  28,  11,  43,  30,  12])

Sizes

Your sample_x should be of size `(batch_size, sequence_length)` or (10, 5) in this case and sample_y should just have one dimension: batch_size (10).

Values

You should also notice that the targets, sample_y, are the next value in the ordered test_text data. So, for an input sequence `[ 28, 29, 30, 31, 32]` that ends with the value `32`, the corresponding output should be `33`.

test_text = range(50)
t_loader = batch_data(test_text, sequence_length=5, batch_size=10)

data_iter = iter(t_loader)
sample_x, sample_y = next(data_iter)

print(sample_x.shape)
print(sample_x)
print()
print(sample_y.shape)
print(sample_y)

Build the Neural Network

Implement an RNN using PyTorch's [Module class](http://pytorch.org/docs/master/nn.html#torch.nn.Module). You may choose to use a GRU or an LSTM. To complete the RNN, you'll have to implement the following functions for the class:

  • `__init__` - The initialize function.
  • `init_hidden` - The initialization function for an LSTM/GRU hidden state
  • `forward` - Forward propagation function.

The initialize function should create the layers of the neural network and save them to the class. The forward propagation function will use these layers to run forward propagation and generate an output and a hidden state.

The output of this model should be the last batch of word scores after a complete sequence has been processed. That is, for each input sequence of words, we only want to output the word scores for a single, most likely, next word.

Hints

  1. Make sure to stack the outputs of the lstm to pass to your fully-connected layer; you can do this with `lstm_output = lstm_output.contiguous().view(-1, self.hidden_dim)`
  2. You can get the last batch of word scores by shaping the output of the final, fully-connected layer like so:
# reshape into (batch_size, seq_length, output_size)
output = output.view(batch_size, -1, self.output_size)
# get last batch
out = output[:, -1]
import torch.nn as nn

class RNN(nn.Module):

    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5):
        """
        Initialize the PyTorch RNN Module
        :param vocab_size: The number of input dimensions of the neural network (the size of the vocabulary)
        :param output_size: The number of output dimensions of the neural network
        :param embedding_dim: The size of embeddings, should you choose to use them        
        :param hidden_dim: The size of the hidden layer outputs
        :param dropout: dropout to add in between LSTM/GRU layers
        """
        super(RNN, self).__init__()
        # TODO: Implement function

        # set class variables

        # define model layers


    def forward(self, nn_input, hidden):
        """
        Forward propagation of the neural network
        :param nn_input: The input to the neural network
        :param hidden: The hidden state        
        :return: Two Tensors, the output of the neural network and the latest hidden state
        """
        # TODO: Implement function   

        # return one batch of output word scores and the hidden state
        return None, None


    def init_hidden(self, batch_size):
        '''
        Initialize the hidden state of an LSTM/GRU
        :param batch_size: The batch_size of the hidden state
        :return: hidden state of dims (n_layers, batch_size, hidden_dim)
        '''
        # Implement function

        # initialize hidden state with zero weights, and move to GPU if available

        return None

tests.test_rnn(RNN, train_on_gpu)
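
The class above is left as the template with its TODOs in place, so here's a minimal sketch of one way it could be filled in: an embedding layer feeding an LSTM, followed by a fully-connected layer, keeping only the last time-step's word scores. The name ExampleRNN is mine and it relies on the device defined earlier; treat it as an illustration under those assumptions, not the notebook's official solution.

class ExampleRNN(nn.Module):
    """A sketch of the RNN module described above (embedding -> LSTM -> linear)"""
    def __init__(self, vocab_size, output_size, embedding_dim, hidden_dim,
                 n_layers, dropout=0.5):
        super().__init__()
        self.output_size = output_size
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        # word ids -> dense embeddings -> recurrent layer -> word scores
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, n_layers,
                            dropout=dropout, batch_first=True)
        self.fully_connected = nn.Linear(hidden_dim, output_size)

    def forward(self, nn_input, hidden):
        batch_size = nn_input.size(0)
        embedded = self.embedding(nn_input)
        lstm_output, hidden = self.lstm(embedded, hidden)
        # stack the LSTM outputs so they can go through the linear layer
        lstm_output = lstm_output.contiguous().view(-1, self.hidden_dim)
        output = self.fully_connected(lstm_output)
        # reshape to (batch_size, sequence_length, output_size) and keep the last step
        output = output.view(batch_size, -1, self.output_size)
        return output[:, -1], hidden

    def init_hidden(self, batch_size):
        # zeroed hidden and cell states for the LSTM, on the same device as the model
        weight = next(self.parameters()).data
        return (weight.new_zeros(self.n_layers, batch_size, self.hidden_dim).to(device),
                weight.new_zeros(self.n_layers, batch_size, self.hidden_dim).to(device))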

Define forward and backpropagation

Use the RNN class you implemented to apply forward and back propagation. This function will be called, iteratively, in the training loop as follows:

loss = forward_back_prop(decoder, decoder_optimizer, criterion, inp, target)

And it should return the average loss over a batch and the hidden state returned by a call to `RNN(inp, hidden)`. Recall that you can get this loss by computing it, as usual, and calling `loss.item()`.

If a GPU is available, you should move your data to that GPU device, here.

def forward_back_prop(rnn, optimizer, criterion, inp, target, hidden):
    """
    Forward and backward propagation on the neural network
    :param decoder: The PyTorch Module that holds the neural network
    :param decoder_optimizer: The PyTorch optimizer for the neural network
    :param criterion: The PyTorch loss function
    :param inp: A batch of input to the neural network
    :param target: The target output for the batch of input
    :return: The loss and the latest hidden state Tensor
    """

    # TODO: Implement Function

    # move data to GPU, if available

    # perform backpropagation and optimization

    # return the loss over a batch and the hidden state produced by our model
    return None, None

# Note that these tests aren't completely extensive.
# they are here to act as general checks on the expected outputs of your functions
"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
tests.test_forward_back_prop(RNN, forward_back_prop, train_on_gpu)
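
Again, the function above is just the template. Here's a sketch of what one forward/backward pass might look like, assuming an LSTM-style (h, c) hidden-state tuple and the device defined earlier; it's an illustration, not the notebook's graded solution.

def example_forward_back_prop(rnn, optimizer, criterion, inp, target, hidden):
    """one training step: forward pass, loss, backpropagation, optimizer step"""
    # move the batch to the same device as the model
    inp, target = inp.to(device), target.to(device)
    # detach the hidden state so gradients don't flow back through the whole history
    hidden = tuple(state.detach() for state in hidden)
    optimizer.zero_grad()
    output, hidden = rnn(inp, hidden)
    loss = criterion(output, target)
    loss.backward()
    # clip gradients to keep the recurrent layers from exploding
    nn.utils.clip_grad_norm_(rnn.parameters(), 5)
    optimizer.step()
    # average loss for the batch plus the new hidden state
    return loss.item(), hidden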

Neural Network Training

With the structure of the network complete and data ready to be fed in the neural network, it's time to train it.

  • Train Loop

    The training loop is implemented for you in the `train_rnn` function. This function will train the network over all the batches for the number of epochs given. The model's progress will be printed every so many batches; this number is set with the `show_every_n_batches` parameter, which you'll set along with the other parameters in the next section.

    def train_rnn(rnn, batch_size, optimizer, criterion, n_epochs, show_every_n_batches=100):
        batch_losses = []
    
        rnn.train()
    
        print("Training for %d epoch(s)..." % n_epochs)
        for epoch_i in range(1, n_epochs + 1):
    
            # initialize hidden state
            hidden = rnn.init_hidden(batch_size)
    
            for batch_i, (inputs, labels) in enumerate(train_loader, 1):
    
                # make sure you iterate over completely full batches, only
                n_batches = len(train_loader.dataset)//batch_size
                if(batch_i > n_batches):
                    break
    
                # forward, back prop
                loss, hidden = forward_back_prop(rnn, optimizer, criterion, inputs, labels, hidden)          
                # record loss
                batch_losses.append(loss)
    
                # printing loss stats
                if batch_i % show_every_n_batches == 0:
                    print('Epoch: {:>4}/{:<4}  Loss: {}\n'.format(
                        epoch_i, n_epochs, np.average(batch_losses)))
                    batch_losses = []
    
        # returns a trained rnn
        return rnn
    

Hyperparameters

Set and train the neural network with the following parameters:

  • Set `sequence_length` to the length of a sequence.
  • Set `batch_size` to the batch size.
  • Set `num_epochs` to the number of epochs to train for.
  • Set `learning_rate` to the learning rate for an Adam optimizer.
  • Set `vocab_size` to the number of unique tokens in our vocabulary.
  • Set `output_size` to the desired size of the output.
  • Set `embedding_dim` to the embedding dimension; smaller than the vocab_size.
  • Set `hidden_dim` to the hidden dimension of your RNN.
  • Set `n_layers` to the number of layers/cells in your RNN.
  • Set `show_every_n_batches` to the number of batches at which the neural network should print progress.

If the network isn't getting the desired results, tweak these parameters and/or the layers in the `RNN` class.

# Data params
# Sequence Length
sequence_length =   # of words in a sequence
# Batch Size
batch_size = 

# data loader - do not change
train_loader = batch_data(int_text, sequence_length, batch_size)

Training parameters

# Number of Epochs
num_epochs = 
# Learning Rate
learning_rate = 

# Model parameters
# Vocab size
vocab_size = 
# Output size
output_size = 
# Embedding Dimension
embedding_dim = 
# Hidden Dimension
hidden_dim = 
# Number of RNN Layers
n_layers = 

# Show stats for every n number of batches
show_every_n_batches = 500
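
The cells above are intentionally left blank in the template. Purely as an illustration of the kinds of values you might start with (these are guesses, not tuned results from this notebook), a first attempt could look something like this:

sequence_length = 10            # words per input sequence
batch_size = 128
num_epochs = 10
learning_rate = 0.001
vocab_size = len(vocab_to_int)  # one id per unique token
output_size = vocab_size        # a score for every word in the vocabulary
embedding_dim = 300
hidden_dim = 256
n_layers = 2
show_every_n_batches = 500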

Train

In the next cell, you'll train the neural network on the pre-processed data. If you have a hard time getting a good loss, you may consider changing your hyperparameters. In general, you may get better results with larger hidden and n_layer dimensions, but larger models take a longer time to train. You should aim for a loss less than 3.5.

You should also experiment with different sequence lengths, which determine the size of the long range dependencies that a model can learn.

# create model and move to gpu if available
rnn = RNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers, dropout=0.5)
if train_on_gpu:
    rnn.cuda()

# defining loss and optimization functions for training
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# training the model
trained_rnn = train_rnn(rnn, batch_size, optimizer, criterion, num_epochs, show_every_n_batches)

# saving the trained model
helper.save_model('./save/trained_rnn', trained_rnn)
print('Model Trained and Saved')

Question: How did you decide on your model hyperparameters?

For example, did you try different sequence_lengths and find that one size made the model converge faster? What about your hidden_dim and n_layers; how did you decide on those?

Answer: (Write answer, here)

Checkpoint

After running the above training cell, your model will be saved by name, `trained_rnn`, and if you save your notebook progress, you can pause here and come back to this code at another time. You can resume your progress by running the next cell, which will load in our word:id dictionaries and load in your saved model by name!

import torch
import helper
import problem_unittests as tests

_, vocab_to_int, int_to_vocab, token_dict = helper.load_preprocess()
trained_rnn = helper.load_model('./save/trained_rnn')

Act III - The Final Battle

Generate TV Script

With the network trained and saved, you'll use it to generate a new, "fake" Seinfeld TV script in this section.

Generate Text

To generate the text, the network needs to start with a single word and repeat its predictions until it reaches a set length. You'll be using the `generate` function to do this. It takes a word id to start with, `prime_id`, and generates a set length of text, `predict_len`. Also note that it uses topk sampling to introduce some randomness in choosing the most likely next word, given an output set of word scores!

import numpy as np
import torch.nn.functional as F

def generate(rnn, prime_id, int_to_vocab, token_dict, pad_value, predict_len=100):
    """
    Generate text using the neural network
    :param decoder: The PyTorch Module that holds the trained neural network
    :param prime_id: The word id to start the first prediction
    :param int_to_vocab: Dict of word id keys to word values
    :param token_dict: Dict of punctuation token keys to punctuation values
    :param pad_value: The value used to pad a sequence
    :param predict_len: The length of text to generate
    :return: The generated text
    """
    rnn.eval()

    # create a sequence (batch_size=1) with the prime_id
    current_seq = np.full((1, sequence_length), pad_value)
    current_seq[-1][-1] = prime_id
    predicted = [int_to_vocab[prime_id]]

    for _ in range(predict_len):
        if train_on_gpu:
            current_seq = torch.LongTensor(current_seq).cuda()
        else:
            current_seq = torch.LongTensor(current_seq)

        # initialize the hidden state
        hidden = rnn.init_hidden(current_seq.size(0))

        # get the output of the rnn
        output, _ = rnn(current_seq, hidden)

        # get the next word probabilities
        p = F.softmax(output, dim=1).data
        if(train_on_gpu):
            p = p.cpu() # move to cpu

        # use top_k sampling to get the index of the next word
        top_k = 5
        p, top_i = p.topk(top_k)
        top_i = top_i.numpy().squeeze()

        # select the likely next word index with some element of randomness
        p = p.numpy().squeeze()
        word_i = np.random.choice(top_i, p=p/p.sum())

        # retrieve that word from the dictionary
        word = int_to_vocab[word_i]
        predicted.append(word)     

        # the generated word becomes the next "current sequence" and the cycle can continue
        current_seq = np.roll(current_seq, -1, 1)
        current_seq[-1][-1] = word_i

    gen_sentences = ' '.join(predicted)

    # Replace punctuation tokens
    for key, token in token_dict.items():
        ending = ' ' if key in ['\n', '(', '"'] else ''
        gen_sentences = gen_sentences.replace(' ' + token.lower(), key)
    gen_sentences = gen_sentences.replace('\n ', '\n')
    gen_sentences = gen_sentences.replace('( ', '(')

    # return all the sentences
    return gen_sentences

Generate a New Script

It's time to generate the text. Set `gen_length` to the length of TV script you want to generate and set `prime_word` to one of the following to start the prediction:

  • "jerry"
  • "elaine"
  • "george"
  • "kramer"

You can set the prime word to any word in our dictionary, but it's best to start with a name for generating a TV script. (You can also start with any other names you find in the original text file!)

# run the cell multiple times to get different results!
gen_length = 400 # modify the length to your preference
prime_word = 'jerry' # name for starting the script

"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
pad_word = helper.SPECIAL_WORDS['PADDING']
generated_script = generate(trained_rnn, vocab_to_int[prime_word + ':'], int_to_vocab, token_dict, vocab_to_int[pad_word], gen_length)
print(generated_script)

Save your favorite scripts

Once you have a script that you like (or find interesting), save it to a text file!

# save script to a text file
with open("generated_script_1.txt", "w") as f:
    f.write(generated_script)

The TV Script is Not Perfect

It's ok if the TV script doesn't make perfect sense. It should look like alternating lines of dialogue; here is one example of a few generated lines.

Example generated script

jerry: what about me?

jerry: i don't have to wait.

kramer:(to the sales table)

elaine:(to jerry) hey, look at this, i'm a good doctor.

newman:(to elaine) you think i have no idea of this…

elaine: oh, you better take the phone, and he was a little nervous.

kramer:(to the phone) hey, hey, jerry, i don't want to be a little bit.(to kramer and jerry) you can't.

jerry: oh, yeah. i don't even know, i know.

jerry:(to the phone) oh, i know.

kramer:(laughing) you know…(to jerry) you don't know.

You can see that there are multiple characters that say (somewhat) complete sentences, but it doesn't have to be perfect! It takes quite a while to get good results, and often, you'll have to use a smaller vocabulary (and discard uncommon words), or get more data. The Seinfeld dataset is about 3.4 MB, which is big enough for our purposes; for script generation you'll want more than 1 MB of text, generally.

Submitting This Project

When submitting this project, make sure to run all the cells before saving the notebook. Save the notebook file as "dlnd_tv_script_generation.ipynb" and save another copy as an HTML file by clicking "File" -> "Download as.."->"html". Include the "helper.py" and "problem_unittests.py" files in your submission. Once you download these files, compress them into one zip file for submission.

Dermatologist Mini-Project

Introduction

This is an exercise in using transfer learning to diagnose melanoma based on images of skin lesions. There are three diseases to be detected:

  • Melanoma
  • Nevus
  • Seborrheic Keratosis

There is a paper online here (PDF link) that describes the approaches that did best in the competition.

Data Sources

The data is taken from the ISIC 2017: Skin Lesion Analysis Towards Melanoma Detection challenge.

Each of the data folders contains three sub-folders:

  • melanoma/
  • nevus/
  • seborrheic_keratosis/

Set Up

Imports

Python

from pathlib import Path
import warnings

PyPi

from dotenv import load_dotenv
from PIL import Image, ImageFile
from torchvision import datasets
import matplotlib
warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
import matplotlib.pyplot as pyplot
import matplotlib.image as mpimage
import matplotlib.patches as patches
import numpy
import pyttsx3
import seaborn
import torch
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optimizer
import torchvision.transforms as transforms

This Project

from neurotic.tangles.data_paths import (Batches, DataPathTwo, DataSets,
                                         TrainingTestingValidationPaths,
                                         Transformer)
from neurotic.tangles.models import Inception
from neurotic.tangles.timer import Timer
from neurotic.tangles.trainer import Trainer
from neurotic.tangles.logging import Tee

Plotting

get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
seaborn.set(style="whitegrid",
            rc={"axes.grid": False,
                "font.family": ["sans-serif"],
                "font.sans-serif": ["Open Sans", "Latin Modern Sans", "Lato"],
                "figure.figsize": (8, 6)},
            font_scale=1)

Set the Random Seed

numpy.random.seed(seed=2019)

Handle Truncated Images

There seems to be at least one image that is truncated, which will cause an exception when it's loaded, so this next setting lets us ignore the error and keep working.

ImageFile.LOAD_TRUNCATED_IMAGES = True

Constants

These are some global constants

Load Dotenv

load_dotenv()

Model Path

This is where to save the best model.

MODEL_PATH = DataPathTwo(folder_key="MODELS")

The Model

The Training

load_dotenv()
EPOCHS = 100
transfer_path = MODEL_PATH.folder.joinpath("model_transfer.pt")
directory = "~/logs/dermatalogist"
training_log = Tee(log_name="inception_train.log", directory_name=directory)
testing_log = Tee(log_name="inception_test.log", directory_name=directory)
data_sets = DataSets()
inception = Inception(data_sets.class_count)
batches = Batches(data_sets)
trainer = Trainer(training_batches=batches.training,
                  validation_batches=batches.validation,
                  testing_batches=batches.testing,
                  model=inception.model,
                  model_path=transfer_path,
                  optimizer=inception.optimizer,
                  criterion=inception.criterion,
                  device=inception.device,
                  epochs=EPOCHS,
                  epoch_start=1,
                  is_inception=True,
                  load_model=False,
                  training_log=training_log,
                  testing_log=testing_log,
                  beep=True,
)
trainer()
Starting Training
Started: 2019-01-26 13:59:40.249210
Started: 2019-01-26 13:59:40.249398
Ended: 2019-01-26 14:16:25.675136
Elapsed: 0:16:45.425738
Epoch: 1        Training - Loss: 0.85   Accuracy: 0.67  Validation - Loss: 0.97 Accuracy: 0.53
Validation loss decreased (inf --> 0.973706). Saving model ...
Started: 2019-01-26 14:16:26.913182
Ended: 2019-01-26 14:33:23.108155
Elapsed: 0:16:56.194973
Epoch: 2        Training - Loss: 0.78   Accuracy: 0.68  Validation - Loss: 0.93 Accuracy: 0.56
Validation loss decreased (0.973706 --> 0.934509). Saving model ...
Ended: 2019-01-26 14:33:23.997547
Elapsed: 0:16:57.084365

Starting Testing
Started: 2019-01-26 14:33:24.706175
Test Loss: 0.697
Test Accuracy: 70.95 (1419.0/2000)
Ended: 2019-01-26 14:47:30.356073
Elapsed: 0:14:05.649898

The Testing

The remote session died so I'll just load the test output.

testing_log = Tee(log_name="inception_test.log", directory_name="~/logs/dermatologist")
with testing_log.path.open() as reader:
    for line in reader:
        print(line.rstrip())

Starting Testing
Test Loss: 0.620
Test Accuracy: 74.80 (1496.0/2000)

Prepping The Test File

To check the model you need to create a CSV file with three columns.

Column   Description                Example
-------  -------------------------  -----------------------------------
Id       Path to the file           data/test/melanoma/ISIC_0012258.jpg
task_1   Is melanoma                0
task_2   Is seborrheic keratosis    1
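
As a concrete illustration of the format (a hypothetical sketch; only the melanoma path comes from the table above, the other file names are made up), the first few rows of the CSV might look like this:

Id,task_1,task_2
data/test/melanoma/ISIC_0012258.jpg,1,0
data/test/nevus/ISIC_0000001.jpg,0,0
data/test/seborrheic_keratosis/ISIC_0000002.jpg,0,1

The Predictions class below walks the test folder and writes one such row per image.
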
class Predictions:
    """Maps the test data to a predictions file

    Args:
     model_path: path to the stored model parameters
     device: processor to use
     output_path: path to the CSV to output
     test_path: path to the test folder
     inception: object with the model
    """
    def __init__(self, model_path: Path,
                 device: torch.device,
                 output_path: Path,
                 test_path: Path,
                 data_sets: DataSets=None,                 
                 inception: Inception=None) -> None:
        self.model_path = model_path
        self.output_path = output_path
        self.test_path = test_path
        self._device = device
        self._data_sets = data_sets
        self._activation = None
        self.inception = inception
        return

    @property
    def data_sets(self) -> DataSets:
        """the data-sets"""
        if self._data_sets is None:
            self._data_sets = DataSets()
        return self._data_sets

    @property
    def device(self):
        """The processor to use"""
        if self._device is None:
            self._device = torch.device("cuda"
                                        if torch.cuda.is_available()
                                        else "cpu")
        return self._device

    @property
    def inception(self) -> Inception:
        """The Inception Object"""
        if self._inception is None:
            self._inception = Inception(
                self.data_sets.class_count,
                model_path=self.model_path,
                device=self.device)
            self._inception.model.eval()
        return self._inception

    @property
    def activation(self) -> nn.Sigmoid:
        """The non-linear activation"""
        if self._activation is None:
            self._activation = nn.Sigmoid()
        return self._activation

    @inception.setter
    def inception(self, new_inception: Inception) -> None:
        """Sets the inception model to eval only"""
        self._inception = new_inception
        self._inception.model.eval()
        return

    def prediction(self, image_path: Path) -> numpy.ndarray:
        """Calculate predicted class for an image

        Args:
         image_path: path to an image file
        Returns:
         the index of the predicted class
        """
        model = self.inception.model        
        image = Image.open(image_path)
        tensor = self.data_sets.transformer.testing(image)
        # add a batch number
        tensor = tensor.unsqueeze_(0)
        tensor = tensor.to(self.inception.device)
        x = torch.autograd.Variable(tensor)
        output = torch.exp(model(x))
        _, top_class = output.topk(1, dim=1)
        return top_class.item()

    def __call__(self) -> None:
        """Creates CSV of predictions"""
        with self.output_path.open("w") as writer:
            writer.write("Id,task_1,task_2\n")
            for category in self.test_path.iterdir():
                for path in category.iterdir():
                    identifier = 'data/' + str(path).split("/dermatologist/")[-1]
                    guess = self.prediction(path)
                    first = 0 if guess else 1
                    second = 1 if guess == 2 else 0
                    writer.write("{},{},{}\n".format(identifier,
                                                     first,
                                                     second))
        return
TIMER = Timer()
test_path = DataPathTwo(folder_key="TEST").folder
csv_output = Path("~/documents/pcloud_drive/outcomes/dermatologist/predictions.csv").expanduser()

predictions = Predictions(model_path=transfer_path,
                          device=inception.device,
                          output_path=csv_output,
                          test_path=test_path,
                          data_sets=data_sets,
                          inception=inception)
with TIMER:
    predictions()
Started: 2019-01-29 22:36:10.975682
Ended: 2019-01-29 22:46:47.190355
Elapsed: 0:10:36.214673

References

Character Level RNN Exercise

Character-Level LSTM in PyTorch

In this notebook, I'll construct a character-level LSTM with PyTorch. The network will train character by character on some text, then generate new text character by character. As an example, I will train on Anna Karenina. This model will be able to generate new text based on the text from the book!

This network is based off of Andrej Karpathy's post on RNNs and implementation in Torch. Below is the general architecture of the character-wise RNN.

Set Up

First let's load in our required resources for data loading and model creation.

import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

with open('data/anna.txt', 'r') as f:
    text = f.read()

text[:100]

chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}

encoded = np.array([char2int[ch] for ch in text])

encoded[:100]

def one_hot_encode(arr, n_labels):
    one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)
    one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.
    one_hot = one_hot.reshape((*arr.shape, n_labels))
    return one_hot

test_seq = np.array([[3, 5, 1]])
one_hot = one_hot_encode(test_seq, 8)

print(one_hot)

def get_batches(arr, batch_size, seq_length):
    '''Create a generator that returns batches of size
       batch_size x seq_length from arr.

       Arguments
       ---------
       arr: Array you want to make batches from
       batch_size: Batch size, the number of sequences per batch
       seq_length: Number of encoded chars in a sequence
    '''
    ## TODO: Get the number of batches we can make
    n_batches = 

    ## TODO: Keep only enough characters to make full batches
    arr = 

    ## TODO: Reshape into batch_size rows
    arr = 

    ## TODO: Iterate over the batches using a window of size seq_length
    for n in range(0, arr.shape[1], seq_length):
        x = 
        y = 
        yield x, y

batches = get_batches(encoded, 8, 50)
x, y = next(batches)

print('x\n', x[:10, :10])
print('\ny\n', y[:10, :10])

train_on_gpu = torch.cuda.is_available()
if(train_on_gpu):
    print('Training on GPU!')
else:
    print('No GPU available, training on CPU; consider making n_epochs very small.')

class CharRNN(nn.Module):

    def __init__(self, tokens, n_hidden=256, n_layers=2,
                 drop_prob=0.5, lr=0.001):
        super().__init__()
        self.drop_prob = drop_prob
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.lr = lr

        self.chars = tokens
        self.int2char = dict(enumerate(self.chars))
        self.char2int = {ch: ii for ii, ch in self.int2char.items()}

        ## TODO: define the layers of the model

    def forward(self, x, hidden):
        ''' Forward pass through the network.
            These inputs are x, and the hidden/cell state `hidden`. '''

        ## TODO: Get the outputs and the new hidden state from the lstm

        return out, hidden

    def init_hidden(self, batch_size):
        ''' Initializes hidden state '''

        weight = next(self.parameters()).data

        if (train_on_gpu):
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())
        else:
            hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),
                      weight.new(self.n_layers, batch_size, self.n_hidden).zero_())

        return hidden

def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001,
          clip=5, val_frac=0.1, print_every=10):
    ''' Training a network

        Arguments
        ---------
        net: CharRNN network
        data: text data to train the network
        epochs: Number of epochs to train
        batch_size: Number of mini-sequences per mini-batch, aka batch size
        seq_length: Number of character steps per mini-batch
        lr: learning rate
        clip: gradient clipping
        val_frac: Fraction of data to hold out for validation
        print_every: Number of steps for printing training and validation loss
    '''
    net.train()

    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    val_idx = int(len(data)*(1-val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if(train_on_gpu):
        net.cuda()

    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        h = net.init_hidden(batch_size)

        for x, y in get_batches(data, batch_size, seq_length):
            counter += 1

            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)

            if(train_on_gpu):
                inputs, targets = inputs.cuda(), targets.cuda()

            h = tuple([each.data for each in h])

            net.zero_grad()

            output, h = net(inputs, h)

            loss = criterion(output, targets.view(batch_size*seq_length))
            loss.backward()

            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            if counter % print_every == 0:
                val_h = net.init_hidden(batch_size)
                val_losses = []
                net.eval()
                for x, y in get_batches(val_data, batch_size, seq_length):
                    x = one_hot_encode(x, n_chars)
                    x, y = torch.from_numpy(x), torch.from_numpy(y)

                    val_h = tuple([each.data for each in val_h])

                    inputs, targets = x, y
                    if(train_on_gpu):
                        inputs, targets = inputs.cuda(), targets.cuda()

                    output, val_h = net(inputs, val_h)
                    val_loss = criterion(output, targets.view(batch_size*seq_length))

                    val_losses.append(val_loss.item())

                net.train() # reset to train mode after iterating through validation data

                print("Epoch: {}/{}...".format(e+1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

## TODO: set your model hyperparameters
n_hidden=
n_layers=

net = CharRNN(chars, n_hidden, n_layers)
print(net)

batch_size = 
seq_length = 
n_epochs =  # start small if you are just testing initial behavior

train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=10)

model_name = 'rnn_x_epoch.net'

checkpoint = {'n_hidden': net.n_hidden,
              'n_layers': net.n_layers,
              'state_dict': net.state_dict(),
              'tokens': net.chars}

with open(model_name, 'wb') as f:
    torch.save(checkpoint, f)

def predict(net, char, h=None, top_k=None):
    ''' Given a character, predict the next character.
        Returns the predicted character and the hidden state. '''

    x = np.array([[net.char2int[char]]])
    x = one_hot_encode(x, len(net.chars))
    inputs = torch.from_numpy(x)

    if(train_on_gpu):
        inputs = inputs.cuda()

    h = tuple([each.data for each in h])

    out, h = net(inputs, h)

    p = F.softmax(out, dim=1).data
    if(train_on_gpu):
        p = p.cpu() # move to cpu

    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p/p.sum())

    return net.int2char[char], h

def sample(net, size, prime='The', top_k=None):

    if(train_on_gpu):
        net.cuda()
    else:
        net.cpu()

    net.eval() # eval mode

    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = predict(net, ch, h, top_k=top_k)

    chars.append(char)

    for ii in range(size):
        char, h = predict(net, chars[-1], h, top_k=top_k)
        chars.append(char)

    return ''.join(chars)

print(sample(net, 1000, prime='Anna', top_k=5))

with open('rnn_x_epoch.net', 'rb') as f:
    checkpoint = torch.load(f)

loaded = CharRNN(checkpoint['tokens'],
                 n_hidden=checkpoint['n_hidden'],
                 n_layers=checkpoint['n_layers'])
loaded.load_state_dict(checkpoint['state_dict'])

print(sample(loaded, 2000, top_k=5, prime="And Levin said"))

Dog Detector

Introduction

As part of the Dog-Breed Classification application I want to be able to detect whether an image has a dog or a human. This post will use pre-trained models to detect dogs in images.

Set Up

Imports

From PyPi

import torchvision.models as models

VGG-16

My first model will be a pre-trained VGG-16 model with weights that were trained on the ImageNet data set. ImageNet contains over 10 million URLs, each linking to an image containing an object from one of 1000 categories.

Build the Model

VGG16 = models.vgg16(pretrained=True)
VGG16.eval()
VGG16.to(device)
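
With the model in evaluation mode, detecting a dog is a matter of applying the standard ImageNet preprocessing, running a forward pass, and checking whether the predicted class index falls in the dog range (ImageNet classes 151 through 268, inclusive, are dog breeds). This is a minimal sketch, assuming a hypothetical dog.jpg and the usual ImageNet normalization constants:

from PIL import Image
import torch
import torchvision.transforms as transforms

preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])])

image = Image.open("dog.jpg")                       # hypothetical image path
batch = preprocess(image).unsqueeze(0).to(device)   # add a batch dimension

with torch.no_grad():
    output = VGG16(batch)
index = output.argmax(dim=1).item()

# ImageNet classes 151-268 (inclusive) are dog breeds
print("dog detected" if 151 <= index <= 268 else "no dog detected")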

Dog App

Convolutional Neural Networks

Note: The rendered HTML version of this file is on github pages and the original file is on github.

Project: Write an Algorithm for a Dog Identification App


In this notebook, some template code has already been provided for you, and you will need to implement additional functionality to successfully complete this project. You will not need to modify the included code beyond what is requested. Sections that begin with '(IMPLEMENTATION)' in the header indicate that the following block of code will require additional functionality which you must provide. Instructions will be provided for each section, and the specifics of the implementation are marked in the code block with a 'TODO' statement. Please be sure to read the instructions carefully!

Note: Once you have completed all of the code implementations, you need to finalize your work by exporting the Jupyter Notebook as an HTML document. Before exporting the notebook to html, all of the code cells need to have been run so that reviewers can see the final implementation and output. You can then export the notebook by using the menu above and navigating to File -> Download as -> HTML (.html). Include the finished document along with this notebook as your submission.

In addition to implementing code, there will be questions that you must answer which relate to the project and your implementation. Each section where you will answer a question is preceded by a 'Question X' header. Carefully read each question and provide thorough answers in the following text boxes that begin with 'Answer:'. Your project submission will be evaluated based on your answers to each of the questions and the implementation you provide.

Note: Code and Markdown cells can be executed using the Shift + Enter keyboard shortcut. Markdown cells can be edited by double-clicking the cell to enter edit mode.

The rubric contains optional "Stand Out Suggestions" for enhancing the project beyond the minimum requirements. If you decide to pursue the "Stand Out Suggestions", you should include the code in this Jupyter notebook.


Why We're Here

In this notebook, you will make the first steps towards developing an algorithm that could be used as part of a mobile or web app. At the end of this project, your code will accept any user-supplied image as input. If a dog is detected in the image, it will provide an estimate of the dog's breed. If a human is detected, it will provide an estimate of the dog breed that is most resembling. The image below displays potential sample output of your finished project (... but we expect that each student's algorithm will behave differently!).

Sample Dog Output

In this real-world setting, you will need to piece together a series of models to perform different tasks; for instance, the algorithm that detects humans in an image will be different from the CNN that infers dog breed. There are many points of possible failure, and no perfect algorithm exists. Your imperfect solution will nonetheless create a fun user experience!

The Road Ahead

We break the notebook into separate steps. Feel free to use the links below to navigate the notebook.

  • Step 0: Import Datasets
  • Step 1: Detect Humans
  • Step 2: Detect Dogs
  • Step 3: Create a CNN to Classify Dog Breeds (from Scratch)
  • Step 4: Create a CNN to Classify Dog Breeds (using Transfer Learning)
  • Step 5: Write your Algorithm
  • Step 6: Test Your Algorithm

Step 0: Import Datasets

Make sure that you've downloaded the required human and dog datasets:

  • Download the dog dataset. Unzip the folder and place it in this project's home directory, at the location /dogImages.

  • Download the human dataset. Unzip the folder and place it in the home directory, at location /lfw.

Note: If you are using a Windows machine, you are encouraged to use 7zip to extract the folder.

In the code cell below, we save the file paths for both the human (LFW) dataset and dog dataset in the numpy arrays human_files and dog_files.

The original notebook had the imports and the set-up for plotting scattered around the notebook, but since there are so many different parts to work on, it was difficult to hunt them all down whenever I restarted the notebook, so I've moved them here and left the original imports in place (or nearly so).

Imports

In [1]:
# python
from datetime import datetime
from functools import partial
from pathlib import Path
import warnings

# from pypi
from PIL import Image, ImageFile
from tabulate import tabulate
from torchvision import datasets
import matplotlib
warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
import cv2
import face_recognition
import matplotlib.image as matplotlib_image
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import seaborn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optimizer
import torchvision.models as models
import torchvision.transforms as transforms

I tend to use the full names, but the included code follows the common practice (just not mine) of shortening numpy and pyplot, so I'm going to alias them to cut down on the NameErrors.

In [2]:
pyplot = plt
numpy = np

Set Up the Plotting

In [3]:
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
seaborn.set(style="whitegrid",
            rc={"axes.grid": False,
                "font.family": ["sans-serif"],
                "font.sans-serif": ["Open Sans", "Latin Modern Sans", "Lato"],
                "figure.figsize": (8, 6)},
            font_scale=1)

Constants

In [4]:
INCEPTION_IMAGE_SIZE = 299
SCRATCH_IMAGE_SIZE = INCEPTION_IMAGE_SIZE
VGG_IMAGE_SIZE = 224

MEANS = [0.485, 0.456, 0.406]
DEVIATIONS = [0.229, 0.224, 0.225]
DOG_LOWER, DOG_UPPER = 150, 269

Load filenames for human and dog images.

In [5]:
ROOT_PATH = Path("~/data/datasets/dog-breed-classification/").expanduser()
HUMAN_PATH = ROOT_PATH.joinpath("lfw")
DOG_PATH = ROOT_PATH.joinpath("dogImages")
MODEL_PATH = Path("~/models/dog-breed-classification").expanduser()

assert HUMAN_PATH.is_dir()
assert DOG_PATH.is_dir()
assert MODEL_PATH.is_dir()

MODELS is a place to keep track of anything that has been moved to the GPU so that I can off-load it later if I need to free up memory (a sketch of what that off-loading might look like follows the next cell).

In [6]:
MODELS = []
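
This is a minimal sketch of the kind of off-loading I have in mind; it isn't part of the original notebook, just an illustration of why the list exists:

def free_gpu(models: list) -> None:
    """Move the registered models back to the CPU and release cached GPU memory"""
    for model in models:
        model.cpu()
    torch.cuda.empty_cache()
    return

# e.g. free_gpu(MODELS) when CUDA starts running out of memory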

Check CUDA

In [7]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Using {}".format(device))
Using cuda

Handle Truncated Images

In [8]:
ImageFile.LOAD_TRUNCATED_IMAGES = True
In [9]:
human_files = np.array(list(HUMAN_PATH.glob("*/*")))
dog_files = np.array(list(DOG_PATH.glob("*/*/*")))

assert len(human_files) > 0
assert len(dog_files) > 0

# print number of images in each dataset
print('There are {:,} total human images.'.format(len(human_files)))
print('There are {:,} total dog images.'.format(len(dog_files)))
There are 13,233 total human images.
There are 8,351 total dog images.

Step 1: Detect Humans

In this section, we use OpenCV's implementation of Haar feature-based cascade classifiers to detect human faces in images.

OpenCV provides many pre-trained face detectors, stored as XML files on github. We have downloaded one of these detectors and stored it in the haarcascades directory. In the next code cell, we demonstrate how to use this detector to find human faces in a sample image.

In [10]:
import cv2
import warnings
import matplotlib
warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
import matplotlib.pyplot as plt

# extract pre-trained face detector
haar_path = ROOT_PATH.joinpath('haarcascades/haarcascade_frontalface_alt.xml')
assert haar_path.is_file()
face_cascade = cv2.CascadeClassifier(str(haar_path))

# load color (BGR) image
img = cv2.imread(str(human_files[0]))
# convert BGR image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# find faces in image
faces = face_cascade.detectMultiScale(gray)

# print number of faces detected in the image
print('Number of faces detected:', len(faces))

# get bounding box for each detected face
for (x,y,w,h) in faces:
    # add bounding box to color image
    cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
    
# convert BGR image to RGB for plotting
cv_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# display the image, along with bounding box
plt.imshow(cv_rgb)
plt.show()
Number of faces detected: 1

Before using any of the face detectors, it is standard procedure to convert the images to grayscale. The detectMultiScale function executes the classifier stored in face_cascade and takes the grayscale image as a parameter.

In the above code, faces is a numpy array of detected faces, where each row corresponds to a detected face. Each detected face is a 1D array with four entries that specifies the bounding box of the detected face. The first two entries in the array (extracted in the above code as x and y) specify the horizontal and vertical positions of the top left corner of the bounding box. The last two entries in the array (extracted here as w and h) specify the width and height of the box.

Write a Human Face Detector

We can use this procedure to write a function that returns True if a human face is detected in an image and False otherwise. This function, aptly named face_detector, takes a string-valued file path to an image as input and appears in the code block below.

In [11]:
def face_detector(img_path):
    """"returns True if face is detected in image stored at img_path"""
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray)
    return len(faces) > 0

(IMPLEMENTATION) Assess the Human Face Detector

Question 1: Use the code cell below to test the performance of the face_detector function.

  • What percentage of the first 100 images in human_files have a detected human face?
  • What percentage of the first 100 images in dog_files have a detected human face?

Ideally, we would like 100% of human images with a detected face and 0% of dog images with a detected face. You will see that our algorithm falls short of this goal, but still gives acceptable performance. We extract the file paths for the first 100 images from each of the datasets and store them in the numpy arrays human_files_short and dog_files_short.

Answer: See output below.

In [12]:
from tqdm import tqdm

human_files_short = human_files[:100]
dog_files_short = dog_files[:100]

#-#-# Do NOT modify the code above this line. #-#-#
In [13]:
set([" ".join(filename.name.split("_")[:-1]) for filename in dog_files_short])
Out[13]:
{'Afghan hound',
 'American foxhound',
 'Basset hound',
 'Belgian tervuren',
 'Bichon frise',
 'Bluetick coonhound',
 'Border terrier',
 'Boxer',
 'English cocker spaniel',
 'Greyhound',
 'Lowchen',
 'Newfoundland',
 'Norwich terrier',
 'Papillon',
 'Smooth fox terrier',
 'Tibetan mastiff'}

I'm going to re-do this with dlib, so I'll make a function that answers the percentage questions and adds an F1 score to make it a little easier to compare the two detectors.

In [14]:
def species_scorer(predictor: callable,
                   true_species: list,
                   false_species: list,
                   labels: list) -> list:
    """Emit a score-table for the predictor

    Args:
     predictor: callable that returns True if it detects the expected species
     true_species: list of images that should be matched by predictor
     false_species: list of images that shouldn't be matched by predictor
     labels: column labels for the table

    Returns:
     false-positive indices
    """
    misses = [predictor(str(image)) for image in false_species]
    false_positives = sum(misses)
    true_positives = sum([predictor(str(image)) for image in true_species])
    false_negatives = len(true_species) - true_positives
    others = len(false_species)
    expected = len(true_species)
    values = ("{:.2f}%".format(100 * true_positives/expected),
            "{:.2f}%".format(100 * false_positives/others),
              "{:.2f}".format((2 * true_positives)/(2 * true_positives
                                                    + false_positives
                                                    + false_negatives)))
    table = zip(labels, values)
    print(tabulate(table, tablefmt="github", headers=["Metric", "Value"]))
    return misses
In [16]:
face_scorer = partial(species_scorer,
                      true_species=human_files_short,
                      false_species=dog_files_short,
                      labels=("First 100 images in `human_files` detected with a face",
                              "First 100 images in `dog_files` detected with a face",
                              "F1"))
In [17]:
open_cv_false_positives = face_scorer(face_detector)
Metric                                                  Value
------------------------------------------------------  -------
First 100 images in `human_files` detected with a face  98.00%
First 100 images in `dog_files` detected with a face    9.00%
F1                                                      0.95

We suggest the face detector from OpenCV as a potential way to detect human images in your algorithm, but you are free to explore other approaches, especially approaches that make use of deep learning :). Please use the code cell below to design and test your own face detection algorithm. If you decide to pursue this optional task, report performance on human_files_short and dog_files_short.

DLIB with face_recognition

This face detector uses face_recognition, a python interface to dlib's facial recognition code.

Testing It with An Image

I created the detect_faces and add_bounding_boxes functions so that I can re-use detect_faces later for the dlib version of the face_detector function.

In [18]:
def detect_faces(image_path: str) -> numpy.ndarray:
    """Finds the locations of faces
    
    Args:
     image_path: path to the image
        
    Returns:
     array of bounding box coordinates for the face(s)
    """
    image = face_recognition.load_image_file(str(image_path))
    return face_recognition.face_locations(image)
In [19]:
def add_bounding_boxes(image_path: str,
                       axe: matplotlib.axes.Axes) -> None:
    """Adds patches to the current matplotlib figure
    
    Args:
     image_path: path to the image file
     axe: axes to add the rectangle to
    """
    for (top, right, bottom, left) in detect_faces(image_path):
        width = right - left
        height = bottom - top
        rectangle = matplotlib.patches.Rectangle((left, top), width, height,
                                                 fill=False)
        axe.add_patch(rectangle)
    return    
In [20]:
figure, axe = pyplot.subplots()
human = human_files[0]
name = " ".join(human.name.split("_")[:-1])
image = matplotlib.image.imread(human)
figure.suptitle("dlib Face Recognition Bounding-Box ({})".format(name),
                weight='bold')
add_bounding_boxes(human, axe)
axe.tick_params(axis="both",
                which="both",
                bottom=False,
                top=False)
axe.get_xaxis().set_ticks([])
axe.get_yaxis().set_ticks([])
        
plot = axe.imshow(image)

Test the performance

In [21]:
def has_face(image_path: str) -> bool:
    """Checks if there is at least one face in the image

    Args:
     image_path: path to the image file

    Returns:
     True if there's at least one face in the image
    """
    return len(detect_faces(image_path)) > 0
In [22]:
dlib_false_positives = face_scorer(has_face)
Metric                                                  Value
------------------------------------------------------  -------
First 100 images in `human_files` detected with a face  100.00%
First 100 images in `dog_files` detected with a face    11.00%
F1                                                      0.95

The dlib version did slightly better at recognizing the humans as humans, but it also had more false positives, so overall it scored about the same. Although I didn't include the timings here, the dlib version is roughly four times slower than the OpenCV version, so OpenCV might be the better choice in a real-time setting; on the other hand, the dlib version is much simpler to use, so it might be preferable when speed isn't a factor or recall matters more than precision. A sketch of how you could time the two detectors follows.
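
This is a minimal sketch of how that timing comparison could be made, using the face_detector and has_face functions defined above and the standard library's timeit (the repetition count of 10 is arbitrary):

from timeit import timeit

sample = str(human_files_short[0])

opencv_time = timeit(lambda: face_detector(sample), number=10)
dlib_time = timeit(lambda: has_face(sample), number=10)

print("OpenCV: {:.2f} seconds for 10 runs".format(opencv_time))
print("dlib:   {:.2f} seconds for 10 runs".format(dlib_time))
print("dlib/OpenCV ratio: {:.1f}".format(dlib_time / opencv_time))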


Step 2: Detect Dogs

In this section, we use a pre-trained model to detect dogs in images.

Obtain Pre-trained VGG-16 Model

The code cell below downloads the VGG-16 model, along with weights that have been trained on ImageNet, a very large, very popular dataset used for image classification and other vision tasks. ImageNet contains over 10 million URLs, each linking to an image containing an object from one of 1000 categories.

In [22]:
import torch
import torchvision.models as models
In [22]:
# define VGG16 model
VGG16 = models.vgg16(pretrained=True)
In [23]:
# move model to GPU if CUDA is available
if use_cuda:
    VGG16 = VGG16.cuda()
    MODELS.append(VGG16)

Given an image, this pre-trained VGG-16 model returns a prediction (derived from the 1000 possible categories in ImageNet) for the object that is contained in the image.

(IMPLEMENTATION) Making Predictions with a Pre-trained Model

In the next code cell, you will write a function that accepts a path to an image (such as 'dogImages/train/001.Affenpinscher/Affenpinscher_00001.jpg') as input and returns the index corresponding to the ImageNet class that is predicted by the pre-trained VGG-16 model. The output should always be an integer between 0 and 999, inclusive.

Before writing the function, make sure that you take the time to learn how to appropriately pre-process tensors for pre-trained models in the PyTorch documentation.

Transforms

The VGG model expects a 224x224 image (Very Deep Convolutional Networks for Large-Scale Image Recognition) and according to the pytorch documentation all the pre-trained models expect inputs normalized with means [0.485, 0.456, 0.406] and standard deviations [0.229, 0.224, 0.225], so the images need to be transformed accordingly. The MEANS and DEVIATIONS lists are defined in the constants section at the top of the document along with VGG_IMAGE_SIZE.

In [24]:
vgg_transform = transforms.Compose([transforms.Resize(255),
                                    transforms.CenterCrop(VGG_IMAGE_SIZE),
                                    transforms.ToTensor(),
                                    transforms.Normalize(MEANS,
                                                         DEVIATIONS)])

Since I'm going to use the Inception-v3 network later on I'm going to create a generic function first and then use it to build separate predictor functions.

In [25]:
def model_predict(image_path: str, model: nn.Module,
                  transform: transforms.Compose) -> int:
    """Predicts the class of item in image

    Args:
     image_path: path to the image to check
     model: model to make the prediction
     transform: callable to convert the image to a tensor

    Returns:
     index corresponding to the model's prediction
    """
    image = Image.open(str(image_path))
    image = transform(image).unsqueeze(0).to(device)
    output = model(image)
    probabilities = torch.exp(output)
    _, top_class = probabilities.topk(1, dim=1)
    return top_class.item()    
In [26]:
VGG16_predict = partial(model_predict, model=VGG16, transform=vgg_transform)

(IMPLEMENTATION) Write a Dog Detector

While looking at the dictionary, you will notice that the categories corresponding to dogs appear in an uninterrupted sequence and correspond to dictionary keys 151-268, inclusive, to include all categories from 'Chihuahua' to 'Mexican hairless'. Thus, in order to check to see if an image is predicted to contain a dog by the pre-trained VGG-16 model, we need only check if the pre-trained model predicts an index between 151 and 268 (inclusive).

Use these ideas to complete the dog_detector function below, which returns True if a dog is detected in an image (and False if not).

In [27]:
def dog_detector(img_path: str, predictor: callable=VGG16_predict) -> bool:
    """Predicts if the image is a dog

    Args:
     img_path: path to image file
     predictor: callable that maps the image to an ID
    
    Returns:
     is-dog: True if the image contains a dog
    """
    return DOG_LOWER < predictor(img_path) < DOG_UPPER

(IMPLEMENTATION) Assess the Dog Detector

Question 2: Use the code cell below to test the performance of your dog_detector function.

  • What percentage of the images in human_files_short have a detected dog?
  • What percentage of the images in dog_files_short have a detected dog?
In [28]:
dog_scorer = partial(species_scorer,
                     true_species=dog_files_short,
                     false_species=human_files_short,
                     labels=("Images in `dog_files_short` with a detected dog",
                             "Images in `human_files_short with a detected dog", "F1"))
In [30]:
false_dogs = dog_scorer(dog_detector)
Metric                                            Value
------------------------------------------------  -------
Images in `dog_files_short` with a detected dog   92.00%
Images in `human_files_short with a detected dog  1.00%
F1                                                0.95

The VGG model missed 8% of the dogs and misclassified 1% of the humans as dogs.

We suggest VGG-16 as a potential network to detect dog images in your algorithm, but you are free to explore other pre-trained networks (such as Inception-v3, ResNet-50, etc). Please use the code cell below to test other pre-trained PyTorch models. If you decide to pursue this optional task, report performance on human_files_short and dog_files_short.

Inception Dog Detector

In [29]:
inception = models.inception_v3(pretrained=True)
inception.to(device)
MODELS.append(inception)
inception.eval()
pass # this is to prevent the output from dumping into the notebook

I couldn't find any place where pytorch documents it, but if you look at the source code there is a comment in the forward method indicating that the image needs to be 299x299x3, so the images need to be transformed to a different size than the VGG images. INCEPTION_IMAGE_SIZE is set to 299 at the top of this document since it is shared with code that comes in a later section.

In [36]:
inception_transforms = transforms.Compose([transforms.Resize(INCEPTION_IMAGE_SIZE),
                                           transforms.CenterCrop(INCEPTION_IMAGE_SIZE),
                                           transforms.ToTensor(),
                                           transforms.Normalize(MEANS,
                                                                DEVIATIONS)])
In [37]:
inception_predicts = partial(model_predict, model=inception, transform=inception_transforms)
In [38]:
inception_dog_detector = partial(dog_detector, predictor=inception_predicts)
In [39]:
dlib_false_dogs = dog_scorer(inception_dog_detector)
Metric                                            Value
------------------------------------------------  -------
Images in `dog_files_short` with a detected dog   100.00%
Images in `human_files_short with a detected dog  0.00%
F1                                                1.00

The inception model seems to do better than the VGG model did.


Step 3: Create a CNN to Classify Dog Breeds (from Scratch)

Now that we have functions for detecting humans and dogs in images, we need a way to predict breed from images. In this step, you will create a CNN that classifies dog breeds. You must create your CNN from scratch (so, you can't use transfer learning yet!), and you must attain a test accuracy of at least 10%. In Step 4 of this notebook, you will have the opportunity to use transfer learning to create a CNN that attains greatly improved accuracy.

We mention that the task of assigning breed to dogs from images is considered exceptionally challenging. To see why, consider that even a human would have trouble distinguishing between a Brittany and a Welsh Springer Spaniel.

Brittany Welsh Springer Spaniel

It is not difficult to find other dog breed pairs with minimal inter-class variation (for instance, Curly-Coated Retrievers and American Water Spaniels).

Curly-Coated Retriever American Water Spaniel

Likewise, recall that labradors come in yellow, chocolate, and black. Your vision-based algorithm will have to conquer this high intra-class variation to determine how to classify all of these different shades as the same breed.

Yellow Labrador Chocolate Labrador Black Labrador

We also mention that random chance presents an exceptionally low bar: setting aside the fact that the classes are slightly imbalanced, a random guess will provide a correct answer roughly 1 in 133 times, which corresponds to an accuracy of less than 1%.

Remember that the practice is far ahead of the theory in deep learning. Experiment with many different architectures, and trust your intuition. And, of course, have fun!

(IMPLEMENTATION) Specify Data Loaders for the Dog Dataset

Use the code cell below to write three separate data loaders for the training, validation, and test datasets of dog images (located at dogImages/train, dogImages/valid, and dogImages/test, respectively). You may find this documentation on custom datasets to be a useful resource. If you are interested in augmenting your training and/or validation data, check out the wide variety of transforms!

The SCRATCH_IMAGE_SIZE, MEANS, and DEVIATIONS variables are defined in the constants section at the top of the notebook.

In [30]:
train_transform = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(SCRATCH_IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(MEANS,
                         DEVIATIONS)])

test_transform = transforms.Compose([transforms.Resize(350),
                                     transforms.CenterCrop(SCRATCH_IMAGE_SIZE),
                                     transforms.ToTensor(),
                                     transforms.Normalize(MEANS,
                                                          DEVIATIONS)])
In [28]:
dog_training_path = DOG_PATH.joinpath("train")
dog_validation_path = DOG_PATH.joinpath("valid")
dog_testing_path = DOG_PATH.joinpath("test")
In [31]:
training = datasets.ImageFolder(root=str(dog_training_path),
                                transform=train_transform)
validation = datasets.ImageFolder(root=str(dog_validation_path),
                                  transform=test_transform)
testing = datasets.ImageFolder(root=str(dog_testing_path),
                               transform=test_transform)
In [43]:
BATCH_SIZE = 32
WORKERS = 0

train_batches = torch.utils.data.DataLoader(training, batch_size=BATCH_SIZE,
                                            shuffle=True, num_workers=WORKERS)
validation_batches = torch.utils.data.DataLoader(
    validation, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS)
test_batches = torch.utils.data.DataLoader(
    testing, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS)

loaders_scratch = dict(train=train_batches,
                       validation=validation_batches,
                       test=test_batches)

Question 3: Describe your chosen procedure for preprocessing the data.

  • How does your code resize the images (by cropping, stretching, etc)? What size did you pick for the input tensor, and why?
  • Did you decide to augment the dataset? If so, how (through translations, flips, rotations, etc)? If not, why not?

Answer:

  • The training images are resized by cropping them, while the testing images are resized by scaling then cropping them. The size I chose for the images was 299 pixels so that I can reuse them with an Inception V3 network in the next section.

  • The training was augmented using rotation, cropping, and horizontal flipping.

(IMPLEMENTATION) Model Architecture

Create a CNN to classify dog breed. Use the template in the code cell below.

In [33]:
BREEDS = len(training.classes)
print("There are {} breeds.".format(BREEDS))
There are 133 breeds.
In [14]:
LAYER_ONE_IN = 3
LAYER_ONE_OUT = 16
LAYER_TWO_OUT = LAYER_ONE_OUT * 2
LAYER_THREE_OUT = LAYER_TWO_OUT * 2
FLATTEN_TO = (SCRATCH_IMAGE_SIZE//8)**2 * LAYER_THREE_OUT
FULLY_CONNECTED_OUT = int(str(FLATTEN_TO)[:3])//100 * 100
KERNEL = 3
PADDING = 1
In [15]:
import torch.nn as nn
import torch.nn.functional as F
In [16]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(LAYER_ONE_IN, LAYER_ONE_OUT,
                               KERNEL, padding=PADDING)
        self.conv2 = nn.Conv2d(LAYER_ONE_OUT, LAYER_TWO_OUT,
                               KERNEL, padding=PADDING)
        self.conv3 = nn.Conv2d(LAYER_TWO_OUT, LAYER_THREE_OUT,
                               KERNEL, padding=PADDING)
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # linear layer
        self.fc1 = nn.Linear(FLATTEN_TO, FULLY_CONNECTED_OUT)
        self.fc2 = nn.Linear(FULLY_CONNECTED_OUT, BREEDS)
        # dropout layer
        self.dropout = nn.Dropout(0.25)
        return
    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        x = x.view(-1, FLATTEN_TO)
        x = self.dropout(x)

        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)
#-#-# You do NOT have to modify the code below this line. #-#-#

# instantiate the CNN
model_scratch = Net()

# move tensors to GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()
    MODELS.append(model_scratch)

Question 4: Outline the steps you took to get to your final CNN architecture and your reasoning at each step.

Answer:

It was largely trial and error, copying what we did in the CIFAR problem. I chose (somewhat arbitrarily) three convolutional layers, since two layers didn't seem to do very well. Each convolutional layer doubles the depth while halving the height and width (using MaxPool).

I then flattened the layer to transition from the convolutional layers to the fully-connected layers. I added a fully-connected layer which has 500 outputs - a rough rounding of the number of input weights of the flattened layer down to the nearest 100th. There wasn't any magic to the number, I just wanted a transition from the large flattened layer to the final output layer and when I was experimenting with larger values I was running out of memory and since this isn't the intended final model I tried to keep it modest.

To reduce the likelihood of overfitting I applied dropout to the activation layers (except the final one). Finally, at each of the layers (except the final output layer) I applied ReLU activation to make the model non-linear.

(IMPLEMENTATION) Specify Loss Function and Optimizer

Use the next code cell to specify a loss function and optimizer. Save the chosen loss function as criterion_scratch, and the optimizer as optimizer_scratch below.

In [17]:
import torch.optim as optimizer

criterion_scratch = nn.CrossEntropyLoss()
optimizer_scratch = optimizer.SGD(model_scratch.parameters(),
                                  lr=0.001,
                                  momentum=0.9)

(IMPLEMENTATION) Train and Validate the Model

Train and validate your model in the code cell below. Save the final model parameters at filepath 'model_scratch.pt'.

In [18]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path,
          print_function: callable=print,
          is_inception: bool=False):
    """Trains the model

    Args:
     n_epochs: the number of times to repeat training
     loaders: dict of data batch-loaders
     model: the model to train
     optimizer: the gradient descent object
     criterion: The object to calculate the loss
     use_cuda: boolean to decide whether to move the data to the GPU
     save_path: path to file to save best model to
     print_function: something to pass output to
     is_inception: if True, expect a tuple of tensors as the model output
    """
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf
    
    # check the keys are right so you don't waste an entire epoch to find out
    training_batches = loaders["train"]
    validation_batches = loaders["validation"]
    started = datetime.now()
    print_function("Training Started: {}".format(started))
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        epoch_started = datetime.now()
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for data, target in training_batches:
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            if is_inception:
                output, _ = model(data)
            else:
                output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * data.size(0)
        train_loss /= len(training_batches.dataset)

        ######################    
        # validate the model #
        ######################
        model.eval()
        for data, target in validation_batches:
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)
        valid_loss /= len(validation_batches.dataset)
        print_function('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}\tElapsed: {}'.format(
            epoch,                     
            train_loss,
            valid_loss,
            datetime.now() - epoch_started,
            ))
        
        if valid_loss < valid_loss_min:
            print_function(
                ("Validation loss decreased ({:.6f} --> {:.6f}). "
                 "Saving model ...").format(
                     valid_loss_min,
                     valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
    ended = datetime.now()
    print_function("Training Ended: {}".format(ended))
    print_function("Total Training Time: {}".format(ended - started))            
    return model

Tee

I found out the hard way that Jupyter loses the ability to re-connect to a running cell if you close and re-open the tab, so if you do close it you will have lost all your output. This class makes sure the output also gets saved to a file.

In [64]:
class Tee:
    """Save the input to a file and print it

    Args:
     log_name: name to give the log    
     directory_path: path to the directory for the file
    """
    def __init__(self, log_name: str, 
                 directory_name: str="../../../logs/dog-breed-classifier") -> None:
        self.directory_name = directory_name
        self.log_name = log_name
        self._path = None
        self._log = None
        return

    @property
    def path(self) -> Path:
        """path to the log-file"""
        if self._path is None:
            self._path = Path(self.directory_name).expanduser()
            assert self._path.is_dir()
            self._path = self._path.joinpath(self.log_name)
        return self._path

    @property
    def log(self):
        """File object to write log to"""
        if self._log is None:
            self._log = self.path.open("w", buffering=1)
        return self._log

    def __call__(self, line: str) -> None:
        """Writes to the file and stdout

        Args:
         line: text to emit
        """
        self.log.write("{}\n".format(line))
        print(line)
        return

Train the Model

In [20]:
scratch_path = MODEL_PATH.joinpath("model_scratch.pt")
scratch_log = Tee(log_name="scratch_train.log")
In [21]:
EPOCHS = 100
In [22]:
model_scratch = train(EPOCHS, loaders_scratch, model_scratch, optimizer_scratch, 
                      criterion_scratch, use_cuda, scratch_path, print_function=scratch_log)
Training Started: 2019-01-07 00:17:48.769216
Epoch: 1        Training Loss: 4.877051         Validation Loss: 4.841412       Elapsed: 0:03:13.834452
Validation loss decreased (inf --> 4.841412). Saving model ...
Epoch: 2        Training Loss: 4.820985         Validation Loss: 4.747336       Elapsed: 0:03:01.535938
Validation loss decreased (4.841412 --> 4.747336). Saving model ...
Epoch: 3        Training Loss: 4.767189         Validation Loss: 4.684055       Elapsed: 0:03:01.574621
Validation loss decreased (4.747336 --> 4.684055). Saving model ...
Epoch: 4        Training Loss: 4.728553         Validation Loss: 4.607475       Elapsed: 0:03:02.878120
Validation loss decreased (4.684055 --> 4.607475). Saving model ...
Epoch: 5        Training Loss: 4.643230         Validation Loss: 4.515298       Elapsed: 0:03:01.719175
Validation loss decreased (4.607475 --> 4.515298). Saving model ...
Epoch: 6        Training Loss: 4.601643         Validation Loss: 4.451782       Elapsed: 0:03:02.711892
Validation loss decreased (4.515298 --> 4.451782). Saving model ...
Epoch: 7        Training Loss: 4.563049         Validation Loss: 4.390049       Elapsed: 0:03:02.421659
Validation loss decreased (4.451782 --> 4.390049). Saving model ...
Epoch: 8        Training Loss: 4.525313         Validation Loss: 4.401180       Elapsed: 0:03:00.623633
Epoch: 9        Training Loss: 4.494441         Validation Loss: 4.316231       Elapsed: 0:03:03.307759
Validation loss decreased (4.390049 --> 4.316231). Saving model ...
Epoch: 10       Training Loss: 4.462459         Validation Loss: 4.309952       Elapsed: 0:03:01.247355
Validation loss decreased (4.316231 --> 4.309952). Saving model ...
Epoch: 11       Training Loss: 4.440028         Validation Loss: 4.282603       Elapsed: 0:03:01.817202
Validation loss decreased (4.309952 --> 4.282603). Saving model ...
Epoch: 12       Training Loss: 4.408276         Validation Loss: 4.256291       Elapsed: 0:03:02.940067
Validation loss decreased (4.282603 --> 4.256291). Saving model ...
Epoch: 13       Training Loss: 4.382314         Validation Loss: 4.230955       Elapsed: 0:03:01.484585
Validation loss decreased (4.256291 --> 4.230955). Saving model ...
Epoch: 14       Training Loss: 4.339535         Validation Loss: 4.178119       Elapsed: 0:03:01.819115
Validation loss decreased (4.230955 --> 4.178119). Saving model ...
Epoch: 15       Training Loss: 4.314611         Validation Loss: 4.172305       Elapsed: 0:03:01.862936
Validation loss decreased (4.178119 --> 4.172305). Saving model ...
Epoch: 16       Training Loss: 4.294925         Validation Loss: 4.179273       Elapsed: 0:03:02.859107
Epoch: 17       Training Loss: 4.269919         Validation Loss: 4.121323       Elapsed: 0:03:02.187248
Validation loss decreased (4.172305 --> 4.121323). Saving model ...
Epoch: 18       Training Loss: 4.229653         Validation Loss: 4.078084       Elapsed: 0:03:02.005417
Validation loss decreased (4.121323 --> 4.078084). Saving model ...
Epoch: 19       Training Loss: 4.211623         Validation Loss: 4.075537       Elapsed: 0:03:02.023912
Validation loss decreased (4.078084 --> 4.075537). Saving model ...
Epoch: 20       Training Loss: 4.176366         Validation Loss: 4.071403       Elapsed: 0:03:02.443931
Validation loss decreased (4.075537 --> 4.071403). Saving model ...
Epoch: 21       Training Loss: 4.162033         Validation Loss: 4.060058       Elapsed: 0:03:01.880442
Validation loss decreased (4.071403 --> 4.060058). Saving model ...
Epoch: 22       Training Loss: 4.152350         Validation Loss: 4.017785       Elapsed: 0:03:02.961102
Validation loss decreased (4.060058 --> 4.017785). Saving model ...
Epoch: 23       Training Loss: 4.126623         Validation Loss: 4.061260       Elapsed: 0:03:02.727963
Epoch: 24       Training Loss: 4.099212         Validation Loss: 3.992973       Elapsed: 0:03:01.699973
Validation loss decreased (4.017785 --> 3.992973). Saving model ...
Epoch: 25       Training Loss: 4.075190         Validation Loss: 3.998641       Elapsed: 0:03:01.713804
Epoch: 26       Training Loss: 4.046143         Validation Loss: 3.997265       Elapsed: 0:03:02.571748
Epoch: 27       Training Loss: 4.043575         Validation Loss: 3.949613       Elapsed: 0:03:01.425152
Validation loss decreased (3.992973 --> 3.949613). Saving model ...
Epoch: 28       Training Loss: 4.015487         Validation Loss: 3.961522       Elapsed: 0:03:02.782270
Epoch: 29       Training Loss: 3.998070         Validation Loss: 3.948969       Elapsed: 0:03:02.048881
Validation loss decreased (3.949613 --> 3.948969). Saving model ...
Epoch: 30       Training Loss: 3.991606         Validation Loss: 3.938675       Elapsed: 0:03:02.713836
Validation loss decreased (3.948969 --> 3.938675). Saving model ...
Epoch: 31       Training Loss: 3.963830         Validation Loss: 3.918792       Elapsed: 0:03:01.697762
Validation loss decreased (3.938675 --> 3.918792). Saving model ...
Epoch: 32       Training Loss: 3.930790         Validation Loss: 3.897582       Elapsed: 0:03:01.460303
Validation loss decreased (3.918792 --> 3.897582). Saving model ...
Epoch: 33       Training Loss: 3.896765         Validation Loss: 3.963304       Elapsed: 0:03:02.224769
Epoch: 34       Training Loss: 3.879835         Validation Loss: 3.893857       Elapsed: 0:03:02.983978
Validation loss decreased (3.897582 --> 3.893857). Saving model ...
Epoch: 35       Training Loss: 3.888119         Validation Loss: 3.900615       Elapsed: 0:03:02.187086
Epoch: 36       Training Loss: 3.839318         Validation Loss: 3.884181       Elapsed: 0:03:02.805424
Validation loss decreased (3.893857 --> 3.884181). Saving model ...
Epoch: 37       Training Loss: 3.814765         Validation Loss: 3.863985       Elapsed: 0:03:03.838610
Validation loss decreased (3.884181 --> 3.863985). Saving model ...
Epoch: 38       Training Loss: 3.801056         Validation Loss: 3.873780       Elapsed: 0:03:03.033119
Epoch: 39       Training Loss: 3.797330         Validation Loss: 3.827120       Elapsed: 0:03:02.329334
Validation loss decreased (3.863985 --> 3.827120). Saving model ...
Epoch: 40       Training Loss: 3.776431         Validation Loss: 3.852023       Elapsed: 0:03:03.616306
Epoch: 41       Training Loss: 3.747829         Validation Loss: 3.814612       Elapsed: 0:03:03.231390
Validation loss decreased (3.827120 --> 3.814612). Saving model ...
Epoch: 42       Training Loss: 3.713182         Validation Loss: 3.811580       Elapsed: 0:03:00.355972
Validation loss decreased (3.814612 --> 3.811580). Saving model ...
Epoch: 43       Training Loss: 3.705967         Validation Loss: 3.811339       Elapsed: 0:03:11.512757
Validation loss decreased (3.811580 --> 3.811339). Saving model ...
Epoch: 44       Training Loss: 3.677942         Validation Loss: 3.763790       Elapsed: 0:03:06.798942
Validation loss decreased (3.811339 --> 3.763790). Saving model ...
Epoch: 45       Training Loss: 3.670521         Validation Loss: 3.804585       Elapsed: 0:03:09.111308
Epoch: 46       Training Loss: 3.616001         Validation Loss: 3.791811       Elapsed: 0:03:07.913439
Epoch: 47       Training Loss: 3.605779         Validation Loss: 3.818132       Elapsed: 0:03:08.180969
Epoch: 48       Training Loss: 3.578845         Validation Loss: 3.802942       Elapsed: 0:03:07.502958
Epoch: 49       Training Loss: 3.569269         Validation Loss: 3.763015       Elapsed: 0:03:08.838610
Validation loss decreased (3.763790 --> 3.763015). Saving model ...
Epoch: 50       Training Loss: 3.551981         Validation Loss: 3.727734       Elapsed: 0:03:07.301504
Validation loss decreased (3.763015 --> 3.727734). Saving model ...
Epoch: 51       Training Loss: 3.539640         Validation Loss: 3.763292       Elapsed: 0:03:08.697944
Epoch: 52       Training Loss: 3.514974         Validation Loss: 3.789170       Elapsed: 0:03:07.824023
Epoch: 53       Training Loss: 3.478333         Validation Loss: 3.730328       Elapsed: 0:03:08.594196
Epoch: 54       Training Loss: 3.474018         Validation Loss: 3.710677       Elapsed: 0:03:08.306823
Validation loss decreased (3.727734 --> 3.710677). Saving model ...
Epoch: 55       Training Loss: 3.455741         Validation Loss: 3.666004       Elapsed: 0:03:07.551808
Validation loss decreased (3.710677 --> 3.666004). Saving model ...
Epoch: 56       Training Loss: 3.385648         Validation Loss: 3.755735       Elapsed: 0:03:07.685431
Epoch: 57       Training Loss: 3.391713         Validation Loss: 3.739904       Elapsed: 0:03:09.560812
Epoch: 58       Training Loss: 3.385832         Validation Loss: 3.679237       Elapsed: 0:03:07.951572
Epoch: 59       Training Loss: 3.345478         Validation Loss: 3.698172       Elapsed: 0:03:07.605253
Epoch: 61       Training Loss: 3.329898         Validation Loss: 3.687313       Elapsed: 0:03:06.961018
Epoch: 62       Training Loss: 3.332215         Validation Loss: 3.722676       Elapsed: 0:03:08.430620
Epoch: 63       Training Loss: 3.290568         Validation Loss: 3.698964       Elapsed: 0:03:08.096713
Epoch: 64       Training Loss: 3.308631         Validation Loss: 3.693485       Elapsed: 0:03:06.612021
Epoch: 65       Training Loss: 3.242924         Validation Loss: 3.676528       Elapsed: 0:03:02.644056
Epoch: 66       Training Loss: 3.210221         Validation Loss: 3.672967       Elapsed: 0:03:02.000280
Epoch: 67       Training Loss: 3.248309         Validation Loss: 3.700498       Elapsed: 0:03:02.847392
Epoch: 68       Training Loss: 3.186689         Validation Loss: 3.672294       Elapsed: 0:03:04.354137
Epoch: 69       Training Loss: 3.148231         Validation Loss: 3.709312       Elapsed: 0:03:05.193586
Epoch: 70       Training Loss: 3.167838         Validation Loss: 3.735657       Elapsed: 0:03:04.797756
Epoch: 71       Training Loss: 3.154821         Validation Loss: 3.683042       Elapsed: 0:03:07.263391
Epoch: 72       Training Loss: 3.151534         Validation Loss: 3.803930       Elapsed: 0:03:02.779610
Epoch: 73       Training Loss: 3.157296         Validation Loss: 3.690141       Elapsed: 0:03:05.410248
Epoch: 74       Training Loss: 3.101250         Validation Loss: 3.771072       Elapsed: 0:03:03.327209
Epoch: 75       Training Loss: 3.052344         Validation Loss: 3.676567       Elapsed: 0:03:01.068909
Epoch: 76       Training Loss: 3.043009         Validation Loss: 3.728986       Elapsed: 0:03:01.663287
Epoch: 77       Training Loss: 3.035244         Validation Loss: 3.787941       Elapsed: 0:03:02.757887
Epoch: 78       Training Loss: 3.024287         Validation Loss: 3.795896       Elapsed: 0:03:01.845504
Epoch: 79       Training Loss: 2.992325         Validation Loss: 3.716417       Elapsed: 0:03:02.454654
Epoch: 80       Training Loss: 2.985272         Validation Loss: 3.665017       Elapsed: 0:03:01.616717
Validation loss decreased (3.666004 --> 3.665017). Saving model ...
Epoch: 81       Training Loss: 2.972644         Validation Loss: 3.750383       Elapsed: 0:03:02.581951
Epoch: 82       Training Loss: 2.948319         Validation Loss: 3.790278       Elapsed: 0:03:02.529694
Epoch: 83       Training Loss: 2.955792         Validation Loss: 3.807737       Elapsed: 0:03:02.909021
Epoch: 84       Training Loss: 2.953483         Validation Loss: 3.884490       Elapsed: 0:03:00.926423
Epoch: 85       Training Loss: 2.907973         Validation Loss: 3.876141       Elapsed: 0:03:01.702236
Epoch: 86       Training Loss: 2.886144         Validation Loss: 3.806277       Elapsed: 0:03:02.415406
Epoch: 87       Training Loss: 2.895160         Validation Loss: 3.768452       Elapsed: 0:03:02.365341
Epoch: 88       Training Loss: 2.878172         Validation Loss: 3.794703       Elapsed: 0:03:01.910776
Epoch: 89       Training Loss: 2.850065         Validation Loss: 3.784806       Elapsed: 0:03:01.821389
Epoch: 90       Training Loss: 2.808656         Validation Loss: 3.834159       Elapsed: 0:03:02.931420
Epoch: 91       Training Loss: 2.807267         Validation Loss: 3.879032       Elapsed: 0:03:01.804976
Epoch: 92       Training Loss: 2.773044         Validation Loss: 3.779162       Elapsed: 0:03:03.069339
Epoch: 93       Training Loss: 2.787731         Validation Loss: 3.912086       Elapsed: 0:03:01.484451
Epoch: 94       Training Loss: 2.741030         Validation Loss: 3.782457       Elapsed: 0:03:01.528688
Epoch: 95       Training Loss: 2.777800         Validation Loss: 3.873816       Elapsed: 0:03:02.658232
Epoch: 96       Training Loss: 2.748137         Validation Loss: 3.923467       Elapsed: 0:03:01.510292
Epoch: 97       Training Loss: 2.725654         Validation Loss: 3.989069       Elapsed: 0:03:02.315783
Epoch: 98       Training Loss: 2.723776         Validation Loss: 3.946343       Elapsed: 0:03:01.279152
Epoch: 99       Training Loss: 2.662464         Validation Loss: 3.885177       Elapsed: 0:03:02.807385
Epoch: 100      Training Loss: 2.714636         Validation Loss: 3.916170       Elapsed: 0:03:01.294095
Training Ended: 2019-01-07 05:24:48.263423
Total Training Time: 5:06:59.494207

Load the model that got the best validation accuracy.

In [23]:
model_scratch.load_state_dict(torch.load(scratch_path))

(IMPLEMENTATION) Test the Model

Try out your model on the test dataset of dog images. Use the code cell below to calculate and print the test loss and accuracy. Ensure that your test accuracy is greater than 10%.

In [45]:
def test(loaders, model, criterion, use_cuda, print_function=print):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print_function('Test Loss: {:.6f}\n'.format(test_loss))

    print_function('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))
In [25]:
scratch_test_log = Tee("scratch_test.log")
In [ ]:
# call test function    
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda, print_function=scratch_test_log)
Test Loss: 3.611238


Test Accuracy: 17% (149/836)

Step 4: Create a CNN to Classify Dog Breeds (using Transfer Learning)

You will now use transfer learning to create a CNN that can identify dog breed from images. Your CNN must attain at least 60% accuracy on the test set.

(IMPLEMENTATION) Specify Data Loaders for the Dog Dataset

Use the code cell below to write three separate data loaders for the training, validation, and test datasets of dog images (located at dogImages/train, dogImages/valid, and dogImages/test, respectively).

If you like, you are welcome to use the same data loaders from the previous step, when you created a CNN from scratch.

In [47]:
loaders_transfer = loaders_scratch

(IMPLEMENTATION) Model Architecture

Use transfer learning to create a CNN to classify dog breed. Use the code cell below, and save your initialized model as the variable model_transfer.

The Transfer Model
In [34]:
model_transfer = models.inception_v3(pretrained=True)
for parameter in model_transfer.parameters():
    parameter.requires_grad = False
classifier_inputs = model_transfer.fc.in_features
model_transfer.fc = nn.Linear(in_features=classifier_inputs,
                              out_features=BREEDS,
                              bias=True)
model_transfer.to(device)
MODELS.append(model_transfer)

Question 5: Outline the steps you took to get to your final CNN architecture and your reasoning at each step. Describe why you think the architecture is suitable for the current problem.

Answer:

I looked at the source code and the string representation of the model and saw that the classification was being done by a single fully-connected (Linear) layer with 2,048 inputs and 1,000 outputs. Since we only have 133 classes, I replaced the final layer (model.fc) with one that has the same number of inputs but only 133 outputs.

I chose the Inception V3 network because, like the VGG 16 model, it was trained on the ImageNet data-set and detects features in images but, as noted in Rethinking the Inception Architecture for Computer Vision, it requires fewer computational resources than the VGG model does, which I thought was an attractive feature. The Inception model does introduce one complication: it uses an auxiliary classifier during training, so its forward pass returns a tuple of tensors and the training function has to be modified to handle this, but that seemed minor.
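Since the auxiliary output only appears in training mode, the change to the training loop is small. The sketch below only illustrates the idea, it is not the `train` function defined earlier (which takes an `is_inception` flag); the variable names and the choice to simply discard the auxiliary logits are assumptions.

# Hypothetical sketch of the is_inception handling, not the document's train function.
# In training mode Inception V3 returns (main logits, auxiliary logits).
output = model_transfer(data)
if isinstance(output, tuple):
    output, _aux_output = output  # keep only the main logits for the loss
loss = criterion_transfer(output, target)

Another common option is to add the auxiliary loss with a small weight, but discarding it keeps the change minimal.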

(IMPLEMENTATION) Specify Loss Function and Optimizer

Use the next code cell to specify a loss function and optimizer. Save the chosen loss function as criterion_transfer, and the optimizer as optimizer_transfer below.

In [ ]:
criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = optimizer.SGD(
    model_transfer.parameters(),
    lr=0.001,
    momentum=0.9)

(IMPLEMENTATION) Train and Validate the Model

Train and validate your model in the code cell below. Save the final model parameters at filepath 'model_transfer.pt'.

In [24]:
transfer_model_path = MODEL_PATH.joinpath("model_transfer.pt")
In [65]:
transfer_log = Tee(log_name="transfer_train.log")
In [ ]:
EPOCHS = 100
In [ ]:
# train the model
model_transfer = train(EPOCHS,
                       loaders=loaders_transfer,
                       model=model_transfer,
                       optimizer=optimizer_transfer,
                       criterion=criterion_transfer,
                       use_cuda=use_cuda,
                       save_path=transfer_model_path,
                       print_function=transfer_log,
                       is_inception=True)
Training Started: 2019-01-07 05:25:10.303990
Epoch: 1        Training Loss: 4.699307         Validation Loss: 4.270935       Elapsed: 0:03:18.031065
Validation loss decreased (inf --> 4.270935). Saving model ...
Epoch: 2        Training Loss: 4.181660         Validation Loss: 3.670290       Elapsed: 0:03:17.966246
Validation loss decreased (4.270935 --> 3.670290). Saving model ...
Epoch: 3        Training Loss: 3.735970         Validation Loss: 3.142542       Elapsed: 0:03:17.943660
Validation loss decreased (3.670290 --> 3.142542). Saving model ...
Epoch: 4        Training Loss: 3.343428         Validation Loss: 2.698115       Elapsed: 0:03:18.696943
Validation loss decreased (3.142542 --> 2.698115). Saving model ...
Epoch: 5        Training Loss: 2.995878         Validation Loss: 2.334530       Elapsed: 0:03:19.205373
Validation loss decreased (2.698115 --> 2.334530). Saving model ...
Epoch: 6        Training Loss: 2.723056         Validation Loss: 2.033339       Elapsed: 0:03:19.099028
Validation loss decreased (2.334530 --> 2.033339). Saving model ...
Epoch: 7        Training Loss: 2.518057         Validation Loss: 1.812573       Elapsed: 0:03:17.994237
Validation loss decreased (2.033339 --> 1.812573). Saving model ...
Epoch: 8        Training Loss: 2.310053         Validation Loss: 1.609529       Elapsed: 0:03:16.717152
Validation loss decreased (1.812573 --> 1.609529). Saving model ...
Epoch: 9        Training Loss: 2.166829         Validation Loss: 1.439860       Elapsed: 0:03:17.935079
Validation loss decreased (1.609529 --> 1.439860). Saving model ...
Epoch: 10       Training Loss: 2.057079         Validation Loss: 1.292030       Elapsed: 0:03:17.791206
Validation loss decreased (1.439860 --> 1.292030). Saving model ...
Epoch: 11       Training Loss: 1.958263         Validation Loss: 1.243316       Elapsed: 0:03:18.748263
Validation loss decreased (1.292030 --> 1.243316). Saving model ...
Epoch: 12       Training Loss: 1.859445         Validation Loss: 1.130529       Elapsed: 0:03:17.303672
Validation loss decreased (1.243316 --> 1.130529). Saving model ...
Epoch: 13       Training Loss: 1.799369         Validation Loss: 1.067557       Elapsed: 0:03:18.150230
Validation loss decreased (1.130529 --> 1.067557). Saving model ...
Epoch: 14       Training Loss: 1.723310         Validation Loss: 1.018531       Elapsed: 0:03:18.394798
Validation loss decreased (1.067557 --> 1.018531). Saving model ...
Epoch: 15       Training Loss: 1.688872         Validation Loss: 0.965496       Elapsed: 0:03:17.432118
Validation loss decreased (1.018531 --> 0.965496). Saving model ...
Epoch: 16       Training Loss: 1.639950         Validation Loss: 0.907270       Elapsed: 0:03:17.425620
Validation loss decreased (0.965496 --> 0.907270). Saving model ...
Epoch: 17       Training Loss: 1.576800         Validation Loss: 0.875295       Elapsed: 0:03:17.972938
Validation loss decreased (0.907270 --> 0.875295). Saving model ...
Epoch: 18       Training Loss: 1.547050         Validation Loss: 0.824278       Elapsed: 0:03:18.100030
Validation loss decreased (0.875295 --> 0.824278). Saving model ...
Epoch: 19       Training Loss: 1.539646         Validation Loss: 0.808194       Elapsed: 0:03:19.895761
Validation loss decreased (0.824278 --> 0.808194). Saving model ...
Epoch: 20       Training Loss: 1.500094         Validation Loss: 0.777300       Elapsed: 0:03:18.248607
Validation loss decreased (0.808194 --> 0.777300). Saving model ...
Epoch: 21       Training Loss: 1.478536         Validation Loss: 0.762025       Elapsed: 0:03:18.096901
Validation loss decreased (0.777300 --> 0.762025). Saving model ...
Epoch: 22       Training Loss: 1.449271         Validation Loss: 0.745259       Elapsed: 0:03:17.565620
Validation loss decreased (0.762025 --> 0.745259). Saving model ...
Epoch: 23       Training Loss: 1.426696         Validation Loss: 0.721501       Elapsed: 0:03:17.674511
Validation loss decreased (0.745259 --> 0.721501). Saving model ...
Epoch: 24       Training Loss: 1.384365         Validation Loss: 0.706536       Elapsed: 0:03:18.663604
Validation loss decreased (0.721501 --> 0.706536). Saving model ...
Epoch: 25       Training Loss: 1.352370         Validation Loss: 0.684035       Elapsed: 0:03:18.739320
Validation loss decreased (0.706536 --> 0.684035). Saving model ...
Epoch: 26       Training Loss: 1.382330         Validation Loss: 0.680882       Elapsed: 0:03:18.504176
Validation loss decreased (0.684035 --> 0.680882). Saving model ...
Epoch: 27       Training Loss: 1.352410         Validation Loss: 0.662414       Elapsed: 0:03:18.004690
Validation loss decreased (0.680882 --> 0.662414). Saving model ...
Epoch: 28       Training Loss: 1.323105         Validation Loss: 0.652469       Elapsed: 0:03:17.707236
Validation loss decreased (0.662414 --> 0.652469). Saving model ...
Epoch: 29       Training Loss: 1.321770         Validation Loss: 0.634052       Elapsed: 0:03:20.164878
Validation loss decreased (0.652469 --> 0.634052). Saving model ...
Epoch: 30       Training Loss: 1.309750         Validation Loss: 0.638077       Elapsed: 0:03:21.737296
Epoch: 31       Training Loss: 1.307307         Validation Loss: 0.615018       Elapsed: 0:03:18.198152
Validation loss decreased (0.634052 --> 0.615018). Saving model ...
Epoch: 32       Training Loss: 1.259097         Validation Loss: 0.618697       Elapsed: 0:03:19.649852
Epoch: 33       Training Loss: 1.276199         Validation Loss: 0.603413       Elapsed: 0:03:16.942841
Validation loss decreased (0.615018 --> 0.603413). Saving model ...
Epoch: 34       Training Loss: 1.258176         Validation Loss: 0.589237       Elapsed: 0:03:18.103221
Validation loss decreased (0.603413 --> 0.589237). Saving model ...
Epoch: 35       Training Loss: 1.254458         Validation Loss: 0.576390       Elapsed: 0:03:18.758651
Validation loss decreased (0.589237 --> 0.576390). Saving model ...
Epoch: 36       Training Loss: 1.246464         Validation Loss: 0.571317       Elapsed: 0:03:17.794329
Validation loss decreased (0.576390 --> 0.571317). Saving model ...
Epoch: 37       Training Loss: 1.227437         Validation Loss: 0.567114       Elapsed: 0:03:17.484424
Validation loss decreased (0.571317 --> 0.567114). Saving model ...
Epoch: 38       Training Loss: 1.228403         Validation Loss: 0.557364       Elapsed: 0:03:17.744637
Validation loss decreased (0.567114 --> 0.557364). Saving model ...
Epoch: 39       Training Loss: 1.213402         Validation Loss: 0.558201       Elapsed: 0:03:17.285552
Epoch: 40       Training Loss: 1.206945         Validation Loss: 0.557859       Elapsed: 0:03:18.132396
Epoch: 41       Training Loss: 1.193073         Validation Loss: 0.536087       Elapsed: 0:03:17.725738
Validation loss decreased (0.557364 --> 0.536087). Saving model ...
Epoch: 42       Training Loss: 1.194688         Validation Loss: 0.536722       Elapsed: 0:03:17.683174
Epoch: 43       Training Loss: 1.179069         Validation Loss: 0.533558       Elapsed: 0:03:18.412587
Validation loss decreased (0.536087 --> 0.533558). Saving model ...

The connection to the server died during training (thank you, CenturyLink), so I'll read the log file instead to see the full run.

In [28]:
with transfer_log.path.open() as reader:
    for line in reader:
        print(line.rstrip())
Training Started: 2019-01-07 05:25:10.303990
Epoch: 1        Training Loss: 4.699307         Validation Loss: 4.270935       Elapsed: 0:03:18.031065
Validation loss decreased (inf --> 4.270935). Saving model ...
Epoch: 2        Training Loss: 4.181660         Validation Loss: 3.670290       Elapsed: 0:03:17.966246
Validation loss decreased (4.270935 --> 3.670290). Saving model ...
Epoch: 3        Training Loss: 3.735970         Validation Loss: 3.142542       Elapsed: 0:03:17.943660
Validation loss decreased (3.670290 --> 3.142542). Saving model ...
Epoch: 4        Training Loss: 3.343428         Validation Loss: 2.698115       Elapsed: 0:03:18.696943
Validation loss decreased (3.142542 --> 2.698115). Saving model ...
Epoch: 5        Training Loss: 2.995878         Validation Loss: 2.334530       Elapsed: 0:03:19.205373
Validation loss decreased (2.698115 --> 2.334530). Saving model ...
Epoch: 6        Training Loss: 2.723056         Validation Loss: 2.033339       Elapsed: 0:03:19.099028
Validation loss decreased (2.334530 --> 2.033339). Saving model ...
Epoch: 7        Training Loss: 2.518057         Validation Loss: 1.812573       Elapsed: 0:03:17.994237
Validation loss decreased (2.033339 --> 1.812573). Saving model ...
Epoch: 8        Training Loss: 2.310053         Validation Loss: 1.609529       Elapsed: 0:03:16.717152
Validation loss decreased (1.812573 --> 1.609529). Saving model ...
Epoch: 9        Training Loss: 2.166829         Validation Loss: 1.439860       Elapsed: 0:03:17.935079
Validation loss decreased (1.609529 --> 1.439860). Saving model ...
Epoch: 10       Training Loss: 2.057079         Validation Loss: 1.292030       Elapsed: 0:03:17.791206
Validation loss decreased (1.439860 --> 1.292030). Saving model ...
Epoch: 11       Training Loss: 1.958263         Validation Loss: 1.243316       Elapsed: 0:03:18.748263
Validation loss decreased (1.292030 --> 1.243316). Saving model ...
Epoch: 12       Training Loss: 1.859445         Validation Loss: 1.130529       Elapsed: 0:03:17.303672
Validation loss decreased (1.243316 --> 1.130529). Saving model ...
Epoch: 13       Training Loss: 1.799369         Validation Loss: 1.067557       Elapsed: 0:03:18.150230
Validation loss decreased (1.130529 --> 1.067557). Saving model ...
Epoch: 14       Training Loss: 1.723310         Validation Loss: 1.018531       Elapsed: 0:03:18.394798
Validation loss decreased (1.067557 --> 1.018531). Saving model ...
Epoch: 15       Training Loss: 1.688872         Validation Loss: 0.965496       Elapsed: 0:03:17.432118
Validation loss decreased (1.018531 --> 0.965496). Saving model ...
Epoch: 16       Training Loss: 1.639950         Validation Loss: 0.907270       Elapsed: 0:03:17.425620
Validation loss decreased (0.965496 --> 0.907270). Saving model ...
Epoch: 17       Training Loss: 1.576800         Validation Loss: 0.875295       Elapsed: 0:03:17.972938
Validation loss decreased (0.907270 --> 0.875295). Saving model ...
Epoch: 18       Training Loss: 1.547050         Validation Loss: 0.824278       Elapsed: 0:03:18.100030
Validation loss decreased (0.875295 --> 0.824278). Saving model ...
Epoch: 19       Training Loss: 1.539646         Validation Loss: 0.808194       Elapsed: 0:03:19.895761
Validation loss decreased (0.824278 --> 0.808194). Saving model ...
Epoch: 20       Training Loss: 1.500094         Validation Loss: 0.777300       Elapsed: 0:03:18.248607
Validation loss decreased (0.808194 --> 0.777300). Saving model ...
Epoch: 21       Training Loss: 1.478536         Validation Loss: 0.762025       Elapsed: 0:03:18.096901
Validation loss decreased (0.777300 --> 0.762025). Saving model ...
Epoch: 22       Training Loss: 1.449271         Validation Loss: 0.745259       Elapsed: 0:03:17.565620
Validation loss decreased (0.762025 --> 0.745259). Saving model ...
Epoch: 23       Training Loss: 1.426696         Validation Loss: 0.721501       Elapsed: 0:03:17.674511
Validation loss decreased (0.745259 --> 0.721501). Saving model ...
Epoch: 24       Training Loss: 1.384365         Validation Loss: 0.706536       Elapsed: 0:03:18.663604
Validation loss decreased (0.721501 --> 0.706536). Saving model ...
Epoch: 25       Training Loss: 1.352370         Validation Loss: 0.684035       Elapsed: 0:03:18.739320
Validation loss decreased (0.706536 --> 0.684035). Saving model ...
Epoch: 26       Training Loss: 1.382330         Validation Loss: 0.680882       Elapsed: 0:03:18.504176
Validation loss decreased (0.684035 --> 0.680882). Saving model ...
Epoch: 27       Training Loss: 1.352410         Validation Loss: 0.662414       Elapsed: 0:03:18.004690
Validation loss decreased (0.680882 --> 0.662414). Saving model ...
Epoch: 28       Training Loss: 1.323105         Validation Loss: 0.652469       Elapsed: 0:03:17.707236
Validation loss decreased (0.662414 --> 0.652469). Saving model ...
Epoch: 29       Training Loss: 1.321770         Validation Loss: 0.634052       Elapsed: 0:03:20.164878
Validation loss decreased (0.652469 --> 0.634052). Saving model ...
Epoch: 30       Training Loss: 1.309750         Validation Loss: 0.638077       Elapsed: 0:03:21.737296
Epoch: 31       Training Loss: 1.307307         Validation Loss: 0.615018       Elapsed: 0:03:18.198152
Validation loss decreased (0.634052 --> 0.615018). Saving model ...
Epoch: 32       Training Loss: 1.259097         Validation Loss: 0.618697       Elapsed: 0:03:19.649852
Epoch: 33       Training Loss: 1.276199         Validation Loss: 0.603413       Elapsed: 0:03:16.942841
Validation loss decreased (0.615018 --> 0.603413). Saving model ...
Epoch: 34       Training Loss: 1.258176         Validation Loss: 0.589237       Elapsed: 0:03:18.103221
Validation loss decreased (0.603413 --> 0.589237). Saving model ...
Epoch: 35       Training Loss: 1.254458         Validation Loss: 0.576390       Elapsed: 0:03:18.758651
Validation loss decreased (0.589237 --> 0.576390). Saving model ...
Epoch: 36       Training Loss: 1.246464         Validation Loss: 0.571317       Elapsed: 0:03:17.794329
Validation loss decreased (0.576390 --> 0.571317). Saving model ...
Epoch: 37       Training Loss: 1.227437         Validation Loss: 0.567114       Elapsed: 0:03:17.484424
Validation loss decreased (0.571317 --> 0.567114). Saving model ...
Epoch: 38       Training Loss: 1.228403         Validation Loss: 0.557364       Elapsed: 0:03:17.744637
Validation loss decreased (0.567114 --> 0.557364). Saving model ...
Epoch: 39       Training Loss: 1.213402         Validation Loss: 0.558201       Elapsed: 0:03:17.285552
Epoch: 40       Training Loss: 1.206945         Validation Loss: 0.557859       Elapsed: 0:03:18.132396
Epoch: 41       Training Loss: 1.193073         Validation Loss: 0.536087       Elapsed: 0:03:17.725738
Validation loss decreased (0.557364 --> 0.536087). Saving model ...
Epoch: 42       Training Loss: 1.194688         Validation Loss: 0.536722       Elapsed: 0:03:17.683174
Epoch: 43       Training Loss: 1.179069         Validation Loss: 0.533558       Elapsed: 0:03:18.412587
Validation loss decreased (0.536087 --> 0.533558). Saving model ...
Epoch: 44       Training Loss: 1.173093         Validation Loss: 0.521101       Elapsed: 0:03:17.631464
Validation loss decreased (0.533558 --> 0.521101). Saving model ...
Epoch: 45       Training Loss: 1.153653         Validation Loss: 0.527879       Elapsed: 0:03:17.595422
Epoch: 46       Training Loss: 1.158538         Validation Loss: 0.535613       Elapsed: 0:03:18.427818
Epoch: 47       Training Loss: 1.174377         Validation Loss: 0.528422       Elapsed: 0:03:17.892116
Epoch: 48       Training Loss: 1.164288         Validation Loss: 0.507026       Elapsed: 0:03:17.780444
Validation loss decreased (0.521101 --> 0.507026). Saving model ...
Epoch: 49       Training Loss: 1.161782         Validation Loss: 0.503888       Elapsed: 0:03:17.422116
Validation loss decreased (0.507026 --> 0.503888). Saving model ...
Epoch: 50       Training Loss: 1.163059         Validation Loss: 0.500597       Elapsed: 0:03:17.825155
Validation loss decreased (0.503888 --> 0.500597). Saving model ...
Epoch: 51       Training Loss: 1.154003         Validation Loss: 0.509676       Elapsed: 0:03:17.683708
Epoch: 52       Training Loss: 1.122364         Validation Loss: 0.500437       Elapsed: 0:03:16.342809
Validation loss decreased (0.500597 --> 0.500437). Saving model ...
Epoch: 53       Training Loss: 1.118776         Validation Loss: 0.502778       Elapsed: 0:03:17.775326
Epoch: 54       Training Loss: 1.137227         Validation Loss: 0.489028       Elapsed: 0:03:16.730713
Validation loss decreased (0.500437 --> 0.489028). Saving model ...
Epoch: 55       Training Loss: 1.112989         Validation Loss: 0.490746       Elapsed: 0:03:17.194025
Epoch: 56       Training Loss: 1.112278         Validation Loss: 0.491313       Elapsed: 0:03:18.037435
Epoch: 57       Training Loss: 1.105172         Validation Loss: 0.488087       Elapsed: 0:03:17.750197
Validation loss decreased (0.489028 --> 0.488087). Saving model ...
Epoch: 58       Training Loss: 1.106263         Validation Loss: 0.477318       Elapsed: 0:03:17.918800
Validation loss decreased (0.488087 --> 0.477318). Saving model ...
Epoch: 59       Training Loss: 1.110798         Validation Loss: 0.484890       Elapsed: 0:03:17.959631
Epoch: 60       Training Loss: 1.102846         Validation Loss: 0.475269       Elapsed: 0:03:17.318802
Validation loss decreased (0.477318 --> 0.475269). Saving model ...
Epoch: 61       Training Loss: 1.107576         Validation Loss: 0.470764       Elapsed: 0:03:17.191263
Validation loss decreased (0.475269 --> 0.470764). Saving model ...
Epoch: 62       Training Loss: 1.079003         Validation Loss: 0.469544       Elapsed: 0:03:17.907726
Validation loss decreased (0.470764 --> 0.469544). Saving model ...
Epoch: 63       Training Loss: 1.085582         Validation Loss: 0.473371       Elapsed: 0:03:17.590775
Epoch: 64       Training Loss: 1.097795         Validation Loss: 0.466651       Elapsed: 0:03:16.782743
Validation loss decreased (0.469544 --> 0.466651). Saving model ...
Epoch: 65       Training Loss: 1.087516         Validation Loss: 0.466158       Elapsed: 0:03:18.581609
Validation loss decreased (0.466651 --> 0.466158). Saving model ...
Epoch: 66       Training Loss: 1.041934         Validation Loss: 0.469748       Elapsed: 0:03:17.901108
Epoch: 67       Training Loss: 1.075575         Validation Loss: 0.454066       Elapsed: 0:03:17.029518
Validation loss decreased (0.466158 --> 0.454066). Saving model ...
Epoch: 68       Training Loss: 1.074739         Validation Loss: 0.474331       Elapsed: 0:03:18.015337
Epoch: 69       Training Loss: 1.052330         Validation Loss: 0.461796       Elapsed: 0:03:17.474546
Epoch: 70       Training Loss: 1.074078         Validation Loss: 0.457424       Elapsed: 0:03:16.963451
Epoch: 71       Training Loss: 1.032617         Validation Loss: 0.449744       Elapsed: 0:03:17.340017
Validation loss decreased (0.454066 --> 0.449744). Saving model ...
Epoch: 72       Training Loss: 1.054414         Validation Loss: 0.454565       Elapsed: 0:03:17.676010
Epoch: 73       Training Loss: 1.044849         Validation Loss: 0.453206       Elapsed: 0:03:17.600106
Epoch: 74       Training Loss: 1.035498         Validation Loss: 0.458112       Elapsed: 0:03:17.464877
Epoch: 75       Training Loss: 1.047880         Validation Loss: 0.459989       Elapsed: 0:03:17.049121
Epoch: 76       Training Loss: 1.034578         Validation Loss: 0.446105       Elapsed: 0:03:18.764851
Validation loss decreased (0.449744 --> 0.446105). Saving model ...
Epoch: 77       Training Loss: 1.032169         Validation Loss: 0.439367       Elapsed: 0:03:18.741754
Validation loss decreased (0.446105 --> 0.439367). Saving model ...
Epoch: 78       Training Loss: 1.048666         Validation Loss: 0.448395       Elapsed: 0:03:17.824941
Epoch: 79       Training Loss: 1.040212         Validation Loss: 0.440193       Elapsed: 0:03:18.251639
Epoch: 80       Training Loss: 1.032011         Validation Loss: 0.441098       Elapsed: 0:03:17.759952
Epoch: 81       Training Loss: 1.038431         Validation Loss: 0.434215       Elapsed: 0:03:16.541620
Validation loss decreased (0.439367 --> 0.434215). Saving model ...
Epoch: 82       Training Loss: 1.039337         Validation Loss: 0.442144       Elapsed: 0:03:17.911105
Epoch: 83       Training Loss: 1.032783         Validation Loss: 0.438590       Elapsed: 0:03:17.591553
Epoch: 84       Training Loss: 1.034323         Validation Loss: 0.441891       Elapsed: 0:03:17.387050
Epoch: 85       Training Loss: 1.055545         Validation Loss: 0.434267       Elapsed: 0:03:17.262275
Epoch: 86       Training Loss: 0.996985         Validation Loss: 0.432956       Elapsed: 0:03:17.287156
Validation loss decreased (0.434215 --> 0.432956). Saving model ...
Epoch: 87       Training Loss: 1.025106         Validation Loss: 0.433783       Elapsed: 0:03:17.746683
Epoch: 88       Training Loss: 1.003464         Validation Loss: 0.436888       Elapsed: 0:03:17.344770
Epoch: 89       Training Loss: 1.021132         Validation Loss: 0.432445       Elapsed: 0:03:18.347353
Validation loss decreased (0.432956 --> 0.432445). Saving model ...
Epoch: 90       Training Loss: 1.025346         Validation Loss: 0.428862       Elapsed: 0:03:18.518516
Validation loss decreased (0.432445 --> 0.428862). Saving model ...
Epoch: 91       Training Loss: 1.039084         Validation Loss: 0.418361       Elapsed: 0:03:18.556944
Validation loss decreased (0.428862 --> 0.418361). Saving model ...
Epoch: 92       Training Loss: 1.009550         Validation Loss: 0.424567       Elapsed: 0:03:17.763665
Epoch: 93       Training Loss: 1.002043         Validation Loss: 0.430174       Elapsed: 0:03:17.460125
Epoch: 94       Training Loss: 0.995485         Validation Loss: 0.417896       Elapsed: 0:03:18.836221
Validation loss decreased (0.418361 --> 0.417896). Saving model ...
Epoch: 95       Training Loss: 0.969755         Validation Loss: 0.419555       Elapsed: 0:03:11.488185
Epoch: 96       Training Loss: 0.987362         Validation Loss: 0.421185       Elapsed: 0:03:10.406026
Epoch: 97       Training Loss: 0.980267         Validation Loss: 0.417785       Elapsed: 0:03:10.542342
Validation loss decreased (0.417896 --> 0.417785). Saving model ...
Epoch: 98       Training Loss: 0.973978         Validation Loss: 0.416819       Elapsed: 0:03:12.167687
Validation loss decreased (0.417785 --> 0.416819). Saving model ...
Epoch: 99       Training Loss: 0.994163         Validation Loss: 0.418498       Elapsed: 0:03:17.225706
Epoch: 100      Training Loss: 0.998819         Validation Loss: 0.423518       Elapsed: 0:03:18.415953
Training Ended: 2019-01-07 10:55:04.465024
Total Training Time: 5:29:54.161034
In [25]:
# load the model that got the best validation accuracy
model_transfer.load_state_dict(torch.load(transfer_model_path))
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-25-bac3efba0fcd> in <module>
      1 # load the model that got the best validation accuracy (uncomment the line below)
----> 2 model_transfer.load_state_dict(torch.load(transfer_model_path))

~/.virtualenvs/neural_networks/lib/python3.6/site-packages/torch/nn/modules/module.py in load_state_dict(self, state_dict, strict)
    717         if len(error_msgs) > 0:
    718             raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
--> 719                                self.__class__.__name__, "\n\t".join(error_msgs)))
    720 
    721     def parameters(self):

RuntimeError: Error(s) in loading state_dict for Inception3:
        size mismatch for fc.weight: copying a param of torch.Size([1000, 2048]) from checkpoint, where the shape is torch.Size([133, 2048]) in current model.
        size mismatch for fc.bias: copying a param of torch.Size([1000]) from checkpoint, where the shape is torch.Size([133]) in current model.
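A state dict only loads when every saved parameter shape matches the corresponding parameter of the model in memory, and here the two fc shapes disagree (a 1,000-output head on one side, the 133-output head on the other), most likely because the load was attempted on a freshly constructed inception_v3 whose final layer had not been replaced yet. The Inception class in Step 5 avoids this by rebuilding the head before loading; this is a minimal sketch of that pattern, assuming the checkpoint was saved with the 133-output head.

# Hypothetical sketch: rebuild the 133-output head *before* loading the state dict
# so the in-memory shapes match what was saved (the Inception class below does this).
model_transfer = models.inception_v3(pretrained=True)
model_transfer.fc = nn.Linear(model_transfer.fc.in_features, BREEDS)
model_transfer.load_state_dict(torch.load(transfer_model_path))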

(IMPLEMENTATION) Test the Model

Try out your model on the test dataset of dog images. Use the code cell below to calculate and print the test loss and accuracy. Ensure that your test accuracy is greater than 60%.

In [46]:
transfer_test_log = Tee("transfer_test.log")
In [51]:
test(loaders_transfer, model_transfer, criterion_transfer, use_cuda, print_function=transfer_test_log)
Test Loss: 0.425383


Test Accuracy: 87% (734/836)

(IMPLEMENTATION) Predict Dog Breed with the Model

Write a function that takes an image path as input and returns the dog breed (Affenpinscher, Afghan hound, etc) that is predicted by your model.

In [32]:
class_names = [item[4:].replace("_", " ") for item in training.classes]

def predict_breed_transfer(img_path: str) -> str:
    """Predicts the dog-breed of what's in the image

    Args:
     img_path: path to the image to search

    Returns:
     the name of the dog-breed
    """
    # load the image
    image = Image.open(img_path)

    # convert the image to a tensor
    tensor = test_transform(image)

    # add a batch number
    tensor = tensor.unsqueeze_(0)

    # put on the GPU or CPU
    tensor = tensor.to(device)

    # make it a variable
    x = torch.autograd.Variable(tensor)

    # make the prediction
    output = model_transfer(x)
    return class_names[output.data.cpu().numpy().argmax()]
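For example (the image path here is just a hypothetical placeholder):

# Hypothetical usage; "sample_dog.jpg" is a made-up path.
print(predict_breed_transfer("sample_dog.jpg"))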

Step 5: Write your Algorithm

Write an algorithm that accepts a file path to an image and first determines whether the image contains a human, dog, or neither. Then,

  • if a dog is detected in the image, return the predicted breed.
  • if a human is detected in the image, return the resembling dog breed.
  • if neither is detected in the image, provide output that indicates an error.

You are welcome to write your own functions for detecting humans and dogs in images, but feel free to use the face_detector and human_detector functions developed above. You are required to use your CNN from Step 4 to predict dog breed.

Some sample output for our algorithm is provided below, but feel free to design your own user experience!

Sample Human Output

(IMPLEMENTATION) Write your Algorithm

Re-Done Code

I originally wrote my implementation using classes, because I kept getting errors related to the fact that Jupyter lets you run cells out of order, so I wanted the pieces defined as a group (and because I find it easier to work this way once there is this much code). I broke the parts up above to answer the questions, but I'm including them in this section so my final solution works on its own. Everything up to the Dog Breed Classifier section was already implemented above using functions and global variables instead of class methods; only the Dog Breed Classifier section and below has new implementations.

In [53]:
class Transformer:
    """Builds the image transformers

    Args:
     means: list of means for each channel
     deviations: list of standard deviations for each channel
     image_size: size to crop the image to
    """
    def __init__(self,
                 means: list=MEANS,
                 deviations: list=DEVIATIONS,
                 image_size: int=INCEPTION_IMAGE_SIZE) -> None:
        self.means = means
        self.deviations = deviations
        self.image_size = image_size
        self._training = None
        self._testing = None
        return

    @property
    def training(self) -> transforms.Compose:
        """The image transformers for the training"""
        if self._training is None:
            self._training = transforms.Compose([
                transforms.RandomRotation(30),
                transforms.RandomResizedCrop(self.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(self.means,
                                     self.deviations)])
        return self._training

    @property
    def testing(self) -> transforms.Compose:
        """Image transforms for the testing"""
        if self._testing is None:
            self._testing = transforms.Compose(
                [transforms.Resize(self.image_size),
                 transforms.CenterCrop(INCEPTION_IMAGE_SIZE),
                 transforms.ToTensor(),
                 transforms.Normalize(self.means,
                                      self.deviations)])
        return self._testing
In [54]:
class DogDetector:
    """Detects dogs

    Args:
     model_definition: definition for the model
     device: where to run the model (CPU or CUDA)
     image_size: what to resize the file to (depends on the model-definition)
     means: mean for each channel
     deviations: standard deviation for each channel
     dog_lower_bound: index below where dogs start
     dog_upper_bound: index above where dogs end
    """
    def __init__(self,
                 model_definition: nn.Module=models.inception_v3,
                 image_size: int=INCEPTION_IMAGE_SIZE,
                 means: list=MEANS,
                 deviations: list=DEVIATIONS,
                 dog_lower_bound: int=DOG_LOWER,
                 dog_upper_bound: int=DOG_UPPER,
                 device: torch.device=None) -> None:
        self.model_definition = model_definition
        self.image_size = image_size
        self.means = means
        self.deviations = deviations
        self.dog_lower_bound = dog_lower_bound
        self.dog_upper_bound = dog_upper_bound
        self._device = device
        self._model = None
        self._transformer = None
        return

    @property
    def device(self) -> torch.device:
        """The device to add the model to"""
        if self._device is None:
            self._device = torch.device("cuda"
                                        if torch.cuda.is_available()
                                        else "cpu")
        return self._device

    @property
    def model(self) -> nn.Module:
        """Build the model"""
        if self._model is None:
            self._model = self.model_definition(pretrained=True)
            self._model.to(self.device)
            self._model.eval()
        return self._model

    @property
    def transformer(self) -> Transformer:
        """The transformer for the image data"""
        if self._transformer is None:
            self._transformer = Transformer()
        return self._transformer

    def __call__(self, image_path: str) -> bool:
        """Checks if there is a dog in the image"""
        image = Image.open(str(image_path))
        image = self.transformer.testing(image).unsqueeze(0).to(self.device)
        output = self.model(image)
        probabilities = torch.exp(output)
        _, top_class = probabilities.topk(1, dim=1)
        return self.dog_lower_bound < top_class.item() < self.dog_upper_bound
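As a quick sanity check, the detector can be called directly on an image path (the path below is a hypothetical placeholder):

# Hypothetical usage of DogDetector; "sample.jpg" is a made-up path.
detector = DogDetector()
print(detector("sample.jpg"))  # True when the top ImageNet class falls in the dog range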
In [55]:
class SpeciesDetector:
    """Detect dogs and humans

    Args:
     device: where to put the dog-detecting model
    """
    def __init__(self, device: torch.device=None) -> None:
        self.device = device
        self._dog_detector = None
        return

    @property
    def dog_detector(self) -> DogDetector:
        """Neural Network dog-detector"""
        if self._dog_detector is None:
            self._dog_detector = DogDetector(device=self.device)
        return self._dog_detector

    def is_human(self, image_path: str) -> bool:
        """Checks if the image is a human
        
        Args:
         image_path: path to the image

        Returns:
         True if there is a human face in the image
        """
        image = face_recognition.load_image_file(str(image_path))
        faces = face_recognition.face_locations(image)
        return len(faces) > 0

    def is_dog(self, image_path: str) -> bool:        
        """Checks if there is a dog in the image"""
        return self.dog_detector(image_path)
In [56]:
class DogPaths:
    """holds the paths to the dog images"""
    def __init__(self) -> None:
        self._main = None
        self._training = None
        self._testing = None
        self._validation = None
        return

    @property
    def main(self) -> Path:
        """The path to the main folder"""
        if self._main is None:
            self._main = DOG_PATH
        return self._main

    @property
    def training(self) -> Path:
        """Path to the training images"""
        if self._training is None:
            self._training = DOG_PATH.joinpath("train")
        return self._training

    @property
    def validation(self) -> Path:
        """Path to the validation images"""
        if self._validation is None:
            self._validation = DOG_PATH.joinpath("valid")
        return self._validation

    @property
    def testing(self) -> Path:
        """Path to the testing images"""
        if self._testing is None:
            self._testing = DOG_PATH.joinpath("test")
        return self._testing
In [57]:
class Inception:
    """Sets up the model, criterion, and optimizer for the transfer learning

    Args:
     classes: number of outputs for the final layer
     device: processor to use
     model_path: path to a saved model
     learning_rate: learning rate for the optimizer
     momentum: momentum for the optimizer
    """
    def __init__(self, classes: int,
                 device: torch.device=None,
                 model_path: str=None,
                 learning_rate: float=0.001, momentum: float=0.9) -> None:
        self.classes = classes
        self.model_path = model_path
        self.learning_rate = learning_rate
        self.momentum = momentum
        self._device = device
        self._model = None
        self._classifier_inputs = None
        self._criterion = None
        self._optimizer = None
        return

    @property
    def device(self) -> torch.device:
        """Processor to use (cpu or cuda)"""
        if self._device is None:
            self._device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")
        return self._device

    @property
    def model(self) -> models.inception_v3:
        """The inception model"""
        if self._model is None:
            self._model = models.inception_v3(pretrained=True)
            for parameter in self._model.parameters():
                parameter.requires_grad = False
            classifier_inputs = self._model.fc.in_features
            self._model.fc = nn.Linear(in_features=classifier_inputs,
                                       out_features=self.classes,
                                       bias=True)
            self._model.to(self.device)
            if self.model_path:
                self._model.load_state_dict(torch.load(self.model_path))
        return self._model

    @property
    def criterion(self) -> nn.CrossEntropyLoss:
        """The loss callable"""
        if self._criterion is None:
            self._criterion = nn.CrossEntropyLoss()
        return self._criterion

    @property
    def optimizer(self) -> optimizer.SGD:
        """The Gradient Descent object"""
        if self._optimizer is None:
            self._optimizer = optimizer.SGD(
                self.model.parameters(),
                lr=self.learning_rate,
                momentum=self.momentum)
        return self._optimizer
In [58]:
class DataSets:
    """Builds the data-sets

    Args:
     paths: object with the paths to the data-sets
    """
    def __init__(self, paths: DogPaths=None, transformer: Transformer=None) -> None:
        self._paths = paths
        self._transformer = transformer
        self._training = None
        self._validation = None
        self._testing = None
        return

    @property
    def paths(self) -> DogPaths:
        """Object with the paths to the image files"""
        if self._paths is None:
            self._paths = DogPaths()
        return self._paths

    @property
    def transformer(self) -> Transformer:
        """Object with the image transforms"""
        if self._transformer is None:
            self._transformer = Transformer()
        return self._transformer

    @property
    def training(self) -> datasets.ImageFolder:
        """The training data set"""
        if self._training is None:
            self._training = datasets.ImageFolder(
                root=self.paths.training,
                transform=self.transformer.training)
        return self._training

    @property
    def validation(self) -> datasets.ImageFolder:
        """The validation dataset"""
        if self._validation is None:
            self._validation = datasets.ImageFolder(
                root=self.paths.validation,
                transform=self.transformer.testing)
        return self._validation

    @property
    def testing(self) -> datasets.ImageFolder:
        """The test set"""
        if self._testing is None:
            self._testing = datasets.ImageFolder(
                root=self.paths.testing,
                transform=self.transformer.testing)
        return self._testing
In [59]:
class DogPredictor:
    """Makes dog-breed predictions
    
    Args:
     model_path: path to the model's state-dict
     device: processor to run the model on
     data_sets: a DataSets object
     inception: an Inception object
    """
    def __init__(self, model_path: str=None,
                 device: torch.device=None,
                 data_sets: DataSets=None,
                 inception: Inception=None) -> None:
        self.model_path = model_path
        self.device = device
        self._data_sets = data_sets
        self._inception = inception
        self._breeds = None
        return

    @property
    def data_sets(self) -> DataSets:
        if self._data_sets is None:
            self._data_sets = DataSets()
        return self._data_sets

    @property
    def inception(self) -> Inception:
        """An Inception object"""
        if self._inception is None:
            self._inception = Inception(
                classes=len(self.data_sets.training.classes),
                model_path=self.model_path,
                device=self.device)
            self._inception.model.eval()
        return self._inception

    @property
    def breeds(self) -> list:
        """A list of dog-breeds"""
        if self._breeds is None:
            self._breeds = [name[4:].replace("_", " ")
                            for name in self.data_sets.training.classes]
        return self._breeds

    def predict_index(self, image_path:str) -> int:
        """Predicts the index of the breed of the dog in the image

        Args:
         image_path: path to the image
        Returns:
         index in the breeds list for the image
        """
        model = self.inception.model        
        image = Image.open(image_path)
        tensor = self.data_sets.transformer.testing(image)
        # add a batch number
        tensor = tensor.unsqueeze_(0)
        tensor = tensor.to(self.inception.device)
        x = torch.autograd.Variable(tensor)
        output = model(x)
        return output.data.cpu().numpy().argmax()

    def __call__(self, image_path) -> str:
        """Predicts the breed of the dog in the image

        Args:
         image_path: path to the image
        Returns:
         name of the breed
        """
        return self.breeds[self.predict_index(image_path)]

The Dog Breed Classifier

This implements the dog-breed classifier using the classes immediately above.

In [60]:
class DogBreedClassifier:
    """Tries To predict the dog-breed for an image

    Args:
     model_path: path to the inception-model
    """
    def __init__(self, model_path: str) -> None:
        self.model_path = model_path
        self._breed_predictor = None
        self._species_detector = None
        return

    @property
    def breed_predictor(self) -> DogPredictor:
        """Predictor of dog-breeds"""
        if self._breed_predictor is None:
            self._breed_predictor = DogPredictor(model_path=self.model_path)
        return self._breed_predictor

    @property
    def species_detector(self) -> SpeciesDetector:
        """Detector of humans and dogs"""
        if self._species_detector is None:
            self._species_detector = SpeciesDetector(
                device=self.breed_predictor.inception.device)
        return self._species_detector

    def render(self, image_path: str, species: str, breed: str) -> None:
        """Renders the image

        Args:
         image_path: path to the image to render
         species: identified species
         breed: identified breed
        """
        name = " ".join(image_path.name.split(".")[0].split("_")).title()
        figure, axe = pyplot.subplots()
        figure.suptitle("{} ({})".format(species, name), weight="bold")
        axe.set_xlabel("Looks like a {}.".format(breed))
        image = Image.open(image_path)
        axe.tick_params(axis="both",
                        which="both",
                        bottom=False,
                        top=False)
        axe.get_xaxis().set_ticks([])
        axe.get_yaxis().set_ticks([])
        axe_image = axe.imshow(image)
        return

    def __call__(self, image_path:str) -> None:
        """detects the dog-breed and displays the image

        Args:
         image_path: path to the image
        """
        image_path = Path(image_path)
        is_dog = self.species_detector.is_dog(image_path)
        is_human = self.species_detector.is_human(image_path)

        if not is_dog and not is_human:
            species = "Error: Neither Human nor Dog"
            breed = "?"
        else:
            breed = self.breed_predictor(image_path)

        if is_dog and is_human:
            species = "Human-Dog Hybrid"
        elif is_dog:
            species = "Dog"
        elif is_human:
            species = "Human"
        self.render(image_path, species, breed)
        return

The next cell transfers the existing models to the CPU to free up memory on the GPU, since the class-based version builds them anyway.

In [67]:
for model in MODELS:
    model.cpu()
classifier = DogBreedClassifier(model_path=transfer_model_path)
In [68]:
def run_app(img_path):
    """Runs the dog breed classifier

    Args:
     img_path: path to the image to classify
    """
    classifier(img_path)
    return

Step 6: Test Your Algorithm

In this section, you will take your new algorithm for a spin! What kind of dog does the algorithm think that you look like? If you have a dog, does it predict your dog's breed accurately? If you have a cat, does it mistakenly think that your cat is a dog?

(IMPLEMENTATION) Test Your Algorithm on Sample Images!

Test your algorithm on at least six images on your computer. Feel free to use any images you like. Use at least two human and two dog images.

First, I'll create a function to find species detections that were wrong.

In [12]:
def first_prediction(source: list, start:int=0, count: int=1) -> int:
    """Gets the index of the first True prediction

    Args:
     source: list of True/False predictions
     start: index to start the search from
     count: number of indices to find

    Returns:
     indices of first True predictions found
    """
    indices = []
    found = 0
    for index, prediction in enumerate(source[start:]):
        if prediction:
            print("{}: {}".format(start + index, prediction))
            indices.append(start + index)
            found += 1
            if found == count:
                break
    return indices
In [37]:
human_dog = first_prediction(dlib_false_positives)
0: True
In [38]:
hot_dog = "hot_dog.jpg"
rabbit = "rabbit.jpg"
test_images = [dog_files_short[human_dog[0]], hot_dog, rabbit]
In [39]:
dogs = numpy.random.choice(dog_files, 3)
humans = numpy.random.choice(human_files, 3)
In [71]:
images = numpy.hstack((dogs, humans, test_images))
for image in images:
    run_app(image)