NER: Training the Model

Training the Model

Imports

# from python
from collections import namedtuple
from functools import partial
from tempfile import TemporaryFile
from typing import Iterator

import random
import sys

# from pypi
from holoviews import opts
from trax import layers
from trax.supervised import training

import holoviews
import hvplot.pandas
import pandas
import trax

# this project
from neurotic.nlp.named_entity_recognition import (DataGenerator,
                                                   NER,
                                                   NERData,
                                                   TOKEN)
# another project
from graeae import EmbedHoloviews, Timer

Set Up

Plotting

# Slug of this post; it is interpolated into the folder_path that every
# Embed(...) call hands to EmbedHoloviews.
slug = "ner-training-the-model"
Embed = partial(
    EmbedHoloviews,
    folder_path=f"files/posts/nlp/{slug}",
)

# Shared plot settings: figure size, font scaling and the three palette colors.
Plot = namedtuple("Plot", "width height fontscale tan blue red")
PLOT = Plot(width=900, height=750, fontscale=2,
            tan="#ddb377", blue="#4687b7", red="#ce7b6d")

Data

# The data holder; the code below reads its vocabulary, tags and data sets.
ner = NERData()

# Values shared by the data generators and the training function.
Settings = namedtuple(
    "Settings",
    "seed batch_size embedding_size learning_rate",
)
SETTINGS = Settings(
    seed=33,
    batch_size=64,
    embedding_size=50,
    learning_rate=0.01,
)

# The model wrapper is sized from the vocabulary and the tag set.
vocabulary_size, tag_count = len(ner.data.vocabulary), len(ner.data.tags)
trainee = NER(vocabulary_size=vocabulary_size, tag_count=tag_count)
random.seed(SETTINGS.seed)

# Both generators share the batch size and the padding value that is looked up
# in the vocabulary, so bind those once and only vary the data set.
batch_generator = partial(DataGenerator,
                          batch_size=SETTINGS.batch_size,
                          padding=ner.data.vocabulary[TOKEN.pad])

training_generator = batch_generator(x=ner.data.data_sets.x_train,
                                     y=ner.data.data_sets.y_train)

validation_generator = batch_generator(x=ner.data.data_sets.x_validate,
                                       y=ner.data.data_sets.y_validate)

# Timer used around the training run further down.
TIMER = Timer(speak=False)

Middle

The Data Generators

Before we start, we need to create the data generators for the training and validation data. It is important that you mask the padding in the loss weights of your data, which can be done using the id_to_mask argument of trax.data.inputs.add_loss_weights.

# Add loss weights to both batch streams, masking the padding id so that
# padded positions do not count toward the loss.
pad_id = ner.data.vocabulary[TOKEN.pad]

train_generator = trax.data.inputs.add_loss_weights(training_generator,
                                                    id_to_mask=pad_id)

evaluate_generator = trax.data.inputs.add_loss_weights(validation_generator,
                                                       id_to_mask=pad_id)

Training The Model

You will now write a function that takes in your model and trains it.

As you've seen in the previous assignments, you will first create the TrainTask and EvalTask using your data generator. Then you will use the training.Loop to train your model.

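Instructions: Implement the train_model function below to train the neural network above. Here is what you should do: create the TrainTask and the EvalTask from your data generators, create the training.Loop from the model and the two tasks, and run the loop for the requested number of training steps.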

You'll be using a cross entropy loss, with an Adam optimizer. Please read the trax documentation to get a full understanding. The trax GitHub also contains some useful information and a link to a colab notebook.

def train_model(NER: trax.layers.Serial,
                train_generator: Iterator,
                eval_generator: Iterator,
                train_steps: int=1,
                steps_per_checkpoint: int=100,
                learning_rate: float=SETTINGS.learning_rate,
                verbose: bool=False,
                output_dir="~/models/ner/",
                n_eval_batches: int=SETTINGS.batch_size) -> training.Loop:
    """Train the Named Entity Recognition Model

    Builds a training task (weighted cross entropy loss with an Adam
    optimizer) and an evaluation task (weighted cross entropy and accuracy),
    wires them into a trax training Loop and runs it.

    Note: the ``NER`` parameter shadows the imported ``NER`` class inside this
    function; the name is kept so existing keyword callers keep working.

    Args:
      NER: the model you are building
      train_generator: The data generator for training examples
      eval_generator: The data generator for validation examples
      train_steps: number of training steps to run
      steps_per_checkpoint: number of training steps between checkpoints
      learning_rate: learning rate handed to the Adam optimizer
      verbose: if True, print how many steps are about to run
      output_dir: folder to save your model
      n_eval_batches: number of batches used for each evaluation; defaults to
        SETTINGS.batch_size, the value that used to be hard-coded here

    Returns:
      training_loop: a trax supervised training Loop
    """
    train_task = training.TrainTask(
        labeled_data=train_generator,
        loss_layer=layers.WeightedCategoryCrossEntropy(),
        optimizer=trax.optimizers.Adam(learning_rate),
        n_steps_per_checkpoint=steps_per_checkpoint,
    )

    eval_task = training.EvalTask(
        labeled_data=eval_generator,
        metrics=[layers.WeightedCategoryCrossEntropy(),
                 layers.Accuracy()],
        n_eval_batches=n_eval_batches,
    )

    training_loop = training.Loop(
        NER,
        train_task,
        eval_tasks=[eval_task],
        output_dir=output_dir)

    if verbose:
        print(f"Running {train_steps} steps")
    training_loop.run(n_steps=train_steps)
    return training_loop

For some reason trax doesn't seem to give you the option to turn off the print statements, so I'm going to suppress all stdout while the training runs.

training_steps = 1500
real_stdout = sys.stdout

TIMER.emit = False
TIMER.start()
# Training output is sent to a throw-away file instead of the console.
with TemporaryFile("w") as temp_file:
    sys.stdout = temp_file
    try:
        training_loop = train_model(trainee.model, train_generator,
                                    evaluate_generator,
                                    steps_per_checkpoint=10,
                                    train_steps=training_steps,
                                    verbose=False)
    finally:
        # Restore stdout while the temporary file is still open, and even if
        # training raises, so nothing later writes to a closed file or
        # silently loses its output.
        sys.stdout = real_stdout
TIMER.stop()
print(f"{TIMER.ended - TIMER.started}")
0:03:51.538599

Plotting the Metrics

Accuracy

# Evaluation accuracy recorded by the training loop, one row per evaluation.
history = training_loop.history
accuracy = history.get("eval", "metrics/Accuracy")
frame = pandas.DataFrame(accuracy, columns=["Batch", "Accuracy"])

# Row with the highest accuracy; the red cross-hairs mark it on the plot.
best_index = frame["Accuracy"].idxmax()
maximum = frame.loc[best_index]
vline = holoviews.VLine(maximum.Batch).opts(opts.VLine(color=PLOT.red))
hline = holoviews.HLine(maximum.Accuracy).opts(opts.HLine(color=PLOT.red))

curve_options = opts.Curve(color=PLOT.blue)
line = frame.hvplot(x="Batch", y="Accuracy").opts(curve_options)

overlay = line * hline * vline
plot = overlay.opts(
    width=PLOT.width,
    height=PLOT.height,
    title="Evaluation Batch Accuracy",
)
output = Embed(plot=plot, file_name="evaluation_accuracy")()
print(output)

Figure Missing

Plotting Loss

# Evaluation cross entropy recorded by the training loop.
loss = history.get("eval", "metrics/WeightedCategoryCrossEntropy")
frame = pandas.DataFrame(loss, columns=["Batch", "Loss"])

# Row with the lowest loss; the red cross-hairs mark it on the plot.
best_index = frame["Loss"].idxmin()
minimum = frame.loc[best_index]
vline = holoviews.VLine(minimum.Batch).opts(opts.VLine(color=PLOT.red))
hline = holoviews.HLine(minimum.Loss).opts(opts.HLine(color=PLOT.red))

curve_options = opts.Curve(color=PLOT.blue)
line = frame.hvplot(x="Batch", y="Loss").opts(curve_options)

overlay = line * hline * vline
plot = overlay.opts(
    width=PLOT.width,
    height=PLOT.height,
    title="Evaluation Batch Cross Entropy",
)
output = Embed(plot=plot, file_name="evaluation_cross_entropy")()
print(output)

Figure Missing

So it looks like I passed the best point again and am probably overfitting. I wonder if trax has a callback to grab the best model like PyTorch does? I'm surprised at how fast these models train.