NER: Training the Model
Table of Contents
Training the Model
Imports
# from python
from collections import namedtuple
from functools import partial
from tempfile import TemporaryFile
import random
import sys
# from pypi
from holoviews import opts
from trax import layers
from trax.supervised import training
import holoviews
import hvplot.pandas
import pandas
import trax
# this project
from neurotic.nlp.named_entity_recognition import (DataGenerator,
                                                    NER,
                                                    NERData,
                                                    TOKEN)
# another project
from graeae import EmbedHoloviews, Timer
Set Up
Plotting
slug = "ner-training-the-model"
Embed = partial(EmbedHoloviews, folder_path=f"files/posts/nlp/{slug}")
Plot = namedtuple("Plot", ["width", "height", "fontscale", "tan", "blue", "red"])
PLOT = Plot(
    width=900,
    height=750,
    fontscale=2,
    tan="#ddb377",
    blue="#4687b7",
    red="#ce7b6d",
)
Data
ner = NERData()
Settings = namedtuple("Settings", ["seed", "batch_size", "embedding_size", "learning_rate"])
SETTINGS = Settings(seed=33, batch_size=64, embedding_size=50, learning_rate=0.01)
trainee = NER(vocabulary_size=len(ner.data.vocabulary),
              tag_count=len(ner.data.tags))
random.seed(SETTINGS.seed)
training_generator = DataGenerator(x=ner.data.data_sets.x_train,
                                   y=ner.data.data_sets.y_train,
                                   batch_size=SETTINGS.batch_size,
                                   padding=ner.data.vocabulary[TOKEN.pad])
validation_generator = DataGenerator(x=ner.data.data_sets.x_validate,
                                     y=ner.data.data_sets.y_validate,
                                     batch_size=SETTINGS.batch_size,
                                     padding=ner.data.vocabulary[TOKEN.pad])
TIMER = Timer(speak=False)
Middle
The Data Generators
Before we start, we need to create the data generators for the training and validation data. It is important to mask the padding in the loss weights of your data, which can be done with the id_to_mask argument of trax.data.inputs.add_loss_weights.
train_generator = trax.data.inputs.add_loss_weights(
    training_generator,
    id_to_mask=ner.data.vocabulary[TOKEN.pad])

evaluate_generator = trax.data.inputs.add_loss_weights(
    validation_generator,
    id_to_mask=ner.data.vocabulary[TOKEN.pad])
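As a quick sanity check (a sketch, not part of the original assignment - it assumes the wrapped generators cycle indefinitely and yield numpy arrays, so pulling one batch is harmless), each batch should now carry a third element, the loss weights, with zeros wherever the label is the padding id.

inputs, labels, weights = next(train_generator)
pad_id = ner.data.vocabulary[TOKEN.pad]
print(inputs.shape, labels.shape, weights.shape)
# every padded position should contribute nothing to the loss
assert (weights == (labels != pad_id)).all()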
Training The Model
You will now write a function that takes in your model and trains it.

As you've seen in the previous assignments, you will first create the TrainTask and EvalTask using your data generators. Then you will use training.Loop to train your model.

Instructions: Implement the train_model function below to train the neural network above. Here is a list of things you should do:
- Create the trainer object by calling trax.supervised.training.Loop and pass in the following:
  - model = NER
  - a training task that uses the train data generator defined in the cell above
    - loss_layer = tl.CrossEntropyLoss()
    - optimizer = trax.optimizers.Adam(0.01)
  - an evaluation task that uses the validation data generator defined in the cell above
    - metrics for EvalTask: tl.CrossEntropyLoss() and tl.Accuracy()
    - in EvalTask set n_eval_batches=10 for better evaluation accuracy
  - output_dir = output_dir
You'll be using a cross entropy loss with an Adam optimizer. Please read the trax documentation to get a full understanding. The trax GitHub repository also contains some useful information and a link to a colab notebook. (One note: where the instructions say tl.CrossEntropyLoss, the implementation below uses layers.WeightedCategoryCrossEntropy, the weighted cross entropy layer in the trax version installed here, which uses the loss weights added above to ignore the padding.)
def train_model(NER: trax.layers.Serial,
                train_generator: type,
                eval_generator: type,
                train_steps: int=1,
                steps_per_checkpoint: int=100,
                learning_rate: float=SETTINGS.learning_rate,
                verbose: bool=False,
                output_dir="~/models/ner/") -> training.Loop:
    """Train the Named Entity Recognition Model

    Args:
     NER: the model you are building
     train_generator: the data generator for training examples
     eval_generator: the data generator for validation examples
     train_steps: number of training steps
     steps_per_checkpoint: steps between evaluations/checkpoints
     learning_rate: learning rate for the Adam optimizer
     verbose: whether to print the number of steps being run
     output_dir: folder to save your model

    Returns:
     training_loop: a trax supervised training Loop
    """
    train_task = training.TrainTask(
        labeled_data=train_generator,
        loss_layer=layers.WeightedCategoryCrossEntropy(),
        optimizer=trax.optimizers.Adam(learning_rate),
        n_steps_per_checkpoint=steps_per_checkpoint,
    )

    eval_task = training.EvalTask(
        labeled_data=eval_generator,
        metrics=[layers.WeightedCategoryCrossEntropy(),
                 layers.Accuracy()],
        n_eval_batches=SETTINGS.batch_size,
    )

    training_loop = training.Loop(
        NER,
        train_task,
        eval_tasks=[eval_task],
        output_dir=output_dir)

    if verbose:
        print(f"Running {train_steps} steps")
    training_loop.run(n_steps=train_steps)
    return training_loop
For some reason they don't give you the option to turn off the print statements, so I'm going to suppress all stdout.
training_steps = 1500
real_stdout = sys.stdout
TIMER.emit = False
TIMER.start()

with TemporaryFile("w") as temp_file:
    sys.stdout = temp_file
    training_loop = train_model(trainee.model, train_generator,
                                evaluate_generator,
                                steps_per_checkpoint=10,
                                train_steps=training_steps,
                                verbose=False)
TIMER.stop()
sys.stdout = real_stdout
print(f"{TIMER.ended - TIMER.started}")
0:03:51.538599
Plotting the Metrics
Accuracy
history = training_loop.history
frame = pandas.DataFrame(history.get("eval", "metrics/Accuracy"),
                         columns="Batch Accuracy".split())
maximum = frame.loc[frame.Accuracy.idxmax()]
vline = holoviews.VLine(maximum.Batch).opts(opts.VLine(color=PLOT.red))
hline = holoviews.HLine(maximum.Accuracy).opts(opts.HLine(color=PLOT.red))
line = frame.hvplot(x="Batch",
                    y="Accuracy").opts(
                        opts.Curve(color=PLOT.blue))
plot = (line * hline * vline).opts(
    width=PLOT.width,
    height=PLOT.height,
    title="Evaluation Batch Accuracy",
)
output = Embed(plot=plot, file_name="evaluation_accuracy")()
print(output)
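Since history.get returns (step, value) pairs (which is why the frame gets two columns), you can also pull out the best checkpoint directly, without the plot. A small sketch using only the history call already shown above:

steps_and_accuracies = history.get("eval", "metrics/Accuracy")
# each entry is a (step, accuracy) pair, one per evaluation checkpoint
best_step, best_accuracy = max(steps_and_accuracies, key=lambda pair: pair[1])
print(f"Best evaluation accuracy {best_accuracy:0.3f} at step {best_step}.")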
Plotting Loss
frame = pandas.DataFrame(history.get("eval",
                                     "metrics/WeightedCategoryCrossEntropy"),
                         columns="Batch Loss".split())
minimum = frame.loc[frame.Loss.idxmin()]
vline = holoviews.VLine(minimum.Batch).opts(opts.VLine(color=PLOT.red))
hline = holoviews.HLine(minimum.Loss).opts(opts.HLine(color=PLOT.red))
line = frame.hvplot(x="Batch", y="Loss").opts(opts.Curve(color=PLOT.blue))
plot = (line * hline * vline).opts(
    width=PLOT.width,
    height=PLOT.height,
    title="Evaluation Batch Cross Entropy",
)
output = Embed(plot=plot, file_name="evaluation_cross_entropy")()
print(output)
output = Embed(plot=plot, file_name="evaluation_cross_entropy")()
print(output)
So it looks like I passed the best point again and am probably overfitting. I wonder if they have a callback to grab the best model the way PyTorch does? I'm surprised at how fast these models train.
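As far as I can tell trax doesn't ship a keep-the-best-model callback, but the Loop does write a checkpoint to output_dir at every n_steps_per_checkpoint (named model.pkl.gz, if I'm reading the trax source correctly). The file gets overwritten each time, so out of the box you only get the latest weights back, but you can reload those (or a copy you squirreled away at the best step) by rebuilding the architecture and loading the saved weights into it. A sketch, assuming the default checkpoint name and the output_dir used above:

from pathlib import Path

checkpoint = Path("~/models/ner/model.pkl.gz").expanduser()

# build a fresh copy of the network and load the checkpointed weights into it
restored = NER(vocabulary_size=len(ner.data.vocabulary),
               tag_count=len(ner.data.tags)).model
restored.init_from_file(str(checkpoint), weights_only=True)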