Dermatologist Mini-Project

Introduction

This is an exercise in using transfer learning to diagnose melanoma based on images of skin lesions. There are three diseases to be detected:

  • Melanoma
  • Nevus
  • Seborrheic Keratosis

There is a paper online here (PDF link) that describes the approaches that did best in the competition.

Data Sources

The data is taken from the ISIC 2017: Skin Lesion Analysis Towards Melanoma Detection challenge.

Each folder contains three sub-folders:

  • melanoma/
  • nevus/
  • seborrheic_keratosis/

Set Up

Imports

Python

from pathlib import Path
import warnings

PyPI

from dotenv import load_dotenv
from PIL import Image, ImageFile
from torchvision import datasets
import matplotlib
warnings.filterwarnings("ignore", category=matplotlib.cbook.mplDeprecation)
import matplotlib.pyplot as pyplot
import matplotlib.image as mpimage
import matplotlib.patches as patches
import numpy
import pyttsx3
import seaborn
import torch
import torchvision.models as models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optimizer
import torchvision.transforms as transforms

This Project

from neurotic.tangles.data_paths import (Batches, DataPathTwo, DataSets,
                                         TrainingTestingValidationPaths,
                                         Transformer)
from neurotic.tangles.models import Inception
from neurotic.tangles.timer import Timer
from neurotic.tangles.trainer import Trainer
from neurotic.tangles.logging import Tee

Plotting

# Render figures inline in the notebook, at retina resolution.
shell = get_ipython()
shell.run_line_magic('matplotlib', 'inline')
shell.run_line_magic('config', "InlineBackend.figure_format = 'retina'")

# Seaborn defaults for every plot: white background without grid lines,
# a sans-serif font stack, and 8x6 figures.
plot_settings = {
    "axes.grid": False,
    "font.family": ["sans-serif"],
    "font.sans-serif": ["Open Sans", "Latin Modern Sans", "Lato"],
    "figure.figsize": (8, 6),
}
seaborn.set(style="whitegrid", rc=plot_settings, font_scale=1)

Set the Random Seed

# Seed both random number generators in use: PyTorch keeps its own generator,
# so seeding NumPy alone does not make the training runs repeatable.
numpy.random.seed(seed=2019)
torch.manual_seed(2019)

Handle Truncated Images

At least one of the images appears to be truncated, which raises an exception when it is loaded. The next setting tells PIL to load truncated images anyway so that we can keep working.

# Tell PIL to load image files whose data is cut short instead of raising
# an error (the setting is module-wide, so it affects every later image load).
ImageFile.LOAD_TRUNCATED_IMAGES = True

Constants

These are some global constants.

Load Dotenv

# Read the key/value pairs from a `.env` file into the environment
# (python-dotenv).
# NOTE(review): presumably this is where the folder keys used by DataPathTwo
# come from -- confirm.
load_dotenv()

Model Path

This is where to save the best model.

# Folder that holds the saved models; its `.folder` attribute is used later
# to build the path of the best-model file.
# NOTE(review): presumably "MODELS" is a key resolved from the environment
# loaded by load_dotenv -- confirm in DataPathTwo.
MODEL_PATH = DataPathTwo(folder_key="MODELS")

The Model

The Training

# Re-load the environment so this cell can also be run on its own.
load_dotenv()
EPOCHS = 100
# File the trainer writes the model to (the trainer is given it as model_path).
transfer_path = MODEL_PATH.folder.joinpath("model_transfer.pt")
# Log folder. This has to be the same folder the testing section reads the
# test log back from ("~/logs/dermatologist"); with the "dermatalogist"
# misspelling the logs written here would never be the ones replayed there.
directory = "~/logs/dermatologist"
training_log = Tee(log_name="inception_train.log", directory_name=directory)
testing_log = Tee(log_name="inception_test.log", directory_name=directory)
data_sets = DataSets()
# One output per disease class found in the data sets.
inception = Inception(data_sets.class_count)
batches = Batches(data_sets)
trainer = Trainer(training_batches=batches.training,
                  validation_batches=batches.validation,
                  testing_batches=batches.testing,
                  model=inception.model,
                  model_path=transfer_path,
                  optimizer=inception.optimizer,
                  criterion=inception.criterion,
                  device=inception.device,
                  epochs=EPOCHS,
                  epoch_start=1,
                  is_inception=True,
                  load_model=False,
                  training_log=training_log,
                  testing_log=testing_log,
                  beep=True,
)
# Calling the trainer runs the training epochs and then the test pass.
trainer()
Starting Training
Started: 2019-01-26 13:59:40.249210
Started: 2019-01-26 13:59:40.249398
Ended: 2019-01-26 14:16:25.675136
Elapsed: 0:16:45.425738
Epoch: 1        Training - Loss: 0.85   Accuracy: 0.67  Validation - Loss: 0.97 Accuracy: 0.53
Validation loss decreased (inf --> 0.973706). Saving model ...
Started: 2019-01-26 14:16:26.913182
Ended: 2019-01-26 14:33:23.108155
Elapsed: 0:16:56.194973
Epoch: 2        Training - Loss: 0.78   Accuracy: 0.68  Validation - Loss: 0.93 Accuracy: 0.56
Validation loss decreased (0.973706 --> 0.934509). Saving model ...
Ended: 2019-01-26 14:33:23.997547
Elapsed: 0:16:57.084365

Starting Testing
Started: 2019-01-26 14:33:24.706175
Test Loss: 0.697
Test Accuracy: 70.95 (1419.0/2000)
Ended: 2019-01-26 14:47:30.356073
Elapsed: 0:14:05.649898

The Testing

The remote session died so I'll just load the test output.

# Replay the saved test log line by line, dropping trailing whitespace.
testing_log = Tee(log_name="inception_test.log", directory_name="~/logs/dermatologist")
with testing_log.path.open() as log_file:
    for entry in map(str.rstrip, log_file):
        print(entry)

Starting Testing
Test Loss: 0.620
Test Accuracy: 74.80 (1496.0/2000)

Prepping The Test File

To check the model you need to create a CSV file with three columns.

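| Column | Description             | Example                              |
|--------|-------------------------|--------------------------------------|
| Id     | Path to the file        | data/test/melanoma/ISIC_0012258.jpg  |
| task_1 | Is melanoma             | 0                                    |
| task_2 | Is seborrheic keratosis | 1                                    |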
class Predictions:
    """Maps the test data to a predictions file

    Calling the object walks the class sub-folders of the test folder,
    predicts a class for every image and writes one CSV row per image.

    Args:
     model_path: path to the stored model parameters
     device: processor to use (None: CUDA if available, otherwise the CPU)
     output_path: path to the CSV to output
     test_path: path to the test folder
     data_sets: the data-sets object (None: a ``DataSets()`` is built on first use)
     inception: object with the model (None: one is built on first use)
    """
    def __init__(self, model_path: Path,
                 device: torch.device,
                 output_path: Path,
                 test_path: Path,
                 data_sets: DataSets=None,
                 inception: Inception=None) -> None:
        self.model_path = model_path
        self.output_path = output_path
        self.test_path = test_path
        self._device = device
        self._data_sets = data_sets
        self._activation = None
        # this goes through the property setter, which tolerates None
        self.inception = inception
        return

    @property
    def data_sets(self) -> DataSets:
        """the data-sets"""
        if self._data_sets is None:
            self._data_sets = DataSets()
        return self._data_sets

    @property
    def device(self):
        """The processor to use"""
        if self._device is None:
            self._device = torch.device("cuda"
                                        if torch.cuda.is_available()
                                        else "cpu")
        return self._device

    @property
    def inception(self) -> Inception:
        """The Inception Object (built on first use if none was given)"""
        if self._inception is None:
            # the class count is passed positionally, the same way the
            # training code builds its Inception object
            self._inception = Inception(
                self.data_sets.class_count,
                model_path=self.model_path,
                device=self.device)
            self._inception.model.eval()
        return self._inception

    @inception.setter
    def inception(self, new_inception: Inception) -> None:
        """Stores the inception object and sets its model to eval only

        Args:
         new_inception: object with the model, or None to build it on first use
        """
        self._inception = new_inception
        if new_inception is not None:
            new_inception.model.eval()
        return

    @property
    def activation(self) -> nn.Sigmoid:
        """The non-linear activation"""
        if self._activation is None:
            self._activation = nn.Sigmoid()
        return self._activation

    def prediction(self, image_path: Path) -> int:
        """Calculate predicted class for an image

        Args:
         image_path: path to an image file
        Returns:
         index of the class with the highest model output
        """
        model = self.inception.model
        # the transform reads the pixel data, so the file can be closed
        # as soon as the tensor has been built
        with Image.open(image_path) as image:
            tensor = self.data_sets.transformer.testing(image)
        # add a batch dimension
        tensor = tensor.unsqueeze(0).to(self.inception.device)
        # inference only: no need to track gradients
        with torch.no_grad():
            output = torch.exp(model(tensor))
        _, top_class = output.topk(1, dim=1)
        return top_class.item()

    def __call__(self) -> None:
        """Creates CSV of predictions

        Writes a header row and then one ``Id,task_1,task_2`` row for every
        file in every sub-folder of the test folder.
        """
        with self.output_path.open("w") as writer:
            writer.write("Id,task_1,task_2\n")
            for category in self.test_path.iterdir():
                if not category.is_dir():
                    # stray file next to the class folders
                    continue
                for path in category.iterdir():
                    # path relative to the project folder, prefixed with data/
                    identifier = 'data/' + str(path).split("/dermatologist/")[-1]
                    guess = self.prediction(path)
                    # NOTE(review): assumes class indices follow the folder
                    # names in alphabetical order (0: melanoma, 1: nevus,
                    # 2: seborrheic keratosis) -- confirm against DataSets
                    first = 1 if guess == 0 else 0
                    second = 1 if guess == 2 else 0
                    writer.write("{},{},{}\n".format(identifier,
                                                     first,
                                                     second))
        return
# Where the predictions CSV goes and where the test images live.
csv_output = Path(
    "~/documents/pcloud_drive/outcomes/dermatologist/predictions.csv"
).expanduser()
test_path = DataPathTwo(folder_key="TEST").folder
TIMER = Timer()

# Re-use the data sets and the model object built for training.
predictions = Predictions(
    model_path=transfer_path,
    device=inception.device,
    output_path=csv_output,
    test_path=test_path,
    data_sets=data_sets,
    inception=inception,
)

# Time the whole pass over the test folder.
with TIMER:
    predictions()
Started: 2019-01-29 22:36:10.975682
Ended: 2019-01-29 22:46:47.190355
Elapsed: 0:10:36.214673

References