Source code for sklearn.metrics.regression

"""Metrics to assess performance on regression task

Functions named as ``*_score`` return a scalar value to maximize: the higher
the better

Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
the lower the better
"""

# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#          Mathieu Blondel <mathieu@mblondel.org>
#          Olivier Grisel <olivier.grisel@ensta.org>
#          Arnaud Joly <a.joly@ulg.ac.be>
#          Jochen Wersdorfer <jochen@wersdoerfer.de>
#          Lars Buitinck <L.J.Buitinck@uva.nl>
#          Joel Nothman <joel.nothman@gmail.com>
#          Noel Dawe <noel@dawe.me>
#          Manoj Kumar <manojkumarsivaraj334@gmail.com>
#          Michael Eickenberg <michael.eickenberg@gmail.com>
#          Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
# License: BSD 3 clause

from __future__ import division

import numpy as np

from ..utils.validation import check_array, check_consistent_length
from ..utils.validation import column_or_1d
from ..externals.six import string_types

import warnings

__ALL__ = [
    "mean_absolute_error",
    "mean_squared_error",
    "median_absolute_error",
    "r2_score",
    "explained_variance_score"
]


def _check_reg_targets(y_true, y_pred, multioutput):
    """Check that y_true and y_pred belong to the same regression task

    Parameters
    ----------
    y_true : array-like,

    y_pred : array-like,

    multioutput : array-like or string in ['raw_values', uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    Returns
    -------
    type_true : one of {'continuous', continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'

    y_true : array-like of shape = (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples, n_outputs)
        Estimated target values.

    multioutput : array-like of shape = (n_outputs) or string in ['raw_values',
        uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    """
    check_consistent_length(y_true, y_pred)
    y_true = check_array(y_true, ensure_2d=False)
    y_pred = check_array(y_pred, ensure_2d=False)

    if y_true.ndim == 1:
        y_true = y_true.reshape((-1, 1))

    if y_pred.ndim == 1:
        y_pred = y_pred.reshape((-1, 1))

    if y_true.shape[1] != y_pred.shape[1]:
        raise ValueError("y_true and y_pred have different number of output "
                         "({0}!={1})".format(y_true.shape[1], y_pred.shape[1]))

    n_outputs = y_true.shape[1]
    multioutput_options = (None, 'raw_values', 'uniform_average',
                           'variance_weighted')
    if multioutput not in multioutput_options:
        multioutput = check_array(multioutput, ensure_2d=False)
        if n_outputs == 1:
            raise ValueError("Custom weights are useful only in "
                             "multi-output cases.")
        elif n_outputs != len(multioutput):
            raise ValueError(("There must be equally many custom weights "
                              "(%d) as outputs (%d).") %
                             (len(multioutput), n_outputs))
    y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput'

    return y_type, y_true, y_pred, multioutput


def mean_absolute_error(y_true, y_pred,
                        sample_weight=None,
                        multioutput='uniform_average'):
    """Mean absolute error regression loss

    Read more in the :ref:`User Guide <mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average']
        or array-like of shape (n_outputs)
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.


    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAE output is non-negative floating point. The best value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_error(y_true, y_pred)
    0.75
    >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([ 0.5,  1. ])
    >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.849...
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)
    output_errors = np.average(np.abs(y_pred - y_true),
                               weights=sample_weight, axis=0)
    if isinstance(multioutput, string_types):
        if multioutput == 'raw_values':
            return output_errors
        elif multioutput == 'uniform_average':
            # pass None as weights to np.average: uniform mean
            multioutput = None

    return np.average(output_errors, weights=multioutput)


[docs]def mean_squared_error(y_true, y_pred, sample_weight=None, multioutput='uniform_average'): """Mean squared error regression loss Read more in the :ref:`User Guide <mean_squared_error>`. Parameters ---------- y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape = (n_samples), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average'] or array-like of shape (n_outputs) Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred) 0.375 >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> mean_squared_error(y_true, y_pred) # doctest: +ELLIPSIS 0.708... >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') ... # doctest: +ELLIPSIS array([ 0.416..., 1. ]) >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) ... # doctest: +ELLIPSIS 0.824... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) if isinstance(multioutput, string_types): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput)
def median_absolute_error(y_true, y_pred): """Median absolute error regression loss Read more in the :ref:`User Guide <median_absolute_error>`. Parameters ---------- y_true : array-like of shape = (n_samples) Ground truth (correct) target values. y_pred : array-like of shape = (n_samples) Estimated target values. Returns ------- loss : float A positive floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import median_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> median_absolute_error(y_true, y_pred) 0.5 """ y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, 'uniform_average') if y_type == 'continuous-multioutput': raise ValueError("Multioutput not supported in median_absolute_error") return np.median(np.abs(y_pred - y_true)) def explained_variance_score(y_true, y_pred, sample_weight=None, multioutput='uniform_average'): """Explained variance regression score function Best possible score is 1.0, lower values are worse. Read more in the :ref:`User Guide <explained_variance_score>`. Parameters ---------- y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape = (n_samples), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ 'variance_weighted'] or array-like of shape (n_outputs) Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. Returns ------- score : float or ndarray of floats The explained variance or ndarray if 'multioutput' is 'raw_values'. Notes ----- This is not a symmetric function. Examples -------- >>> from sklearn.metrics import explained_variance_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> explained_variance_score(y_true, y_pred) # doctest: +ELLIPSIS 0.957... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average') ... # doctest: +ELLIPSIS 0.983... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0) numerator = np.average((y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0) y_true_avg = np.average(y_true, weights=sample_weight, axis=0) denominator = np.average((y_true - y_true_avg) ** 2, weights=sample_weight, axis=0) nonzero_numerator = numerator != 0 nonzero_denominator = denominator != 0 valid_score = nonzero_numerator & nonzero_denominator output_scores = np.ones(y_true.shape[1]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0. if isinstance(multioutput, string_types): if multioutput == 'raw_values': # return scores individually return output_scores elif multioutput == 'uniform_average': # passing to np.average() None as weights results is uniform mean avg_weights = None elif multioutput == 'variance_weighted': avg_weights = denominator else: avg_weights = multioutput return np.average(output_scores, weights=avg_weights) def r2_score(y_true, y_pred, sample_weight=None, multioutput=None): """R^2 (coefficient of determination) regression score function. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of y, disregarding the input features, would get a R^2 score of 0.0. Read more in the :ref:`User Guide <r2_score>`. Parameters ---------- y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape = (n_samples), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ 'variance_weighted'] or None or array-like of shape (n_outputs) Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. Default value correponds to 'variance_weighted', this behaviour is deprecated since version 0.17 and will be changed to 'uniform_average' starting from 0.19. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. Returns ------- z : float or ndarray of floats The R^2 score or ndarray of scores if 'multioutput' is 'raw_values'. Notes ----- This is not a symmetric function. Unlike most other scores, R^2 score may be negative (it need not actually be the square of a quantity R). References ---------- .. [1] `Wikipedia entry on the Coefficient of determination <http://en.wikipedia.org/wiki/Coefficient_of_determination>`_ Examples -------- >>> from sklearn.metrics import r2_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> r2_score(y_true, y_pred) # doctest: +ELLIPSIS 0.948... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> r2_score(y_true, y_pred, multioutput='variance_weighted') # doctest: +ELLIPSIS 0.938... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) if sample_weight is not None: sample_weight = column_or_1d(sample_weight) weight = sample_weight[:, np.newaxis] else: weight = 1. numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64) denominator = (weight * (y_true - np.average( y_true, axis=0, weights=sample_weight)) ** 2).sum(axis=0, dtype=np.float64) nonzero_denominator = denominator != 0 nonzero_numerator = numerator != 0 valid_score = nonzero_denominator & nonzero_numerator output_scores = np.ones([y_true.shape[1]]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) # arbitrary set to zero to avoid -inf scores, having a constant # y_true is not interesting for scoring a regression anyway output_scores[nonzero_numerator & ~nonzero_denominator] = 0. if multioutput is None and y_true.shape[1] != 1: # @FIXME change in 0.18 warnings.warn("Default 'multioutput' behavior now corresponds to " "'variance_weighted' value, it will be changed " "to 'uniform_average' in 0.18.", DeprecationWarning) multioutput = 'variance_weighted' if isinstance(multioutput, string_types): if multioutput == 'raw_values': # return scores individually return output_scores elif multioutput == 'uniform_average': # passing None as weights results is uniform mean avg_weights = None elif multioutput == 'variance_weighted': avg_weights = denominator # avoid fail on constant y or one-element arrays if not np.any(nonzero_denominator): if not np.any(nonzero_numerator): return 1.0 else: return 0.0 else: avg_weights = multioutput return np.average(output_scores, weights=avg_weights)