group-wbl/.venv/lib/python3.13/site-packages/sklearn/metrics/_regression.py
2026-01-09 09:48:03 +08:00

1962 lines
65 KiB
Python

"""Metrics to assess performance on regression task.
Functions named as ``*_score`` return a scalar value to maximize: the higher
the better.
Functions named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
the lower the better.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from numbers import Real
import numpy as np
from sklearn.exceptions import UndefinedMetricWarning
from sklearn.utils._array_api import (
_average,
_find_matching_floating_dtype,
_median,
get_namespace,
get_namespace_and_device,
size,
)
from sklearn.utils._array_api import _xlogy as xlogy
from sklearn.utils._param_validation import Interval, StrOptions, validate_params
from sklearn.utils.stats import _weighted_percentile
from sklearn.utils.validation import (
_check_sample_weight,
_num_samples,
check_array,
check_consistent_length,
column_or_1d,
)
# Catalogue of regression-metric names (presumably the intended public API of
# this module).
# NOTE(review): Python's ``from module import *`` only consults the lowercase
# ``__all__``; this uppercase ``__ALL__`` is an ordinary module variable and
# does not restrict star-imports. Confirm whether ``__all__`` was intended.
__ALL__ = [
    "max_error",
    "mean_absolute_error",
    "mean_squared_error",
    "mean_squared_log_error",
    "median_absolute_error",
    "mean_absolute_percentage_error",
    "mean_pinball_loss",
    "r2_score",
    "root_mean_squared_log_error",
    "root_mean_squared_error",
    "explained_variance_score",
    "mean_tweedie_deviance",
    "mean_poisson_deviance",
    "mean_gamma_deviance",
    "d2_tweedie_score",
    "d2_pinball_score",
    "d2_absolute_error_score",
]
def _check_reg_targets(
    y_true, y_pred, sample_weight, multioutput, dtype="numeric", xp=None
):
    """Validate that the inputs describe one and the same regression task.

    The targets are length-checked against each other (and against
    `sample_weight`), converted with `check_array`, and 1D targets are
    reshaped to a single column so that callers always receive 2D arrays.
    The `multioutput` argument is validated as either an allowed keyword or
    an array of per-output weights.

    To reduce redundancy when calling `_find_matching_floating_dtype`,
    please use `_check_reg_targets_with_floating_dtype` instead.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,) or None
        Sample weights.

    multioutput : array-like or string in ['raw_values', 'uniform_average', \
            'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    dtype : str or list, default="numeric"
        The dtype argument passed to check_array.

    xp : module, default=None
        Precomputed array namespace module. When passed, typically from a caller
        that has already performed inspection of its own inputs, skips array
        namespace inspection.

    Returns
    -------
    type_true : one of {'continuous', 'continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'.

    y_true : array-like of shape (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,) or None
        Sample weights.

    multioutput : array-like of shape (n_outputs) or string in ['raw_values', \
            'uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    Raises
    ------
    ValueError
        If `y_true` and `y_pred` disagree on the number of outputs, if
        `multioutput` is a string that is not an allowed keyword, or if custom
        output weights are supplied for a single output or in a number that
        differs from the number of outputs.
    """
    xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp)

    check_consistent_length(y_true, y_pred, sample_weight)
    y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
    y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, y_true, dtype=dtype)

    # Promote 1D targets to a single-column 2D layout.
    column_shape = (-1, 1)
    if y_true.ndim == 1:
        y_true = xp.reshape(y_true, column_shape)
    if y_pred.ndim == 1:
        y_pred = xp.reshape(y_pred, column_shape)

    n_outputs, n_pred_outputs = y_true.shape[1], y_pred.shape[1]
    if n_outputs != n_pred_outputs:
        raise ValueError(
            "y_true and y_pred have different number of output ({0}!={1})".format(
                n_outputs, n_pred_outputs
            )
        )

    allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted")
    if isinstance(multioutput, str):
        # Keyword form: only membership needs checking.
        if multioutput not in allowed_multioutput_str:
            raise ValueError(
                "Allowed 'multioutput' string values are {}. "
                "You provided multioutput={!r}".format(
                    allowed_multioutput_str, multioutput
                )
            )
    elif multioutput is not None:
        # Array form: per-output weights, meaningful only with several outputs.
        multioutput = check_array(multioutput, ensure_2d=False)
        if n_outputs == 1:
            raise ValueError("Custom weights are useful only in multi-output cases.")
        if n_outputs != multioutput.shape[0]:
            raise ValueError(
                "There must be equally many custom weights "
                f"({multioutput.shape[0]}) as outputs ({n_outputs})."
            )

    y_type = "continuous-multioutput" if n_outputs != 1 else "continuous"
    return y_type, y_true, y_pred, sample_weight, multioutput
def _check_reg_targets_with_floating_dtype(
    y_true, y_pred, sample_weight, multioutput, xp=None
):
    """Validate regression inputs after picking a common floating dtype.

    Thin wrapper around `_check_reg_targets`: the dtype handed to it is
    obtained from `_find_matching_floating_dtype` applied to `y_true`,
    `y_pred` and `sample_weight`.

    Use this private method only when converting inputs to array API-compatibles.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,)
        Sample weights.

    multioutput : array-like or string in ['raw_values', 'uniform_average', \
            'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    xp : module, default=None
        Precomputed array namespace module. When passed, typically from a caller
        that has already performed inspection of its own inputs, skips array
        namespace inspection.

    Returns
    -------
    type_true : one of {'continuous', 'continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'.

    y_true : array-like of shape (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : array-like of shape (n_outputs) or string in ['raw_values', \
            'uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.
    """
    floating_dtype = _find_matching_floating_dtype(
        y_true, y_pred, sample_weight, xp=xp
    )
    checked = _check_reg_targets(
        y_true, y_pred, sample_weight, multioutput, dtype=floating_dtype, xp=xp
    )
    y_type, y_true, y_pred, sample_weight, multioutput = checked
    return y_type, y_true, y_pred, sample_weight, multioutput
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_absolute_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Compute the mean absolute error (MAE) regression loss.

    The absolute residuals ``|y_pred - y_true|`` are averaged over the samples
    (weighted by `sample_weight` when given) for every output, and the
    per-output values are then aggregated according to `multioutput`. The
    result is a non-negative floating point value whose best value is 0.0.

    Read more in the :ref:`User Guide <mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or array of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAE output is non-negative floating point. The best value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_error(y_true, y_pred)
    0.75
    >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85...
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    abs_residuals = xp.abs(y_pred - y_true)
    per_output = _average(abs_residuals, weights=sample_weight, axis=0, xp=xp)

    is_keyword = isinstance(multioutput, str)
    if is_keyword and multioutput == "raw_values":
        return per_output

    # `None` weights make `_average` compute a plain uniform mean; any other
    # value is forwarded as the per-output weights.
    output_weights = multioutput
    if is_keyword and multioutput == "uniform_average":
        output_weights = None

    # With `axis=None` the second `_average` call collapses to a scalar array,
    # converted to a Python float so the returned value is always eagerly
    # evaluated.
    return float(_average(per_output, weights=output_weights, xp=xp))
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "alpha": [Interval(Real, 0, 1, closed="both")],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_pinball_loss(
    y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average"
):
    """Compute the pinball loss used for quantile regression.

    For the residual ``d = y_true - y_pred`` each sample contributes
    ``alpha * d`` when ``d >= 0`` and ``(alpha - 1) * d`` otherwise. These
    contributions are averaged over samples (weighted by `sample_weight` when
    given) and then aggregated over outputs according to `multioutput`.

    Read more in the :ref:`User Guide <pinball_loss>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    alpha : float, default=0.5
        Slope of the pinball loss, in the closed interval [0, 1]. With
        `alpha=0.5` every sample contributes half of its absolute error, so
        the loss is proportional to :func:`mean_absolute_error`;
        `alpha=0.95` is minimized by estimators of the 95th percentile.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the pinball loss is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        The pinball loss output is a non-negative floating point. The best
        value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_pinball_loss
    >>> y_true = [1, 2, 3]
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
    0.03...
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
    0.3...
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
    0.3...
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
    0.03...
    >>> mean_pinball_loss(y_true, y_true, alpha=0.1)
    0.0
    >>> mean_pinball_loss(y_true, y_true, alpha=0.9)
    0.0
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    residual = y_true - y_pred
    # 1.0 where the residual is non-negative, 0.0 elsewhere (same dtype as the
    # residual so it can be used directly in the arithmetic below).
    nonneg = xp.astype(residual >= 0, residual.dtype)
    pinball = alpha * nonneg * residual - (1 - alpha) * (1 - nonneg) * residual
    per_output = _average(pinball, weights=sample_weight, axis=0, xp=xp)

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return per_output
        if multioutput == "uniform_average":
            # `None` weights make `_average` compute a uniform mean.
            multioutput = None

    # With `axis=None` this `_average` call yields a scalar array; converting
    # it to a Python float keeps the return value eagerly evaluated.
    aggregated = _average(per_output, weights=multioutput, xp=xp)
    return float(aggregated)
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_absolute_percentage_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Compute the mean absolute percentage error (MAPE) regression loss.

    Each sample contributes ``|y_pred - y_true| / max(|y_true|, eps)`` where
    ``eps`` is the float64 machine epsilon, so a zero target produces a very
    large finite value rather than a division by zero.

    Note that we are not using the common "percentage" definition: the percentage
    in the range [0, 100] is converted to a relative value in the range [0, 1]
    by dividing by 100. Thus, an error of 200% corresponds to a relative error of 2.

    Read more in the :ref:`User Guide <mean_absolute_percentage_error>`.

    .. versionadded:: 0.24

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.
        If input is list then the shape must be (n_outputs,).

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute percentage error
        is returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAPE output is non-negative floating point. The best value is 0.0.
        But note that bad predictions can lead to arbitrarily large
        MAPE values, especially if some `y_true` values are very close to zero.
        Note that we return a large value instead of `inf` when `y_true` is zero.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_percentage_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.3273...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.5515...
    >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.6198...
    >>> # the value when some element of the y_true is zero is arbitrarily high because
    >>> # of the division by epsilon
    >>> y_true = [1., 0., 2.4, 7.]
    >>> y_pred = [1.2, 0.1, 2.4, 8.]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    112589990684262.48
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    # Lower bound for the denominator so that zero targets do not divide by 0.
    eps = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype, device=device_)
    denominators = xp.maximum(xp.abs(y_true), eps)
    relative_errors = xp.abs(y_pred - y_true) / denominators
    per_output = _average(relative_errors, weights=sample_weight, axis=0, xp=xp)

    is_keyword = isinstance(multioutput, str)
    if is_keyword and multioutput == "raw_values":
        return per_output

    # `None` weights make `_average` compute a plain uniform mean.
    output_weights = multioutput
    if is_keyword and multioutput == "uniform_average":
        output_weights = None

    # With `axis=None` the second `_average` call yields a scalar array that
    # is converted to a Python float for a consistently eager return value.
    return float(_average(per_output, weights=output_weights, xp=xp))
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_squared_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
):
    """Compute the mean squared error (MSE) regression loss.

    The squared residuals ``(y_true - y_pred) ** 2`` are averaged over the
    samples (weighted by `sample_weight` when given) for every output, and
    the per-output values are aggregated according to `multioutput`.

    Read more in the :ref:`User Guide <mean_squared_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or array of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_squared_error(y_true, y_pred)
    0.375
    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
    >>> mean_squared_error(y_true, y_pred)
    0.708...
    >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
    array([0.41666667, 1.        ])
    >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.825...
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    squared_residuals = (y_true - y_pred) ** 2
    per_output = _average(squared_residuals, axis=0, weights=sample_weight, xp=xp)

    is_keyword = isinstance(multioutput, str)
    if is_keyword and multioutput == "raw_values":
        return per_output

    # `None` weights make `_average` compute a plain uniform mean.
    output_weights = multioutput
    if is_keyword and multioutput == "uniform_average":
        output_weights = None

    # With `axis=None` the second `_average` call yields a scalar array that
    # is converted to a Python float for a consistently eager return value.
    return float(_average(per_output, weights=output_weights, xp=xp))
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def root_mean_squared_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Compute the root mean squared error (RMSE) regression loss.

    The square root is taken of the per-output mean squared errors, and those
    per-output RMSE values are then aggregated according to `multioutput`
    (i.e. the aggregate is an average of roots, not the root of an average).

    Read more in the :ref:`User Guide <mean_squared_error>`.

    .. versionadded:: 1.4

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import root_mean_squared_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> root_mean_squared_error(y_true, y_pred)
    0.612...
    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
    >>> root_mean_squared_error(y_true, y_pred)
    0.822...
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)

    # Always request the per-output MSE; aggregation happens after the root.
    per_output_mse = mean_squared_error(
        y_true, y_pred, sample_weight=sample_weight, multioutput="raw_values"
    )
    per_output = xp.sqrt(per_output_mse)

    is_keyword = isinstance(multioutput, str)
    if is_keyword and multioutput == "raw_values":
        return per_output

    # `None` weights make `_average` compute a plain uniform mean.
    output_weights = multioutput
    if is_keyword and multioutput == "uniform_average":
        output_weights = None

    # With `axis=None` this `_average` call yields a scalar array that is
    # converted to a Python float for a consistently eager return value.
    return float(_average(per_output, weights=output_weights, xp=xp))
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_squared_log_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
):
    """Compute the mean squared logarithmic error regression loss.

    Equivalent to calling :func:`mean_squared_error` on ``log1p(y_true)`` and
    ``log1p(y_pred)``, after checking that every value is greater than -1.

    Read more in the :ref:`User Guide <mean_squared_log_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'

        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Raises
    ------
    ValueError
        If `y_true` or `y_pred` contains a value less than or equal to -1.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> mean_squared_log_error(y_true, y_pred)
    0.039...
    >>> y_true = [[0.5, 1], [1, 2], [7, 6]]
    >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
    >>> mean_squared_log_error(y_true, y_pred)
    0.044...
    >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    array([0.00462428, 0.08377444])
    >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.060...
    """
    xp, _ = get_namespace(y_true, y_pred)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    # log1p is only applied to values strictly above -1; check the true
    # targets first, then the predictions.
    for values in (y_true, y_pred):
        if xp.any(values <= -1):
            raise ValueError(
                "Mean Squared Logarithmic Error cannot be used when "
                "targets contain values less than or equal to -1."
            )

    log_true = xp.log1p(y_true)
    log_pred = xp.log1p(y_pred)
    return mean_squared_error(
        log_true, log_pred, sample_weight=sample_weight, multioutput=multioutput
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def root_mean_squared_log_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Compute the root mean squared logarithmic error regression loss.

    Equivalent to calling :func:`root_mean_squared_error` on
    ``log1p(y_true)`` and ``log1p(y_pred)``, after checking that every value
    is greater than -1.

    Read more in the :ref:`User Guide <mean_squared_log_error>`.

    .. versionadded:: 1.4

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'

        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Raises
    ------
    ValueError
        If `y_true` or `y_pred` contains a value less than or equal to -1.

    Examples
    --------
    >>> from sklearn.metrics import root_mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> root_mean_squared_log_error(y_true, y_pred)
    0.199...
    """
    xp, _ = get_namespace(y_true, y_pred)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    # log1p is only applied to values strictly above -1; check the true
    # targets first, then the predictions.
    for values in (y_true, y_pred):
        if xp.any(values <= -1):
            raise ValueError(
                "Root Mean Squared Logarithmic Error cannot be used when "
                "targets contain values less than or equal to -1."
            )

    log_true = xp.log1p(y_true)
    log_pred = xp.log1p(y_pred)
    return root_mean_squared_error(
        log_true, log_pred, sample_weight=sample_weight, multioutput=multioutput
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def median_absolute_error(
    y_true, y_pred, *, multioutput="uniform_average", sample_weight=None
):
    """Compute the median absolute error regression loss.

    For every output the median of the absolute residuals
    ``|y_pred - y_true|`` is taken over the samples; when `sample_weight` is
    given, `_weighted_percentile` is used instead of the plain median. The
    per-output values are then aggregated according to `multioutput`.

    Median absolute error output is non-negative floating point. The best value
    is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values. Array-like value defines
        weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

        .. versionadded:: 0.24

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the median absolute error is
        returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> median_absolute_error(y_true, y_pred)
    0.75
    >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85
    """
    xp, _ = get_namespace(y_true, y_pred, multioutput, sample_weight)
    _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets(
        y_true, y_pred, sample_weight, multioutput
    )

    abs_residuals = xp.abs(y_pred - y_true)
    if sample_weight is not None:
        per_output = _weighted_percentile(
            abs_residuals, sample_weight=sample_weight, average=True
        )
    else:
        per_output = _median(abs_residuals, axis=0)

    is_keyword = isinstance(multioutput, str)
    if is_keyword and multioutput == "raw_values":
        return per_output

    # `None` weights make `_average` compute a plain uniform mean.
    output_weights = multioutput
    if is_keyword and multioutput == "uniform_average":
        output_weights = None

    return float(_average(per_output, weights=output_weights, xp=xp))
def _assemble_r2_explained_variance(
numerator, denominator, n_outputs, multioutput, force_finite, xp, device
):
"""Common part used by explained variance score and :math:`R^2` score."""
dtype = numerator.dtype
nonzero_denominator = denominator != 0
if not force_finite:
# Standard formula, that may lead to NaN or -Inf
output_scores = 1 - (numerator / denominator)
else:
nonzero_numerator = numerator != 0
# Default = Zero Numerator = perfect predictions. Set to 1.0
# (note: even if denominator is zero, thus avoiding NaN scores)
output_scores = xp.ones([n_outputs], device=device, dtype=dtype)
# Non-zero Numerator and Non-zero Denominator: use the formula
valid_score = nonzero_denominator & nonzero_numerator
output_scores[valid_score] = 1 - (
numerator[valid_score] / denominator[valid_score]
)
# Non-zero Numerator and Zero Denominator:
# arbitrary set to 0.0 to avoid -inf scores
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
if isinstance(multioutput, str):
if multioutput == "raw_values":
# return scores individually
return output_scores
elif multioutput == "uniform_average":
# pass None as weights to _average: uniform mean
avg_weights = None
elif multioutput == "variance_weighted":
avg_weights = denominator
if not xp.any(nonzero_denominator):
# All weights are zero, _average would raise a ZeroDiv error.
# This only happens when all y are constant (or 1-element long)
# Since weights are all equal, fall back to uniform weights.
avg_weights = None
else:
avg_weights = multioutput
result = _average(output_scores, weights=avg_weights, xp=xp)
if size(result) == 1:
return float(result)
return result
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average", "variance_weighted"}),
            "array-like",
        ],
        "force_finite": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def explained_variance_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    force_finite=True,
):
    """Explained variance regression score function.

    The best achievable score is 1.0; lower values indicate a worse fit.

    When ``y_true`` is constant the score is not finite: it is ``NaN`` for
    perfect predictions and ``-Inf`` for imperfect ones. Because such values
    can pollute higher-level experiments (for instance a grid search
    cross-validation), they are replaced by default with 1.0 (perfect
    predictions) or 0.0 (imperfect predictions). Pass ``force_finite=False``
    to obtain the raw, possibly non-finite, value instead.

    .. note::
       The Explained Variance score is similar to the
       :func:`R^2 score <r2_score>`, except that it ignores systematic
       offsets in the prediction (such as the intercept in linear models:
       different intercepts give the same Explained Variance score). In most
       situations the :func:`R^2 score <r2_score>` should be preferred.

    Read more in the :ref:`User Guide <explained_variance_score>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \
            array-like of shape (n_outputs,), default='uniform_average'
        How the scores of the individual outputs are aggregated.
        An array-like value is used as weights to average the scores.

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

    force_finite : bool, default=True
        Whether ``NaN`` and ``-Inf`` scores caused by constant data are
        replaced with real numbers (``1.0`` for perfect predictions, ``0.0``
        otherwise). The default ``True`` is convenient for hyperparameter
        search procedures (e.g. grid search cross-validation).

        .. versionadded:: 1.1

    Returns
    -------
    score : float or ndarray of floats
        The explained variance or ndarray if 'multioutput' is 'raw_values'.

    See Also
    --------
    r2_score :
        Similar metric, but accounting for systematic offsets in
        prediction.

    Notes
    -----
    This is not a symmetric function.

    Examples
    --------
    >>> from sklearn.metrics import explained_variance_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> explained_variance_score(y_true, y_pred)
    0.957...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')
    0.983...
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2]
    >>> explained_variance_score(y_true, y_pred)
    1.0
    >>> explained_variance_score(y_true, y_pred, force_finite=False)
    nan
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2 + 1e-8]
    >>> explained_variance_score(y_true, y_pred)
    0.0
    >>> explained_variance_score(y_true, y_pred, force_finite=False)
    -inf
    """
    xp, _, device = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )
    checked = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput, xp=xp
    )
    _, y_true, y_pred, sample_weight, multioutput = checked

    def _column_mean(values):
        # (Weighted) mean over the sample axis, one value per output.
        return _average(values, weights=sample_weight, axis=0, xp=xp)

    # Variance of the residuals around their own mean: a constant offset in
    # the predictions therefore does not affect the numerator.
    residual_mean = _column_mean(y_true - y_pred)
    numerator = _column_mean((y_true - y_pred - residual_mean) ** 2)

    # Variance of the targets around their mean.
    target_mean = _column_mean(y_true)
    denominator = _column_mean((y_true - target_mean) ** 2)

    return _assemble_r2_explained_variance(
        numerator=numerator,
        denominator=denominator,
        n_outputs=y_true.shape[1],
        multioutput=multioutput,
        force_finite=force_finite,
        xp=xp,
        device=device,
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average", "variance_weighted"}),
            "array-like",
            None,
        ],
        "force_finite": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def r2_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    force_finite=True,
):
    """:math:`R^2` (coefficient of determination) regression score function.

    The best achievable score is 1.0, and the score can be negative because
    a model can be arbitrarily worse. In the general case of a non-constant
    true y, a constant model that always predicts the average y regardless
    of the input features gets a :math:`R^2` score of 0.0.

    When ``y_true`` is constant the :math:`R^2` score is not finite: it is
    ``NaN`` for perfect predictions and ``-Inf`` for imperfect ones. Because
    such values can pollute higher-level experiments (for instance a grid
    search cross-validation), they are replaced by default with 1.0 (perfect
    predictions) or 0.0 (imperfect predictions). Set ``force_finite`` to
    ``False`` to disable this replacement.

    Note: when the prediction residuals have zero mean, the :math:`R^2` score
    is identical to the
    :func:`Explained Variance score <explained_variance_score>`.

    Read more in the :ref:`User Guide <r2_score>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \
            array-like of shape (n_outputs,) or None, default='uniform_average'
        How the scores of the individual outputs are aggregated.
        An array-like value is used as weights to average the scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

        .. versionchanged:: 0.19
            Default value of multioutput is 'uniform_average'.

    force_finite : bool, default=True
        Whether ``NaN`` and ``-Inf`` scores caused by constant data are
        replaced with real numbers (``1.0`` for perfect predictions, ``0.0``
        otherwise). The default ``True`` is convenient for hyperparameter
        search procedures (e.g. grid search cross-validation).

        .. versionadded:: 1.1

    Returns
    -------
    z : float or ndarray of floats
        The :math:`R^2` score or ndarray of scores if 'multioutput' is
        'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, :math:`R^2` score may be negative (it need not
    actually be the square of a quantity R).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred,
    ...          multioutput='variance_weighted')
    0.938...
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    nan
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2 + 1e-8]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    -inf
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )
    checked = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput, xp=xp
    )
    _, y_true, y_pred, sample_weight, multioutput = checked

    if _num_samples(y_pred) < 2:
        warnings.warn(
            "R^2 score is not well-defined with less than two samples.",
            UndefinedMetricWarning,
        )
        return float("nan")

    if sample_weight is None:
        # A scalar weight leaves the sums below unweighted.
        weight = 1.0
    else:
        sample_weight = column_or_1d(sample_weight)
        # Column vector so the weights broadcast across the output columns.
        weight = sample_weight[:, None]

    # Weighted residual sum of squares, one value per output.
    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)

    # Weighted total sum of squares around the (weighted) mean of y_true.
    y_true_mean = _average(y_true, axis=0, weights=sample_weight, xp=xp)
    denominator = xp.sum(weight * (y_true - y_true_mean) ** 2, axis=0)

    return _assemble_r2_explained_variance(
        numerator=numerator,
        denominator=denominator,
        n_outputs=y_true.shape[1],
        multioutput=multioutput,
        force_finite=force_finite,
        xp=xp,
        device=device_,
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
    },
    prefer_skip_nested_validation=True,
)
def max_error(y_true, y_pred):
    """
    The max_error metric calculates the maximum residual error.

    Read more in the :ref:`User Guide <max_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    Returns
    -------
    max_error : float
        A positive floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import max_error
    >>> y_true = [3, 2, 7, 1]
    >>> y_pred = [4, 2, 7, 1]
    >>> max_error(y_true, y_pred)
    1.0
    """
    xp, _ = get_namespace(y_true, y_pred)
    y_type, y_true, y_pred, _, _ = _check_reg_targets(
        y_true, y_pred, sample_weight=None, multioutput=None, xp=xp
    )
    # Only single-output targets are supported by this metric.
    if y_type == "continuous-multioutput":
        raise ValueError("Multioutput not supported in max_error")

    abs_residuals = xp.abs(y_true - y_pred)
    largest = xp.max(abs_residuals)
    return float(largest)
def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power):
    """Mean Tweedie deviance regression loss.

    Computes the per-sample Tweedie deviance for the given `power` and
    returns its (optionally weighted) average as a Python float. No input
    validation is performed here.
    """
    xp, _ = get_namespace(y_true, y_pred)
    p = power

    if p == 0:
        # Normal distribution, y and y_pred any real number
        dev = (y_true - y_pred) ** 2
    elif p == 1:
        # Poisson distribution
        dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred)
    elif p == 2:
        # Gamma distribution
        dev = 2 * (xp.log(y_pred / y_true) + y_true / y_pred - 1)
    else:
        # General formula. For p < 0 ('Extreme stable', y any real number,
        # y_pred > 0) y_true is clipped at zero in the first term only.
        if p < 0:
            first_base = xp.where(y_true > 0, y_true, 0.0)
        else:
            first_base = y_true
        first = xp.pow(first_base, 2 - p) / ((1 - p) * (2 - p))
        second = y_true * xp.pow(y_pred, 1 - p) / (1 - p)
        third = xp.pow(y_pred, 2 - p) / (2 - p)
        dev = 2 * (first - second + third)

    mean_dev = _average(dev, weights=sample_weight, xp=xp)
    return float(mean_dev)
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "power": [
            Interval(Real, None, 0, closed="right"),
            Interval(Real, 1, None, closed="left"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0):
    """Mean Tweedie deviance regression loss.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    power : float, default=0
        Tweedie power parameter. Either power <= 0 or power >= 1.

        The higher `p` the less weight is given to extreme
        deviations between true and predicted targets.

        - power < 0: Extreme stable distribution. Requires: y_pred > 0.
        - power = 0 : Normal distribution, output corresponds to
          mean_squared_error. y_true and y_pred can be any real numbers.
        - power = 1 : Poisson distribution. Requires: y_true >= 0 and
          y_pred > 0.
        - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
          and y_pred > 0.
        - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
          and y_pred > 0.
        - otherwise : Positive stable distribution. Requires: y_true > 0
          and y_pred > 0.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_tweedie_deviance
    >>> y_true = [2, 0, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_tweedie_deviance(y_true, y_pred, power=1)
    1.4260...
    """
    xp, _ = get_namespace(y_true, y_pred)
    checked = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput=None, xp=xp
    )
    y_type, y_true, y_pred, sample_weight, _ = checked
    if y_type == "continuous-multioutput":
        raise ValueError("Multioutput not supported in mean_tweedie_deviance")

    if sample_weight is not None:
        # Reshape the weights into a column vector.
        sample_weight = column_or_1d(sample_weight)[:, np.newaxis]

    message = f"Mean Tweedie deviance error with power={power} can only be used on "

    # Determine which domain restriction (if any) the inputs violate.
    violation = None
    if power < 0:
        # 'Extreme stable', y any real number, y_pred > 0
        if xp.any(y_pred <= 0):
            violation = "strictly positive y_pred."
    elif power == 0:
        # Normal, y and y_pred can be any real number
        pass
    elif 1 <= power < 2:
        # Poisson and compound Poisson distribution, y >= 0, y_pred > 0
        if xp.any(y_true < 0) or xp.any(y_pred <= 0):
            violation = "non-negative y and strictly positive y_pred."
    elif power >= 2:
        # Gamma, inverse Gaussian and positive stable distributions,
        # y and y_pred > 0
        if xp.any(y_true <= 0) or xp.any(y_pred <= 0):
            violation = "strictly positive y and y_pred."
    else:  # pragma: nocover
        # Unreachable statement
        raise ValueError

    if violation is not None:
        raise ValueError(message + violation)

    return _mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, power=power
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None):
    """Mean Poisson deviance regression loss.

    The Poisson deviance is the Tweedie deviance evaluated with the power
    parameter `power=1`.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values. Requires y_true >= 0.

    y_pred : array-like of shape (n_samples,)
        Estimated target values. Requires y_pred > 0.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_poisson_deviance
    >>> y_true = [2, 0, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_poisson_deviance(y_true, y_pred)
    1.4260...
    """
    # Tweedie power that selects the Poisson deviance.
    poisson_power = 1
    return mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, power=poisson_power
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None):
    """Mean Gamma deviance regression loss.

    The Gamma deviance is the Tweedie deviance evaluated with the power
    parameter `power=2`. It is invariant to scaling of the target variable,
    and measures relative errors.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values. Requires y_true > 0.

    y_pred : array-like of shape (n_samples,)
        Estimated target values. Requires y_pred > 0.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_gamma_deviance
    >>> y_true = [2, 0.5, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_gamma_deviance(y_true, y_pred)
    1.0568...
    """
    # Tweedie power that selects the Gamma deviance.
    gamma_power = 2
    return mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, power=gamma_power
    )
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "power": [
            Interval(Real, None, 0, closed="right"),
            Interval(Real, 1, None, closed="left"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0):
    """
    :math:`D^2` regression score function, fraction of Tweedie deviance explained.

    Best possible score is 1.0 and it can be negative (because the model can be
    arbitrarily worse). A model that always uses the empirical mean of `y_true` as
    constant prediction, disregarding the input features, gets a D^2 score of 0.0.

    Read more in the :ref:`User Guide <d2_score>`.

    .. versionadded:: 1.0

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    power : float, default=0
        Tweedie power parameter. Either power <= 0 or power >= 1.

        The higher `p` the less weight is given to extreme
        deviations between true and predicted targets.

        - power < 0: Extreme stable distribution. Requires: y_pred > 0.
        - power = 0 : Normal distribution, output corresponds to r2_score.
          y_true and y_pred can be any real numbers.
        - power = 1 : Poisson distribution. Requires: y_true >= 0 and
          y_pred > 0.
        - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
          and y_pred > 0.
        - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
          and y_pred > 0.
        - otherwise : Positive stable distribution. Requires: y_true > 0
          and y_pred > 0.

    Returns
    -------
    z : float
        The D^2 score.

    Notes
    -----
    This is not a symmetric function.

    Like R^2, D^2 score may be negative (it need not actually be the square of
    a quantity D).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    When the deviance of the constant mean prediction is exactly zero (for
    instance when `y_true` is constant), the ratio is undefined. In that case
    the score is 1.0 if the deviance of `y_pred` is also zero and 0.0
    otherwise, which matches the finite values returned by
    :func:`r2_score` (with ``force_finite=True``) and
    :func:`d2_pinball_score`.

    References
    ----------
    .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
           Wainwright. "Statistical Learning with Sparsity: The Lasso and
           Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/

    Examples
    --------
    >>> from sklearn.metrics import d2_tweedie_score
    >>> y_true = [0.5, 1, 2.5, 7]
    >>> y_pred = [1, 1, 5, 3.5]
    >>> d2_tweedie_score(y_true, y_pred)
    0.285...
    >>> d2_tweedie_score(y_true, y_pred, power=1)
    0.487...
    >>> d2_tweedie_score(y_true, y_pred, power=2)
    0.630...
    >>> d2_tweedie_score(y_true, y_true, power=2)
    1.0
    """
    xp, _ = get_namespace(y_true, y_pred)
    y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput=None, xp=xp
    )
    if y_type == "continuous-multioutput":
        raise ValueError("Multioutput not supported in d2_tweedie_score")

    if _num_samples(y_pred) < 2:
        msg = "D^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    # Drop the trailing output axis so both targets are 1-D again.
    y_true, y_pred = xp.squeeze(y_true, axis=1), xp.squeeze(y_pred, axis=1)

    # Deviance of the model under evaluation; the public function is used so
    # that the domain checks for `power` are applied to y_true and y_pred.
    numerator = mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, power=power
    )

    # Deviance of the baseline that always predicts the (weighted) mean.
    y_avg = _average(y_true, weights=sample_weight, xp=xp)
    denominator = _mean_tweedie_deviance(
        y_true, y_avg, sample_weight=sample_weight, power=power
    )

    if denominator == 0:
        # Both deviances are Python floats, so dividing by a zero baseline
        # deviance would raise ZeroDivisionError. Return the same finite
        # values as r2_score(force_finite=True) and d2_pinball_score:
        # 1.0 for perfect predictions, 0.0 otherwise.
        return 1.0 if numerator == 0 else 0.0

    return 1 - numerator / denominator
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "alpha": [Interval(Real, 0, 1, closed="both")],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average"}),
            "array-like",
        ],
    },
    prefer_skip_nested_validation=True,
)
def d2_pinball_score(
    y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average"
):
    """
    :math:`D^2` regression score function, fraction of pinball loss explained.

    The best achievable score is 1.0, and the score can be negative because a
    model can be arbitrarily worse. A model that always predicts the empirical
    alpha-quantile of `y_true`, regardless of the input features, gets a
    :math:`D^2` score of 0.0.

    Read more in the :ref:`User Guide <d2_score>`.

    .. versionadded:: 1.1

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    alpha : float, default=0.5
        Slope of the pinball deviance. It determines the quantile level alpha
        for which the pinball deviance and also D2 are optimal.
        The default `alpha=0.5` is equivalent to `d2_absolute_error_score`.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        How the scores of the individual outputs are aggregated.
        An array-like value is used as weights to average the scores.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

    Returns
    -------
    score : float or ndarray of floats
        The :math:`D^2` score with a pinball deviance
        or ndarray of scores if `multioutput='raw_values'`.

    Notes
    -----
    Like :math:`R^2`, :math:`D^2` score may be negative
    (it need not actually be the square of a quantity D).

    This metric is not well-defined for a single point and will return a NaN
    value if n_samples is less than two.

    This metric is not a built-in :ref:`string name scorer
    <scoring_string_names>` to use along with tools such as
    :class:`~sklearn.model_selection.GridSearchCV` or
    :class:`~sklearn.model_selection.RandomizedSearchCV`.
    Instead, you can :ref:`create a scorer object <scoring_adapt_metric>` using
    :func:`~sklearn.metrics.make_scorer`, with any desired parameter settings.
    See the `Examples` section for details.

    References
    ----------
    .. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999).
           "Goodness of Fit and Related Inference Processes for Quantile Regression"
           <https://doi.org/10.1080/01621459.1999.10473882>`_
    .. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
           Wainwright. "Statistical Learning with Sparsity: The Lasso and
           Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/

    Examples
    --------
    >>> from sklearn.metrics import d2_pinball_score
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 3, 3]
    >>> d2_pinball_score(y_true, y_pred)
    0.5
    >>> d2_pinball_score(y_true, y_pred, alpha=0.9)
    0.772...
    >>> d2_pinball_score(y_true, y_pred, alpha=0.1)
    -1.045...
    >>> d2_pinball_score(y_true, y_true, alpha=0.1)
    1.0

    Creating a scorer object with :func:`~sklearn.metrics.make_scorer`:

    >>> import numpy as np
    >>> from sklearn.metrics import make_scorer
    >>> from sklearn.model_selection import GridSearchCV
    >>> from sklearn.linear_model import QuantileRegressor
    >>> X = np.array([[1], [2], [3], [4]])
    >>> y = np.array([2.5, 0.0, 2, 8])
    >>> pinball_95_scorer = make_scorer(d2_pinball_score, alpha=0.95)
    >>> grid = GridSearchCV(
    ...     QuantileRegressor(quantile=0.95),
    ...     param_grid={"fit_intercept": [True, False]},
    ...     scoring=pinball_95_scorer,
    ...     cv=2,
    ... ).fit(X, y)
    >>> grid.best_params_
    {'fit_intercept': True}
    """
    _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets(
        y_true, y_pred, sample_weight, multioutput
    )

    if _num_samples(y_pred) < 2:
        warnings.warn(
            "D^2 score is not well-defined with less than two samples.",
            UndefinedMetricWarning,
        )
        return float("nan")

    # Both losses are computed per output with identical settings.
    loss_kwargs = {
        "sample_weight": sample_weight,
        "alpha": alpha,
        "multioutput": "raw_values",
    }
    numerator = mean_pinball_loss(y_true, y_pred, **loss_kwargs)

    # Baseline: the empirical (optionally weighted) alpha-quantile of y_true,
    # repeated for every sample.
    if sample_weight is None:
        quantiles = np.percentile(y_true, q=alpha * 100, axis=0)
    else:
        quantiles = _weighted_percentile(
            y_true, sample_weight=sample_weight, percentile_rank=alpha * 100
        )
    y_quantile = np.tile(quantiles, (len(y_true), 1))
    denominator = mean_pinball_loss(y_true, y_quantile, **loss_kwargs)

    has_loss = numerator != 0
    has_baseline_loss = denominator != 0

    # 1.0 by default (zero model loss); the ratio formula where both losses
    # are non-zero; 0.0 where only the baseline loss is zero.
    output_scores = np.ones(y_true.shape[1])
    regular = has_loss & has_baseline_loss
    output_scores[regular] = 1 - (numerator[regular] / denominator[regular])
    output_scores[has_loss & ~has_baseline_loss] = 0.0

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            # Per-output scores are returned without aggregation.
            return output_scores
        # "uniform_average": None as weights makes np.average compute a
        # uniform mean.
        avg_weights = None
    else:
        avg_weights = multioutput

    return float(np.average(output_scores, weights=avg_weights))
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average"}),
            "array-like",
        ],
    },
    prefer_skip_nested_validation=True,
)
def d2_absolute_error_score(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """
    :math:`D^2` regression score function, fraction of absolute error explained.

    The best achievable score is 1.0, and the score can be negative because a
    model can be arbitrarily worse. A model that always predicts the empirical
    median of `y_true`, regardless of the input features, gets a :math:`D^2`
    score of 0.0.

    Read more in the :ref:`User Guide <d2_score>`.

    .. versionadded:: 1.1

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        How the scores of the individual outputs are aggregated.
        An array-like value is used as weights to average the scores.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

    Returns
    -------
    score : float or ndarray of floats
        The :math:`D^2` score with an absolute error deviance
        or ndarray of scores if 'multioutput' is 'raw_values'.

    Notes
    -----
    Like :math:`R^2`, :math:`D^2` score may be negative
    (it need not actually be the square of a quantity D).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
           Wainwright. "Statistical Learning with Sparsity: The Lasso and
           Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/

    Examples
    --------
    >>> from sklearn.metrics import d2_absolute_error_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> d2_absolute_error_score(y_true, y_pred)
    0.764...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average')
    0.691...
    >>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values')
    array([0.8125    , 0.57142857])
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> d2_absolute_error_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> d2_absolute_error_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> d2_absolute_error_score(y_true, y_pred)
    -1.0
    """
    # The pinball score at alpha=0.5 is the absolute-error D^2 score.
    median_level = 0.5
    return d2_pinball_score(
        y_true,
        y_pred,
        sample_weight=sample_weight,
        alpha=median_level,
        multioutput=multioutput,
    )