"""Metrics to assess performance on regression task. Functions named as ``*_score`` return a scalar value to maximize: the higher the better. Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize: the lower the better. """ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause import warnings from numbers import Real import numpy as np from sklearn.exceptions import UndefinedMetricWarning from sklearn.utils._array_api import ( _average, _find_matching_floating_dtype, _median, get_namespace, get_namespace_and_device, size, ) from sklearn.utils._array_api import _xlogy as xlogy from sklearn.utils._param_validation import Interval, StrOptions, validate_params from sklearn.utils.stats import _weighted_percentile from sklearn.utils.validation import ( _check_sample_weight, _num_samples, check_array, check_consistent_length, column_or_1d, ) __ALL__ = [ "max_error", "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "mean_absolute_percentage_error", "mean_pinball_loss", "r2_score", "root_mean_squared_log_error", "root_mean_squared_error", "explained_variance_score", "mean_tweedie_deviance", "mean_poisson_deviance", "mean_gamma_deviance", "d2_tweedie_score", "d2_pinball_score", "d2_absolute_error_score", ] def _check_reg_targets( y_true, y_pred, sample_weight, multioutput, dtype="numeric", xp=None ): """Check that y_true, y_pred and sample_weight belong to the same regression task. To reduce redundancy when calling `_find_matching_floating_dtype`, please use `_check_reg_targets_with_floating_dtype` instead. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,) or None Sample weights. multioutput : array-like or string in ['raw_values', uniform_average', 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). dtype : str or list, default="numeric" the dtype argument passed to check_array. xp : module, default=None Precomputed array namespace module. When passed, typically from a caller that has already performed inspection of its own inputs, skips array namespace inspection. Returns ------- type_true : one of {'continuous', continuous-multioutput'} The type of the true target data, as output by 'utils.multiclass.type_of_target'. y_true : array-like of shape (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,) or None Sample weights. multioutput : array-like of shape (n_outputs) or string in ['raw_values', uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or just the corresponding argument if ``multioutput`` is a correct keyword. 
""" xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp) check_consistent_length(y_true, y_pred, sample_weight) y_true = check_array(y_true, ensure_2d=False, dtype=dtype) y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, y_true, dtype=dtype) if y_true.ndim == 1: y_true = xp.reshape(y_true, (-1, 1)) if y_pred.ndim == 1: y_pred = xp.reshape(y_pred, (-1, 1)) if y_true.shape[1] != y_pred.shape[1]: raise ValueError( "y_true and y_pred have different number of output ({0}!={1})".format( y_true.shape[1], y_pred.shape[1] ) ) n_outputs = y_true.shape[1] allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted") if isinstance(multioutput, str): if multioutput not in allowed_multioutput_str: raise ValueError( "Allowed 'multioutput' string values are {}. " "You provided multioutput={!r}".format( allowed_multioutput_str, multioutput ) ) elif multioutput is not None: multioutput = check_array(multioutput, ensure_2d=False) if n_outputs == 1: raise ValueError("Custom weights are useful only in multi-output cases.") elif n_outputs != multioutput.shape[0]: raise ValueError( "There must be equally many custom weights " f"({multioutput.shape[0]}) as outputs ({n_outputs})." ) y_type = "continuous" if n_outputs == 1 else "continuous-multioutput" return y_type, y_true, y_pred, sample_weight, multioutput def _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=None ): """Ensures y_true, y_pred, and sample_weight correspond to same regression task. Extends `_check_reg_targets` by automatically selecting a suitable floating-point data type for inputs using `_find_matching_floating_dtype`. Use this private method only when converting inputs to array API-compatibles. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,) multioutput : array-like or string in ['raw_values', 'uniform_average', \ 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). xp : module, default=None Precomputed array namespace module. When passed, typically from a caller that has already performed inspection of its own inputs, skips array namespace inspection. Returns ------- type_true : one of {'continuous', 'continuous-multioutput'} The type of the true target data, as output by 'utils.multiclass.type_of_target'. y_true : array-like of shape (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : array-like of shape (n_outputs) or string in ['raw_values', \ 'uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or just the corresponding argument if ``multioutput`` is a correct keyword. 
""" dtype_name = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp) y_type, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( y_true, y_pred, sample_weight, multioutput, dtype=dtype_name, xp=xp ) return y_type, y_true, y_pred, sample_weight, multioutput @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def mean_absolute_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Mean absolute error regression loss. The mean absolute error is a non-negative floating point value, where best value is 0.0. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or array of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. MAE output is non-negative floating point. The best value is 0.0. Examples -------- >>> from sklearn.metrics import mean_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_absolute_error(y_true, y_pred) 0.5 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> mean_absolute_error(y_true, y_pred) 0.75 >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85... """ xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) output_errors = _average( xp.abs(y_pred - y_true), weights=sample_weight, axis=0, xp=xp ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to _average: uniform mean multioutput = None # Average across the outputs (if needed). # The second call to `_average` should always return # a scalar array that we convert to a Python float to # consistently return the same eager evaluated value. # Therefore, `axis=None`. mean_absolute_error = _average(output_errors, weights=multioutput, xp=xp) return float(mean_absolute_error) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "alpha": [Interval(Real, 0, 1, closed="both")], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def mean_pinball_loss( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" ): """Pinball loss for quantile regression. Read more in the :ref:`User Guide `. 
    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    alpha : float, default=0.5
        Slope of the pinball loss. This loss is equivalent to
        :ref:`mean_absolute_error` when `alpha=0.5`; `alpha=0.95` is minimized
        by estimators of the 95th percentile.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the pinball loss is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        The pinball loss output is a non-negative floating point. The best
        value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_pinball_loss
    >>> y_true = [1, 2, 3]
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
    0.03...
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
    0.3...
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
    0.3...
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
    0.03...
    >>> mean_pinball_loss(y_true, y_true, alpha=0.1)
    0.0
    >>> mean_pinball_loss(y_true, y_true, alpha=0.9)
    0.0
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    diff = y_true - y_pred
    # sign is 1.0 where y_true >= y_pred and 0.0 elsewhere, so the two pinball
    # branches (under- and over-prediction) get weights alpha and 1 - alpha.
    sign = xp.astype(diff >= 0, diff.dtype)
    loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff
    output_errors = _average(loss, weights=sample_weight, axis=0, xp=xp)

    if isinstance(multioutput, str) and multioutput == "raw_values":
        return output_errors

    if isinstance(multioutput, str) and multioutput == "uniform_average":
        # pass None as weights to _average: uniform mean
        multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    return float(_average(output_errors, weights=multioutput, xp=xp))


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_absolute_percentage_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Mean absolute percentage error (MAPE) regression loss.

    Note that we are not using the common "percentage" definition: the
    percentage in the range [0, 100] is converted to a relative value in the
    range [0, 1] by dividing by 100. Thus, an error of 200% corresponds to a
    relative error of 2.

    Read more in the :ref:`User Guide `.

    .. versionadded:: 0.24

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.
        If input is a list then the shape must be (n_outputs,).

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute percentage error
        is returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAPE output is non-negative floating point. The best value is 0.0.
        But note that bad predictions can lead to arbitrarily large
        MAPE values, especially if some `y_true` values are very close to
        zero. Note that we return a large value instead of `inf` when `y_true`
        is zero.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_percentage_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.3273...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.5515...
    >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.6198...
    >>> # the value when some element of the y_true is zero is arbitrarily
    >>> # high because of the division by epsilon
    >>> y_true = [1., 0., 2.4, 7.]
    >>> y_pred = [1.2, 0.1, 2.4, 8.]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    112589990684262.48
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )
    # Clip the denominator at a tiny epsilon so that exact zeros in `y_true`
    # yield a very large (but finite) relative error instead of `inf`.
    epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype, device=device_)
    y_true_abs = xp.abs(y_true)
    mape = xp.abs(y_pred - y_true) / xp.maximum(y_true_abs, epsilon)
    output_errors = _average(mape, weights=sample_weight, axis=0, xp=xp)
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    mean_absolute_percentage_error = _average(output_errors, weights=multioutput, xp=xp)

    return float(mean_absolute_percentage_error)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_squared_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
):
    """Mean squared error regression loss.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or array of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred) 0.375 >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> mean_squared_error(y_true, y_pred) 0.708... >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') array([0.41666667, 1. ]) >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.825... """ xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) output_errors = _average( (y_true - y_pred) ** 2, axis=0, weights=sample_weight, xp=xp ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to _average: uniform mean multioutput = None # Average across the outputs (if needed). # The second call to `_average` should always return # a scalar array that we convert to a Python float to # consistently return the same eager evaluated value. # Therefore, `axis=None`. mean_squared_error = _average(output_errors, weights=multioutput, xp=xp) return float(mean_squared_error) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def root_mean_squared_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Root mean squared error regression loss. Read more in the :ref:`User Guide `. .. versionadded:: 1.4 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import root_mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> root_mean_squared_error(y_true, y_pred) 0.612... >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> root_mean_squared_error(y_true, y_pred) 0.822... 
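
    Per-output errors are available with ``multioutput='raw_values'``:

    >>> root_mean_squared_error(y_true, y_pred, multioutput='raw_values')
    array([0.64549722, 1.        ])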
""" xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) output_errors = xp.sqrt( mean_squared_error( y_true, y_pred, sample_weight=sample_weight, multioutput="raw_values" ) ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to _average: uniform mean multioutput = None # Average across the outputs (if needed). # The second call to `_average` should always return # a scalar array that we convert to a Python float to # consistently return the same eager evaluated value. # Therefore, `axis=None`. root_mean_squared_error = _average(output_errors, weights=multioutput, xp=xp) return float(root_mean_squared_error) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def mean_squared_log_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", ): """Mean squared logarithmic error regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors when the input is of multioutput format. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_log_error >>> y_true = [3, 5, 2.5, 7] >>> y_pred = [2.5, 5, 4, 8] >>> mean_squared_log_error(y_true, y_pred) 0.039... >>> y_true = [[0.5, 1], [1, 2], [7, 6]] >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]] >>> mean_squared_log_error(y_true, y_pred) 0.044... >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values') array([0.00462428, 0.08377444]) >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.060... """ xp, _ = get_namespace(y_true, y_pred) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) if xp.any(y_true <= -1) or xp.any(y_pred <= -1): raise ValueError( "Mean Squared Logarithmic Error cannot be used when " "targets contain values less than or equal to -1." ) return mean_squared_error( xp.log1p(y_true), xp.log1p(y_pred), sample_weight=sample_weight, multioutput=multioutput, ) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def root_mean_squared_log_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Root mean squared logarithmic error regression loss. Read more in the :ref:`User Guide `. .. versionadded:: 1.4 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. 
    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import root_mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> root_mean_squared_log_error(y_true, y_pred)
    0.199...
    """
    xp, _ = get_namespace(y_true, y_pred)

    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    if xp.any(y_true <= -1) or xp.any(y_pred <= -1):
        raise ValueError(
            "Root Mean Squared Logarithmic Error cannot be used when "
            "targets contain values less than or equal to -1."
        )

    return root_mean_squared_error(
        xp.log1p(y_true),
        xp.log1p(y_pred),
        sample_weight=sample_weight,
        multioutput=multioutput,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def median_absolute_error(
    y_true, y_pred, *, multioutput="uniform_average", sample_weight=None
):
    """Median absolute error regression loss.

    Median absolute error output is non-negative floating point. The best
    value is 0.0. Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values. Array-like value
        defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

        .. versionadded:: 0.24

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the median absolute error is
        returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> median_absolute_error(y_true, y_pred)
    0.75
    >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85
    """
    xp, _ = get_namespace(y_true, y_pred, multioutput, sample_weight)
    _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets(
        y_true, y_pred, sample_weight, multioutput
    )
    if sample_weight is None:
        output_errors = _median(xp.abs(y_pred - y_true), axis=0)
    else:
        output_errors = _weighted_percentile(
            xp.abs(y_pred - y_true), sample_weight=sample_weight, average=True
        )
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None
    return float(_average(output_errors, weights=multioutput, xp=xp))


def _assemble_r2_explained_variance(
    numerator, denominator, n_outputs, multioutput, force_finite, xp, device
):
    """Common part used by explained variance score and :math:`R^2` score."""
    dtype = numerator.dtype

    nonzero_denominator = denominator != 0

    if not force_finite:
        # Standard formula, that may lead to NaN or -Inf
        output_scores = 1 - (numerator / denominator)
    else:
        nonzero_numerator = numerator != 0
        # Default = Zero Numerator = perfect predictions. Set to 1.0
        # (note: even if denominator is zero, thus avoiding NaN scores)
        output_scores = xp.ones([n_outputs], device=device, dtype=dtype)
        # Non-zero Numerator and Non-zero Denominator: use the formula
        valid_score = nonzero_denominator & nonzero_numerator

        output_scores[valid_score] = 1 - (
            numerator[valid_score] / denominator[valid_score]
        )

        # Non-zero Numerator and Zero Denominator:
        # arbitrary set to 0.0 to avoid -inf scores
        output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            # return scores individually
            return output_scores
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            avg_weights = None
        elif multioutput == "variance_weighted":
            avg_weights = denominator
            if not xp.any(nonzero_denominator):
                # All weights are zero, _average would raise a ZeroDiv error.
                # This only happens when all y are constant (or 1-element long)
                # Since weights are all equal, fall back to uniform weights.
                avg_weights = None
    else:
        avg_weights = multioutput

    result = _average(output_scores, weights=avg_weights, xp=xp)
    if size(result) == 1:
        return float(result)
    return result


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average", "variance_weighted"}),
            "array-like",
        ],
        "force_finite": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def explained_variance_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    force_finite=True,
):
    """Explained variance regression score function.

    Best possible score is 1.0, lower values are worse.

    In the particular case when ``y_true`` is constant, the explained variance
    score is not finite: it is either ``NaN`` (perfect predictions) or
    ``-Inf`` (imperfect predictions). To prevent such non-finite numbers from
    polluting higher-level experiments such as a grid search cross-validation,
    by default these cases are replaced with 1.0 (perfect predictions) or 0.0
    (imperfect predictions) respectively. If ``force_finite`` is set to
    ``False``, this score falls back on the original Explained Variance
    definition.

    .. note::
       The Explained Variance score is similar to the
       :func:`R^2 score `, but the former does not account
       for systematic offsets in the prediction (such as the intercept in
       linear models, i.e.
different intercepts give the same Explained Variance score). Most often the :func:`R^2 score ` should be preferred. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \ array-like of shape (n_outputs,), default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. force_finite : bool, default=True Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant data should be replaced with real numbers (``1.0`` if prediction is perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting for hyperparameters' search procedures (e.g. grid search cross-validation). .. versionadded:: 1.1 Returns ------- score : float or ndarray of floats The explained variance or ndarray if 'multioutput' is 'raw_values'. See Also -------- r2_score : Similar metric, but accounting for systematic offsets in prediction. Notes ----- This is not a symmetric function. Examples -------- >>> from sklearn.metrics import explained_variance_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> explained_variance_score(y_true, y_pred) 0.957... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average') 0.983... >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2] >>> explained_variance_score(y_true, y_pred) 1.0 >>> explained_variance_score(y_true, y_pred, force_finite=False) nan >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2 + 1e-8] >>> explained_variance_score(y_true, y_pred) 0.0 >>> explained_variance_score(y_true, y_pred, force_finite=False) -inf """ xp, _, device = get_namespace_and_device(y_true, y_pred, sample_weight, multioutput) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) y_diff_avg = _average(y_true - y_pred, weights=sample_weight, axis=0, xp=xp) numerator = _average( (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0, xp=xp ) y_true_avg = _average(y_true, weights=sample_weight, axis=0, xp=xp) denominator = _average( (y_true - y_true_avg) ** 2, weights=sample_weight, axis=0, xp=xp ) return _assemble_r2_explained_variance( numerator=numerator, denominator=denominator, n_outputs=y_true.shape[1], multioutput=multioutput, force_finite=force_finite, xp=xp, device=device, ) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [ StrOptions({"raw_values", "uniform_average", "variance_weighted"}), "array-like", None, ], "force_finite": ["boolean"], }, prefer_skip_nested_validation=True, ) def r2_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", force_finite=True, ): """:math:`R^2` (coefficient of determination) regression score function. 
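
    The score is computed as::

        R^2 = 1 - sum(w_i * (y_i - yhat_i)**2) / sum(w_i * (y_i - ybar)**2)

    where ``ybar`` is the (possibly weighted) mean of ``y_true`` and ``w_i``
    are the sample weights (taken as 1 when not provided), matching the
    computation in the implementation below.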
    Best possible score is 1.0 and it can be negative (because the model can
    be arbitrarily worse). In the general case when the true y is
    non-constant, a constant model that always predicts the average y
    disregarding the input features would get an :math:`R^2` score of 0.0.

    In the particular case when ``y_true`` is constant, the :math:`R^2` score
    is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf``
    (imperfect predictions). To prevent such non-finite numbers from polluting
    higher-level experiments such as a grid search cross-validation, by
    default these cases are replaced with 1.0 (perfect predictions) or 0.0
    (imperfect predictions) respectively. You can set ``force_finite`` to
    ``False`` to prevent this fix from happening.

    Note: when the prediction residuals have zero mean, the :math:`R^2` score
    is identical to the
    :func:`Explained Variance score `.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \
            array-like of shape (n_outputs,) or None, default='uniform_average'
        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

        .. versionchanged:: 0.19
            Default value of multioutput is 'uniform_average'.

    force_finite : bool, default=True
        Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant
        data should be replaced with real numbers (``1.0`` if prediction is
        perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting
        for hyperparameters' search procedures (e.g. grid search
        cross-validation).

        .. versionadded:: 1.1

    Returns
    -------
    z : float or ndarray of floats
        The :math:`R^2` score or ndarray of scores if 'multioutput' is
        'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, :math:`R^2` score may be negative (it need not
    actually be the square of a quantity R).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred,
    ...          multioutput='variance_weighted')
    0.938...
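    Per-output scores are available with ``multioutput='raw_values'``:

    >>> r2_score(y_true, y_pred, multioutput='raw_values')
    array([0.965..., 0.908...])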
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    nan
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2 + 1e-8]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    -inf
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )

    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    if _num_samples(y_pred) < 2:
        msg = "R^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        weight = sample_weight[:, None]
    else:
        weight = 1.0

    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)
    denominator = xp.sum(
        weight
        * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2,
        axis=0,
    )

    return _assemble_r2_explained_variance(
        numerator=numerator,
        denominator=denominator,
        n_outputs=y_true.shape[1],
        multioutput=multioutput,
        force_finite=force_finite,
        xp=xp,
        device=device_,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
    },
    prefer_skip_nested_validation=True,
)
def max_error(y_true, y_pred):
    """
    The max_error metric calculates the maximum residual error.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    Returns
    -------
    max_error : float
        A non-negative floating point value (the best value is 0.0).
Examples -------- >>> from sklearn.metrics import max_error >>> y_true = [3, 2, 7, 1] >>> y_pred = [4, 2, 7, 1] >>> max_error(y_true, y_pred) 1.0 """ xp, _ = get_namespace(y_true, y_pred) y_type, y_true, y_pred, _, _ = _check_reg_targets( y_true, y_pred, sample_weight=None, multioutput=None, xp=xp ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return float(xp.max(xp.abs(y_true - y_pred))) def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power): """Mean Tweedie deviance regression loss.""" xp, _ = get_namespace(y_true, y_pred) p = power if p < 0: # 'Extreme stable', y any real number, y_pred > 0 dev = 2 * ( xp.pow( xp.where(y_true > 0, y_true, 0.0), 2 - p, ) / ((1 - p) * (2 - p)) - y_true * xp.pow(y_pred, 1 - p) / (1 - p) + xp.pow(y_pred, 2 - p) / (2 - p) ) elif p == 0: # Normal distribution, y and y_pred any real number dev = (y_true - y_pred) ** 2 elif p == 1: # Poisson distribution dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred) elif p == 2: # Gamma distribution dev = 2 * (xp.log(y_pred / y_true) + y_true / y_pred - 1) else: dev = 2 * ( xp.pow(y_true, 2 - p) / ((1 - p) * (2 - p)) - y_true * xp.pow(y_pred, 1 - p) / (1 - p) + xp.pow(y_pred, 2 - p) / (2 - p) ) return float(_average(dev, weights=sample_weight, xp=xp)) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "power": [ Interval(Real, None, 0, closed="right"), Interval(Real, 1, None, closed="left"), ], }, prefer_skip_nested_validation=True, ) def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): """Mean Tweedie deviance regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_tweedie_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_tweedie_deviance(y_true, y_pred, power=1) 1.4260... 
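
    With ``power=0`` the Tweedie deviance reduces to the squared error, so the
    result matches :func:`mean_squared_error`:

    >>> mean_tweedie_deviance(y_true, y_pred, power=0)
    1.875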
""" xp, _ = get_namespace(y_true, y_pred) y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput=None, xp=xp ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in mean_tweedie_deviance") if sample_weight is not None: sample_weight = column_or_1d(sample_weight) sample_weight = sample_weight[:, np.newaxis] message = f"Mean Tweedie deviance error with power={power} can only be used on " if power < 0: # 'Extreme stable', y any real number, y_pred > 0 if xp.any(y_pred <= 0): raise ValueError(message + "strictly positive y_pred.") elif power == 0: # Normal, y and y_pred can be any real number pass elif 1 <= power < 2: # Poisson and compound Poisson distribution, y >= 0, y_pred > 0 if xp.any(y_true < 0) or xp.any(y_pred <= 0): raise ValueError(message + "non-negative y and strictly positive y_pred.") elif power >= 2: # Gamma and Extreme stable distribution, y and y_pred > 0 if xp.any(y_true <= 0) or xp.any(y_pred <= 0): raise ValueError(message + "strictly positive y and y_pred.") else: # pragma: nocover # Unreachable statement raise ValueError return _mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=power ) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], }, prefer_skip_nested_validation=True, ) def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): """Mean Poisson deviance regression loss. Poisson deviance is equivalent to the Tweedie deviance with the power parameter `power=1`. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true >= 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_poisson_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_poisson_deviance(y_true, y_pred) 1.4260... """ return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=1) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], }, prefer_skip_nested_validation=True, ) def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): """Mean Gamma deviance regression loss. Gamma deviance is equivalent to the Tweedie deviance with the power parameter `power=2`. It is invariant to scaling of the target variable, and measures relative errors. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true > 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_gamma_deviance >>> y_true = [2, 0.5, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_gamma_deviance(y_true, y_pred) 1.0568... 
""" return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=2) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "power": [ Interval(Real, None, 0, closed="right"), Interval(Real, 1, None, closed="left"), ], }, prefer_skip_nested_validation=True, ) def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0): """ :math:`D^2` regression score function, fraction of Tweedie deviance explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical mean of `y_true` as constant prediction, disregarding the input features, gets a D^2 score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.0 Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to r2_score. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- z : float The D^2 score. Notes ----- This is not a symmetric function. Like R^2, D^2 score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_tweedie_score >>> y_true = [0.5, 1, 2.5, 7] >>> y_pred = [1, 1, 5, 3.5] >>> d2_tweedie_score(y_true, y_pred) 0.285... >>> d2_tweedie_score(y_true, y_pred, power=1) 0.487... >>> d2_tweedie_score(y_true, y_pred, power=2) 0.630... >>> d2_tweedie_score(y_true, y_true, power=2) 1.0 """ xp, _ = get_namespace(y_true, y_pred) y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput=None, xp=xp ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in d2_tweedie_score") if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." 
warnings.warn(msg, UndefinedMetricWarning) return float("nan") y_true, y_pred = xp.squeeze(y_true, axis=1), xp.squeeze(y_pred, axis=1) numerator = mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=power ) y_avg = _average(y_true, weights=sample_weight, xp=xp) denominator = _mean_tweedie_deviance( y_true, y_avg, sample_weight=sample_weight, power=power ) return 1 - numerator / denominator @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "alpha": [Interval(Real, 0, 1, closed="both")], "multioutput": [ StrOptions({"raw_values", "uniform_average"}), "array-like", ], }, prefer_skip_nested_validation=True, ) def d2_pinball_score( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" ): """ :math:`D^2` regression score function, fraction of pinball loss explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical alpha-quantile of `y_true` as constant prediction, disregarding the input features, gets a :math:`D^2` score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.1 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. alpha : float, default=0.5 Slope of the pinball deviance. It determines the quantile level alpha for which the pinball deviance and also D2 are optimal. The default `alpha=0.5` is equivalent to `d2_absolute_error_score`. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. Returns ------- score : float or ndarray of floats The :math:`D^2` score with a pinball deviance or ndarray of scores if `multioutput='raw_values'`. Notes ----- Like :math:`R^2`, :math:`D^2` score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for a single point and will return a NaN value if n_samples is less than two. This metric is not a built-in :ref:`string name scorer ` to use along with tools such as :class:`~sklearn.model_selection.GridSearchCV` or :class:`~sklearn.model_selection.RandomizedSearchCV`. Instead, you can :ref:`create a scorer object ` using :func:`~sklearn.metrics.make_scorer`, with any desired parameter settings. See the `Examples` section for details. References ---------- .. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999). "Goodness of Fit and Related Inference Processes for Quantile Regression" `_ .. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_pinball_score >>> y_true = [1, 2, 3] >>> y_pred = [1, 3, 3] >>> d2_pinball_score(y_true, y_pred) 0.5 >>> d2_pinball_score(y_true, y_pred, alpha=0.9) 0.772... >>> d2_pinball_score(y_true, y_pred, alpha=0.1) -1.045... 
>>> d2_pinball_score(y_true, y_true, alpha=0.1) 1.0 Creating a scorer object with :func:`~sklearn.metrics.make_scorer`: >>> import numpy as np >>> from sklearn.metrics import make_scorer >>> from sklearn.model_selection import GridSearchCV >>> from sklearn.linear_model import QuantileRegressor >>> X = np.array([[1], [2], [3], [4]]) >>> y = np.array([2.5, 0.0, 2, 8]) >>> pinball_95_scorer = make_scorer(d2_pinball_score, alpha=0.95) >>> grid = GridSearchCV( ... QuantileRegressor(quantile=0.95), ... param_grid={"fit_intercept": [True, False]}, ... scoring=pinball_95_scorer, ... cv=2, ... ).fit(X, y) >>> grid.best_params_ {'fit_intercept': True} """ _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( y_true, y_pred, sample_weight, multioutput ) if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) return float("nan") numerator = mean_pinball_loss( y_true, y_pred, sample_weight=sample_weight, alpha=alpha, multioutput="raw_values", ) if sample_weight is None: y_quantile = np.tile( np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1) ) else: y_quantile = np.tile( _weighted_percentile( y_true, sample_weight=sample_weight, percentile_rank=alpha * 100 ), (len(y_true), 1), ) denominator = mean_pinball_loss( y_true, y_quantile, sample_weight=sample_weight, alpha=alpha, multioutput="raw_values", ) nonzero_numerator = numerator != 0 nonzero_denominator = denominator != 0 valid_score = nonzero_numerator & nonzero_denominator output_scores = np.ones(y_true.shape[1]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 if isinstance(multioutput, str): if multioutput == "raw_values": # return scores individually return output_scores else: # multioutput == "uniform_average" # passing None as weights to np.average results in uniform mean avg_weights = None else: avg_weights = multioutput return float(np.average(output_scores, weights=avg_weights)) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [ StrOptions({"raw_values", "uniform_average"}), "array-like", ], }, prefer_skip_nested_validation=True, ) def d2_absolute_error_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """ :math:`D^2` regression score function, fraction of absolute error explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical median of `y_true` as constant prediction, disregarding the input features, gets a :math:`D^2` score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.1 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 
Returns ------- score : float or ndarray of floats The :math:`D^2` score with an absolute error deviance or ndarray of scores if 'multioutput' is 'raw_values'. Notes ----- Like :math:`R^2`, :math:`D^2` score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_absolute_error_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> d2_absolute_error_score(y_true, y_pred) 0.764... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average') 0.691... >>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values') array([0.8125 , 0.57142857]) >>> y_true = [1, 2, 3] >>> y_pred = [1, 2, 3] >>> d2_absolute_error_score(y_true, y_pred) 1.0 >>> y_true = [1, 2, 3] >>> y_pred = [2, 2, 2] >>> d2_absolute_error_score(y_true, y_pred) 0.0 >>> y_true = [1, 2, 3] >>> y_pred = [3, 2, 1] >>> d2_absolute_error_score(y_true, y_pred) -1.0 """ return d2_pinball_score( y_true, y_pred, sample_weight=sample_weight, alpha=0.5, multioutput=multioutput )