"""Metrics to assess performance on regression task. Functions named as ``*_score`` return a scalar value to maximize: the higher the better. Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize: the lower the better. """ # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause import warnings from numbers import Real import numpy as np from sklearn.exceptions import UndefinedMetricWarning from sklearn.utils._array_api import ( _average, _find_matching_floating_dtype, _median, get_namespace, get_namespace_and_device, size, ) from sklearn.utils._array_api import _xlogy as xlogy from sklearn.utils._param_validation import Interval, StrOptions, validate_params from sklearn.utils.stats import _weighted_percentile from sklearn.utils.validation import ( _check_sample_weight, _num_samples, check_array, check_consistent_length, column_or_1d, ) __ALL__ = [ "max_error", "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "mean_absolute_percentage_error", "mean_pinball_loss", "r2_score", "root_mean_squared_log_error", "root_mean_squared_error", "explained_variance_score", "mean_tweedie_deviance", "mean_poisson_deviance", "mean_gamma_deviance", "d2_tweedie_score", "d2_pinball_score", "d2_absolute_error_score", ] def _check_reg_targets( y_true, y_pred, sample_weight, multioutput, dtype="numeric", xp=None ): """Check that y_true, y_pred and sample_weight belong to the same regression task. To reduce redundancy when calling `_find_matching_floating_dtype`, please use `_check_reg_targets_with_floating_dtype` instead. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,) or None Sample weights. multioutput : array-like or string in ['raw_values', uniform_average', 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). dtype : str or list, default="numeric" the dtype argument passed to check_array. xp : module, default=None Precomputed array namespace module. When passed, typically from a caller that has already performed inspection of its own inputs, skips array namespace inspection. Returns ------- type_true : one of {'continuous', continuous-multioutput'} The type of the true target data, as output by 'utils.multiclass.type_of_target'. y_true : array-like of shape (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,) or None Sample weights. multioutput : array-like of shape (n_outputs) or string in ['raw_values', uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or just the corresponding argument if ``multioutput`` is a correct keyword. 
""" xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp) check_consistent_length(y_true, y_pred, sample_weight) y_true = check_array(y_true, ensure_2d=False, dtype=dtype) y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, y_true, dtype=dtype) if y_true.ndim == 1: y_true = xp.reshape(y_true, (-1, 1)) if y_pred.ndim == 1: y_pred = xp.reshape(y_pred, (-1, 1)) if y_true.shape[1] != y_pred.shape[1]: raise ValueError( "y_true and y_pred have different number of output ({0}!={1})".format( y_true.shape[1], y_pred.shape[1] ) ) n_outputs = y_true.shape[1] allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted") if isinstance(multioutput, str): if multioutput not in allowed_multioutput_str: raise ValueError( "Allowed 'multioutput' string values are {}. " "You provided multioutput={!r}".format( allowed_multioutput_str, multioutput ) ) elif multioutput is not None: multioutput = check_array(multioutput, ensure_2d=False) if n_outputs == 1: raise ValueError("Custom weights are useful only in multi-output cases.") elif n_outputs != multioutput.shape[0]: raise ValueError( "There must be equally many custom weights " f"({multioutput.shape[0]}) as outputs ({n_outputs})." ) y_type = "continuous" if n_outputs == 1 else "continuous-multioutput" return y_type, y_true, y_pred, sample_weight, multioutput def _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=None ): """Ensures y_true, y_pred, and sample_weight correspond to same regression task. Extends `_check_reg_targets` by automatically selecting a suitable floating-point data type for inputs using `_find_matching_floating_dtype`. Use this private method only when converting inputs to array API-compatibles. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,) multioutput : array-like or string in ['raw_values', 'uniform_average', \ 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). xp : module, default=None Precomputed array namespace module. When passed, typically from a caller that has already performed inspection of its own inputs, skips array namespace inspection. Returns ------- type_true : one of {'continuous', 'continuous-multioutput'} The type of the true target data, as output by 'utils.multiclass.type_of_target'. y_true : array-like of shape (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : array-like of shape (n_outputs) or string in ['raw_values', \ 'uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or just the corresponding argument if ``multioutput`` is a correct keyword. 
""" dtype_name = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp) y_type, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( y_true, y_pred, sample_weight, multioutput, dtype=dtype_name, xp=xp ) return y_type, y_true, y_pred, sample_weight, multioutput @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def mean_absolute_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Mean absolute error regression loss. The mean absolute error is a non-negative floating point value, where best value is 0.0. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or array of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. MAE output is non-negative floating point. The best value is 0.0. Examples -------- >>> from sklearn.metrics import mean_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_absolute_error(y_true, y_pred) 0.5 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> mean_absolute_error(y_true, y_pred) 0.75 >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85... """ xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) output_errors = _average( xp.abs(y_pred - y_true), weights=sample_weight, axis=0, xp=xp ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to _average: uniform mean multioutput = None # Average across the outputs (if needed). # The second call to `_average` should always return # a scalar array that we convert to a Python float to # consistently return the same eager evaluated value. # Therefore, `axis=None`. mean_absolute_error = _average(output_errors, weights=multioutput, xp=xp) return float(mean_absolute_error) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "alpha": [Interval(Real, 0, 1, closed="both")], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def mean_pinball_loss( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" ): """Pinball loss for quantile regression. Read more in the :ref:`User Guide `. 
    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    alpha : float, default=0.5
        Slope of the pinball loss. This loss is equivalent to
        :ref:`mean_absolute_error` when `alpha=0.5`; `alpha=0.95` is minimized
        by estimators of the 95th percentile.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the pinball loss is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        The pinball loss output is a non-negative floating point. The best
        value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_pinball_loss
    >>> y_true = [1, 2, 3]
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
    0.03...
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
    0.3...
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
    0.3...
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
    0.03...
    >>> mean_pinball_loss(y_true, y_true, alpha=0.1)
    0.0
    >>> mean_pinball_loss(y_true, y_true, alpha=0.9)
    0.0
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    diff = y_true - y_pred
    # sign is 1.0 where y_true >= y_pred and 0.0 elsewhere, so the two pinball
    # branches (under- and over-prediction) get weights alpha and 1 - alpha.
    sign = xp.astype(diff >= 0, diff.dtype)
    loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff
    output_errors = _average(loss, weights=sample_weight, axis=0, xp=xp)

    if isinstance(multioutput, str) and multioutput == "raw_values":
        return output_errors

    if isinstance(multioutput, str) and multioutput == "uniform_average":
        # pass None as weights to _average: uniform mean
        multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    return float(_average(output_errors, weights=multioutput, xp=xp))


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_absolute_percentage_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Mean absolute percentage error (MAPE) regression loss.

    Note that we are not using the common "percentage" definition: the
    percentage in the range [0, 100] is converted to a relative value in the
    range [0, 1] by dividing by 100. Thus, an error of 200% corresponds to a
    relative error of 2.

    Read more in the :ref:`User Guide `.

    .. versionadded:: 0.24

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.
        If input is a list then the shape must be (n_outputs,).

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute percentage error
        is returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAPE output is non-negative floating point. The best value is 0.0.
        But note that bad predictions can lead to arbitrarily large
        MAPE values, especially if some `y_true` values are very close to
        zero. Note that we return a large value instead of `inf` when `y_true`
        is zero.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_percentage_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.3273...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.5515...
    >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.6198...
    >>> # the value when some element of the y_true is zero is arbitrarily
    >>> # high because of the division by epsilon
    >>> y_true = [1., 0., 2.4, 7.]
    >>> y_pred = [1.2, 0.1, 2.4, 8.]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    112589990684262.48
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )
    # Clip the denominator at a tiny epsilon so that exact zeros in `y_true`
    # yield a very large (but finite) relative error instead of `inf`.
    epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype, device=device_)
    y_true_abs = xp.abs(y_true)
    mape = xp.abs(y_pred - y_true) / xp.maximum(y_true_abs, epsilon)
    output_errors = _average(mape, weights=sample_weight, axis=0, xp=xp)
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    mean_absolute_percentage_error = _average(output_errors, weights=multioutput, xp=xp)

    return float(mean_absolute_percentage_error)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_squared_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
):
    """Mean squared error regression loss.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or array of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred) 0.375 >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> mean_squared_error(y_true, y_pred) 0.708... >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') array([0.41666667, 1. ]) >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.825... """ xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) output_errors = _average( (y_true - y_pred) ** 2, axis=0, weights=sample_weight, xp=xp ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to _average: uniform mean multioutput = None # Average across the outputs (if needed). # The second call to `_average` should always return # a scalar array that we convert to a Python float to # consistently return the same eager evaluated value. # Therefore, `axis=None`. mean_squared_error = _average(output_errors, weights=multioutput, xp=xp) return float(mean_squared_error) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def root_mean_squared_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Root mean squared error regression loss. Read more in the :ref:`User Guide `. .. versionadded:: 1.4 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import root_mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> root_mean_squared_error(y_true, y_pred) 0.612... >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> root_mean_squared_error(y_true, y_pred) 0.822... 
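
    Per-output errors are available with ``multioutput='raw_values'``:

    >>> root_mean_squared_error(y_true, y_pred, multioutput='raw_values')
    array([0.64549722, 1.        ])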
""" xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) output_errors = xp.sqrt( mean_squared_error( y_true, y_pred, sample_weight=sample_weight, multioutput="raw_values" ) ) if isinstance(multioutput, str): if multioutput == "raw_values": return output_errors elif multioutput == "uniform_average": # pass None as weights to _average: uniform mean multioutput = None # Average across the outputs (if needed). # The second call to `_average` should always return # a scalar array that we convert to a Python float to # consistently return the same eager evaluated value. # Therefore, `axis=None`. root_mean_squared_error = _average(output_errors, weights=multioutput, xp=xp) return float(root_mean_squared_error) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def mean_squared_log_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", ): """Mean squared logarithmic error regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors when the input is of multioutput format. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_log_error >>> y_true = [3, 5, 2.5, 7] >>> y_pred = [2.5, 5, 4, 8] >>> mean_squared_log_error(y_true, y_pred) 0.039... >>> y_true = [[0.5, 1], [1, 2], [7, 6]] >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]] >>> mean_squared_log_error(y_true, y_pred) 0.044... >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values') array([0.00462428, 0.08377444]) >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.060... """ xp, _ = get_namespace(y_true, y_pred) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) if xp.any(y_true <= -1) or xp.any(y_pred <= -1): raise ValueError( "Mean Squared Logarithmic Error cannot be used when " "targets contain values less than or equal to -1." ) return mean_squared_error( xp.log1p(y_true), xp.log1p(y_pred), sample_weight=sample_weight, multioutput=multioutput, ) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], }, prefer_skip_nested_validation=True, ) def root_mean_squared_log_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """Root mean squared logarithmic error regression loss. Read more in the :ref:`User Guide `. .. versionadded:: 1.4 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. 
    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import root_mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> root_mean_squared_log_error(y_true, y_pred)
    0.199...
    """
    xp, _ = get_namespace(y_true, y_pred)

    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    if xp.any(y_true <= -1) or xp.any(y_pred <= -1):
        raise ValueError(
            "Root Mean Squared Logarithmic Error cannot be used when "
            "targets contain values less than or equal to -1."
        )

    return root_mean_squared_error(
        xp.log1p(y_true),
        xp.log1p(y_pred),
        sample_weight=sample_weight,
        multioutput=multioutput,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def median_absolute_error(
    y_true, y_pred, *, multioutput="uniform_average", sample_weight=None
):
    """Median absolute error regression loss.

    Median absolute error output is non-negative floating point. The best
    value is 0.0. Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values. Array-like value
        defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

        .. versionadded:: 0.24

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the median absolute error is
        returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> median_absolute_error(y_true, y_pred)
    0.75
    >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85
    """
    xp, _ = get_namespace(y_true, y_pred, multioutput, sample_weight)
    _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets(
        y_true, y_pred, sample_weight, multioutput
    )
    if sample_weight is None:
        output_errors = _median(xp.abs(y_pred - y_true), axis=0)
    else:
        output_errors = _weighted_percentile(
            xp.abs(y_pred - y_true), sample_weight=sample_weight, average=True
        )
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None
    return float(_average(output_errors, weights=multioutput, xp=xp))


def _assemble_r2_explained_variance(
    numerator, denominator, n_outputs, multioutput, force_finite, xp, device
):
    """Common part used by explained variance score and :math:`R^2` score."""
    dtype = numerator.dtype

    nonzero_denominator = denominator != 0

    if not force_finite:
        # Standard formula, that may lead to NaN or -Inf
        output_scores = 1 - (numerator / denominator)
    else:
        nonzero_numerator = numerator != 0
        # Default = Zero Numerator = perfect predictions. Set to 1.0
        # (note: even if denominator is zero, thus avoiding NaN scores)
        output_scores = xp.ones([n_outputs], device=device, dtype=dtype)
        # Non-zero Numerator and Non-zero Denominator: use the formula
        valid_score = nonzero_denominator & nonzero_numerator

        output_scores[valid_score] = 1 - (
            numerator[valid_score] / denominator[valid_score]
        )

        # Non-zero Numerator and Zero Denominator:
        # arbitrary set to 0.0 to avoid -inf scores
        output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            # return scores individually
            return output_scores
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            avg_weights = None
        elif multioutput == "variance_weighted":
            avg_weights = denominator
            if not xp.any(nonzero_denominator):
                # All weights are zero, _average would raise a ZeroDiv error.
                # This only happens when all y are constant (or 1-element long)
                # Since weights are all equal, fall back to uniform weights.
                avg_weights = None
    else:
        avg_weights = multioutput

    result = _average(output_scores, weights=avg_weights, xp=xp)
    if size(result) == 1:
        return float(result)
    return result


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average", "variance_weighted"}),
            "array-like",
        ],
        "force_finite": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def explained_variance_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    force_finite=True,
):
    """Explained variance regression score function.

    Best possible score is 1.0, lower values are worse.

    In the particular case when ``y_true`` is constant, the explained variance
    score is not finite: it is either ``NaN`` (perfect predictions) or
    ``-Inf`` (imperfect predictions). To prevent such non-finite numbers from
    polluting higher-level experiments such as a grid search cross-validation,
    by default these cases are replaced with 1.0 (perfect predictions) or 0.0
    (imperfect predictions) respectively. If ``force_finite`` is set to
    ``False``, this score falls back on the original Explained Variance
    definition.

    .. note::
       The Explained Variance score is similar to the
       :func:`R^2 score `, but the former does not account
       for systematic offsets in the prediction (such as the intercept in
       linear models, i.e.
different intercepts give the same Explained Variance score). Most often the :func:`R^2 score ` should be preferred. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \ array-like of shape (n_outputs,), default='uniform_average' Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. force_finite : bool, default=True Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant data should be replaced with real numbers (``1.0`` if prediction is perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting for hyperparameters' search procedures (e.g. grid search cross-validation). .. versionadded:: 1.1 Returns ------- score : float or ndarray of floats The explained variance or ndarray if 'multioutput' is 'raw_values'. See Also -------- r2_score : Similar metric, but accounting for systematic offsets in prediction. Notes ----- This is not a symmetric function. Examples -------- >>> from sklearn.metrics import explained_variance_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> explained_variance_score(y_true, y_pred) 0.957... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average') 0.983... >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2] >>> explained_variance_score(y_true, y_pred) 1.0 >>> explained_variance_score(y_true, y_pred, force_finite=False) nan >>> y_true = [-2, -2, -2] >>> y_pred = [-2, -2, -2 + 1e-8] >>> explained_variance_score(y_true, y_pred) 0.0 >>> explained_variance_score(y_true, y_pred, force_finite=False) -inf """ xp, _, device = get_namespace_and_device(y_true, y_pred, sample_weight, multioutput) _, y_true, y_pred, sample_weight, multioutput = ( _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput, xp=xp ) ) y_diff_avg = _average(y_true - y_pred, weights=sample_weight, axis=0, xp=xp) numerator = _average( (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0, xp=xp ) y_true_avg = _average(y_true, weights=sample_weight, axis=0, xp=xp) denominator = _average( (y_true - y_true_avg) ** 2, weights=sample_weight, axis=0, xp=xp ) return _assemble_r2_explained_variance( numerator=numerator, denominator=denominator, n_outputs=y_true.shape[1], multioutput=multioutput, force_finite=force_finite, xp=xp, device=device, ) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [ StrOptions({"raw_values", "uniform_average", "variance_weighted"}), "array-like", None, ], "force_finite": ["boolean"], }, prefer_skip_nested_validation=True, ) def r2_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", force_finite=True, ): """:math:`R^2` (coefficient of determination) regression score function. 
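
    The score is computed as::

        R^2 = 1 - sum(w_i * (y_i - yhat_i)**2) / sum(w_i * (y_i - ybar)**2)

    where ``ybar`` is the (possibly weighted) mean of ``y_true`` and ``w_i``
    are the sample weights (taken as 1 when not provided), matching the
    computation in the implementation below.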
    Best possible score is 1.0 and it can be negative (because the model can
    be arbitrarily worse). In the general case when the true y is
    non-constant, a constant model that always predicts the average y
    disregarding the input features would get an :math:`R^2` score of 0.0.

    In the particular case when ``y_true`` is constant, the :math:`R^2` score
    is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf``
    (imperfect predictions). To prevent such non-finite numbers from polluting
    higher-level experiments such as a grid search cross-validation, by
    default these cases are replaced with 1.0 (perfect predictions) or 0.0
    (imperfect predictions) respectively. You can set ``force_finite`` to
    ``False`` to prevent this fix from happening.

    Note: when the prediction residuals have zero mean, the :math:`R^2` score
    is identical to the
    :func:`Explained Variance score `.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \
            array-like of shape (n_outputs,) or None, default='uniform_average'
        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

        .. versionchanged:: 0.19
            Default value of multioutput is 'uniform_average'.

    force_finite : bool, default=True
        Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant
        data should be replaced with real numbers (``1.0`` if prediction is
        perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting
        for hyperparameters' search procedures (e.g. grid search
        cross-validation).

        .. versionadded:: 1.1

    Returns
    -------
    z : float or ndarray of floats
        The :math:`R^2` score or ndarray of scores if 'multioutput' is
        'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, :math:`R^2` score may be negative (it need not
    actually be the square of a quantity R).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred,
    ...          multioutput='variance_weighted')
    0.938...
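    Per-output scores are available with ``multioutput='raw_values'``:

    >>> r2_score(y_true, y_pred, multioutput='raw_values')
    array([0.965..., 0.908...])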
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    nan
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2 + 1e-8]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    -inf
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )

    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    if _num_samples(y_pred) < 2:
        msg = "R^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        weight = sample_weight[:, None]
    else:
        weight = 1.0

    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)
    denominator = xp.sum(
        weight
        * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2,
        axis=0,
    )

    return _assemble_r2_explained_variance(
        numerator=numerator,
        denominator=denominator,
        n_outputs=y_true.shape[1],
        multioutput=multioutput,
        force_finite=force_finite,
        xp=xp,
        device=device_,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
    },
    prefer_skip_nested_validation=True,
)
def max_error(y_true, y_pred):
    """
    The max_error metric calculates the maximum residual error.

    Read more in the :ref:`User Guide `.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    Returns
    -------
    max_error : float
        A non-negative floating point value (the best value is 0.0).
Examples -------- >>> from sklearn.metrics import max_error >>> y_true = [3, 2, 7, 1] >>> y_pred = [4, 2, 7, 1] >>> max_error(y_true, y_pred) 1.0 """ xp, _ = get_namespace(y_true, y_pred) y_type, y_true, y_pred, _, _ = _check_reg_targets( y_true, y_pred, sample_weight=None, multioutput=None, xp=xp ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in max_error") return float(xp.max(xp.abs(y_true - y_pred))) def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power): """Mean Tweedie deviance regression loss.""" xp, _ = get_namespace(y_true, y_pred) p = power if p < 0: # 'Extreme stable', y any real number, y_pred > 0 dev = 2 * ( xp.pow( xp.where(y_true > 0, y_true, 0.0), 2 - p, ) / ((1 - p) * (2 - p)) - y_true * xp.pow(y_pred, 1 - p) / (1 - p) + xp.pow(y_pred, 2 - p) / (2 - p) ) elif p == 0: # Normal distribution, y and y_pred any real number dev = (y_true - y_pred) ** 2 elif p == 1: # Poisson distribution dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred) elif p == 2: # Gamma distribution dev = 2 * (xp.log(y_pred / y_true) + y_true / y_pred - 1) else: dev = 2 * ( xp.pow(y_true, 2 - p) / ((1 - p) * (2 - p)) - y_true * xp.pow(y_pred, 1 - p) / (1 - p) + xp.pow(y_pred, 2 - p) / (2 - p) ) return float(_average(dev, weights=sample_weight, xp=xp)) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "power": [ Interval(Real, None, 0, closed="right"), Interval(Real, 1, None, closed="left"), ], }, prefer_skip_nested_validation=True, ) def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): """Mean Tweedie deviance regression loss. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_tweedie_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_tweedie_deviance(y_true, y_pred, power=1) 1.4260... 
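
    With ``power=0`` the Tweedie deviance reduces to the squared error, so the
    result matches :func:`mean_squared_error`:

    >>> mean_tweedie_deviance(y_true, y_pred, power=0)
    1.875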
""" xp, _ = get_namespace(y_true, y_pred) y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput=None, xp=xp ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in mean_tweedie_deviance") if sample_weight is not None: sample_weight = column_or_1d(sample_weight) sample_weight = sample_weight[:, np.newaxis] message = f"Mean Tweedie deviance error with power={power} can only be used on " if power < 0: # 'Extreme stable', y any real number, y_pred > 0 if xp.any(y_pred <= 0): raise ValueError(message + "strictly positive y_pred.") elif power == 0: # Normal, y and y_pred can be any real number pass elif 1 <= power < 2: # Poisson and compound Poisson distribution, y >= 0, y_pred > 0 if xp.any(y_true < 0) or xp.any(y_pred <= 0): raise ValueError(message + "non-negative y and strictly positive y_pred.") elif power >= 2: # Gamma and Extreme stable distribution, y and y_pred > 0 if xp.any(y_true <= 0) or xp.any(y_pred <= 0): raise ValueError(message + "strictly positive y and y_pred.") else: # pragma: nocover # Unreachable statement raise ValueError return _mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=power ) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], }, prefer_skip_nested_validation=True, ) def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): """Mean Poisson deviance regression loss. Poisson deviance is equivalent to the Tweedie deviance with the power parameter `power=1`. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true >= 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_poisson_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_poisson_deviance(y_true, y_pred) 1.4260... """ return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=1) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], }, prefer_skip_nested_validation=True, ) def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): """Mean Gamma deviance regression loss. Gamma deviance is equivalent to the Tweedie deviance with the power parameter `power=2`. It is invariant to scaling of the target variable, and measures relative errors. Read more in the :ref:`User Guide `. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true > 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_gamma_deviance >>> y_true = [2, 0.5, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_gamma_deviance(y_true, y_pred) 1.0568... 
""" return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=2) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "power": [ Interval(Real, None, 0, closed="right"), Interval(Real, 1, None, closed="left"), ], }, prefer_skip_nested_validation=True, ) def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0): """ :math:`D^2` regression score function, fraction of Tweedie deviance explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical mean of `y_true` as constant prediction, disregarding the input features, gets a D^2 score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.0 Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to r2_score. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- z : float The D^2 score. Notes ----- This is not a symmetric function. Like R^2, D^2 score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_tweedie_score >>> y_true = [0.5, 1, 2.5, 7] >>> y_pred = [1, 1, 5, 3.5] >>> d2_tweedie_score(y_true, y_pred) 0.285... >>> d2_tweedie_score(y_true, y_pred, power=1) 0.487... >>> d2_tweedie_score(y_true, y_pred, power=2) 0.630... >>> d2_tweedie_score(y_true, y_true, power=2) 1.0 """ xp, _ = get_namespace(y_true, y_pred) y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype( y_true, y_pred, sample_weight, multioutput=None, xp=xp ) if y_type == "continuous-multioutput": raise ValueError("Multioutput not supported in d2_tweedie_score") if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." 
warnings.warn(msg, UndefinedMetricWarning) return float("nan") y_true, y_pred = xp.squeeze(y_true, axis=1), xp.squeeze(y_pred, axis=1) numerator = mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=power ) y_avg = _average(y_true, weights=sample_weight, xp=xp) denominator = _mean_tweedie_deviance( y_true, y_avg, sample_weight=sample_weight, power=power ) return 1 - numerator / denominator @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "alpha": [Interval(Real, 0, 1, closed="both")], "multioutput": [ StrOptions({"raw_values", "uniform_average"}), "array-like", ], }, prefer_skip_nested_validation=True, ) def d2_pinball_score( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" ): """ :math:`D^2` regression score function, fraction of pinball loss explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical alpha-quantile of `y_true` as constant prediction, disregarding the input features, gets a :math:`D^2` score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.1 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. alpha : float, default=0.5 Slope of the pinball deviance. It determines the quantile level alpha for which the pinball deviance and also D2 are optimal. The default `alpha=0.5` is equivalent to `d2_absolute_error_score`. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. Returns ------- score : float or ndarray of floats The :math:`D^2` score with a pinball deviance or ndarray of scores if `multioutput='raw_values'`. Notes ----- Like :math:`R^2`, :math:`D^2` score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for a single point and will return a NaN value if n_samples is less than two. This metric is not a built-in :ref:`string name scorer ` to use along with tools such as :class:`~sklearn.model_selection.GridSearchCV` or :class:`~sklearn.model_selection.RandomizedSearchCV`. Instead, you can :ref:`create a scorer object ` using :func:`~sklearn.metrics.make_scorer`, with any desired parameter settings. See the `Examples` section for details. References ---------- .. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999). "Goodness of Fit and Related Inference Processes for Quantile Regression" `_ .. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_pinball_score >>> y_true = [1, 2, 3] >>> y_pred = [1, 3, 3] >>> d2_pinball_score(y_true, y_pred) 0.5 >>> d2_pinball_score(y_true, y_pred, alpha=0.9) 0.772... >>> d2_pinball_score(y_true, y_pred, alpha=0.1) -1.045... 
>>> d2_pinball_score(y_true, y_true, alpha=0.1) 1.0 Creating a scorer object with :func:`~sklearn.metrics.make_scorer`: >>> import numpy as np >>> from sklearn.metrics import make_scorer >>> from sklearn.model_selection import GridSearchCV >>> from sklearn.linear_model import QuantileRegressor >>> X = np.array([[1], [2], [3], [4]]) >>> y = np.array([2.5, 0.0, 2, 8]) >>> pinball_95_scorer = make_scorer(d2_pinball_score, alpha=0.95) >>> grid = GridSearchCV( ... QuantileRegressor(quantile=0.95), ... param_grid={"fit_intercept": [True, False]}, ... scoring=pinball_95_scorer, ... cv=2, ... ).fit(X, y) >>> grid.best_params_ {'fit_intercept': True} """ _, y_true, y_pred, sample_weight, multioutput = _check_reg_targets( y_true, y_pred, sample_weight, multioutput ) if _num_samples(y_pred) < 2: msg = "D^2 score is not well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) return float("nan") numerator = mean_pinball_loss( y_true, y_pred, sample_weight=sample_weight, alpha=alpha, multioutput="raw_values", ) if sample_weight is None: y_quantile = np.tile( np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1) ) else: y_quantile = np.tile( _weighted_percentile( y_true, sample_weight=sample_weight, percentile_rank=alpha * 100 ), (len(y_true), 1), ) denominator = mean_pinball_loss( y_true, y_quantile, sample_weight=sample_weight, alpha=alpha, multioutput="raw_values", ) nonzero_numerator = numerator != 0 nonzero_denominator = denominator != 0 valid_score = nonzero_numerator & nonzero_denominator output_scores = np.ones(y_true.shape[1]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 if isinstance(multioutput, str): if multioutput == "raw_values": # return scores individually return output_scores else: # multioutput == "uniform_average" # passing None as weights to np.average results in uniform mean avg_weights = None else: avg_weights = multioutput return float(np.average(output_scores, weights=avg_weights)) @validate_params( { "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [ StrOptions({"raw_values", "uniform_average"}), "array-like", ], }, prefer_skip_nested_validation=True, ) def d2_absolute_error_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" ): """ :math:`D^2` regression score function, fraction of absolute error explained. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A model that always uses the empirical median of `y_true` as constant prediction, disregarding the input features, gets a :math:`D^2` score of 0.0. Read more in the :ref:`User Guide `. .. versionadded:: 1.1 Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,), default='uniform_average' Defines aggregating of multiple output values. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 
Returns ------- score : float or ndarray of floats The :math:`D^2` score with an absolute error deviance or ndarray of scores if 'multioutput' is 'raw_values'. Notes ----- Like :math:`R^2`, :math:`D^2` score may be negative (it need not actually be the square of a quantity D). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. Wainwright. "Statistical Learning with Sparsity: The Lasso and Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ Examples -------- >>> from sklearn.metrics import d2_absolute_error_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> d2_absolute_error_score(y_true, y_pred) 0.764... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average') 0.691... >>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values') array([0.8125 , 0.57142857]) >>> y_true = [1, 2, 3] >>> y_pred = [1, 2, 3] >>> d2_absolute_error_score(y_true, y_pred) 1.0 >>> y_true = [1, 2, 3] >>> y_pred = [2, 2, 2] >>> d2_absolute_error_score(y_true, y_pred) 0.0 >>> y_true = [1, 2, 3] >>> y_pred = [3, 2, 1] >>> d2_absolute_error_score(y_true, y_pred) -1.0 """ return d2_pinball_score( y_true, y_pred, sample_weight=sample_weight, alpha=0.5, multioutput=multioutput )