Source code for maxframe.learn.metrics._classification

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List

import numpy as np

from ... import opcodes
from ... import tensor as mt
from ...core import ENTITY_TYPE, ExecutableTuple, OutputType
from ...core.operator import Operator
from ...serialization.serializables import (
    AnyField,
    BoolField,
    FieldTypes,
    Float64Field,
    Int64Field,
    KeyField,
    ListField,
    StringField,
)
from ...tensor.core import TensorOrder
from ...typing_ import EntityType
from ..core import LearnOperatorMixin
from ..utils import check_array, check_consistent_length
from ._check_targets import _check_targets


def _weighted_sum(sample_score, sample_weight, normalize=False):
    """Reduce per-sample scores to a single value.

    Parameters
    ----------
    sample_score : tensor-like
        Per-sample scores. Must expose ``.sum()`` for the unweighted,
        non-normalized case.
    sample_weight : tensor-like or None
        Optional per-sample weights.
    normalize : bool, default=False
        If true, return the (optionally weighted) average of the scores.

    Returns
    -------
    The weighted average when ``normalize`` is set; otherwise the dot product
    of scores and weights when weights are given, or the plain sum of the
    scores when ``sample_weight`` is ``None``.
    """
    if normalize:
        return mt.average(sample_score, weights=sample_weight)
    if sample_weight is None:
        return sample_score.sum()
    return mt.dot(sample_score, sample_weight)


class AccuracyScore(Operator, LearnOperatorMixin):
    """Operator that declares a scalar accuracy-score tensor.

    The operator only describes the computation (inputs, output dtype and
    shape); the score itself is evaluated when the resulting tileable is
    executed.
    """

    _op_type_ = opcodes.ACCURACY_SCORE

    # Targets after validation by ``_check_targets``.
    y_true = AnyField("y_true", default=None)
    y_pred = AnyField("y_pred", default=None)
    # Whether the result is a fraction (True) or a count (False).
    normalize = BoolField("normalize", default=None)
    # Optional per-sample weights; only treated as an input when it is an entity.
    sample_weight = AnyField("sample_weight", default=None)
    # First value returned by ``_check_targets``; always passed as an input.
    type_true = KeyField("type_true", default=None)

    @classmethod
    def _set_inputs(cls, op: "AccuracyScore", inputs: List[EntityType]):
        """Rebind entity-valued fields of ``op`` to the given ``inputs``.

        Inputs are consumed in the same order ``__call__`` emits them:
        ``y_true``, ``y_pred``, ``type_true`` and, when it is an entity,
        ``sample_weight``.
        """
        super()._set_inputs(op, inputs)
        inputs_iter = iter(op.inputs)
        if op.y_true is not None:
            op.y_true = next(inputs_iter)
        if op.y_pred is not None:
            op.y_pred = next(inputs_iter)
        if op.type_true is not None:
            op.type_true = next(inputs_iter)
        if isinstance(op.sample_weight, ENTITY_TYPE):
            op.sample_weight = next(inputs_iter)

    def __call__(self, y_true, y_pred):
        """Validate the targets and create the scalar output tileable.

        Parameters
        ----------
        y_true, y_pred
            Ground-truth and predicted labels; both are passed through
            ``_check_targets`` before being used as operator inputs.

        Returns
        -------
        A zero-dimensional tensor tileable. Its dtype is float when
        ``normalize`` is truthy, otherwise the NumPy result type of the two
        target dtypes.
        """
        self._output_types = [OutputType.tensor]
        type_true, y_true, y_pred = _check_targets(y_true, y_pred)
        # Keep the op's fields in sync with the inputs emitted below.
        # ``_set_inputs`` only consumes an input for ``y_true`` / ``y_pred``
        # when the field is not None, so leaving them unset (op built without
        # targets) would shift every following input by one or two slots.
        self.y_true = y_true
        self.y_pred = y_pred
        self.type_true = type_true
        inputs = [y_true, y_pred, type_true]
        if isinstance(self.sample_weight, ENTITY_TYPE):
            inputs.append(self.sample_weight)

        dtype = (
            np.dtype(float)
            if self.normalize
            else np.result_type(y_true.dtype, y_pred.dtype)
        )
        return self.new_tileable(
            inputs, dtype=dtype, shape=(), order=TensorOrder.C_ORDER
        )


def accuracy_score(
    y_true,
    y_pred,
    normalize=True,
    sample_weight=None,
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Accuracy classification score.

    In multilabel classification, this function computes subset accuracy:
    the set of labels predicted for a sample must *exactly* match the
    corresponding set of labels in y_true.

    Read more in the :ref:`User Guide <accuracy_score>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator tensor / sparse tensor
        Ground truth (correct) labels.

    y_pred : 1d array-like, or label indicator tensor / sparse tensor
        Predicted labels, as returned by a classifier.

    normalize : bool, optional (default=True)
        If ``False``, return the number of correctly classified samples.
        Otherwise, return the fraction of correctly classified samples.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    execute : bool, default=False
        If true, call ``execute`` on the result before returning it;
        otherwise the un-executed result is returned.

    session : optional
        Passed as ``session`` to ``execute`` when ``execute`` is true.

    run_kwargs : dict, optional
        Extra keyword arguments forwarded to ``execute`` when ``execute``
        is true.

    Returns
    -------
    score : float
        If ``normalize == True``, return the fraction of correctly
        classified samples (float), else returns the number of correctly
        classified samples (int).

        The best performance is 1 with ``normalize == True`` and the number
        of samples with ``normalize == False``.

    See also
    --------
    jaccard_score, hamming_loss, zero_one_loss

    Notes
    -----
    In binary and multiclass classification, this function is equal
    to the ``jaccard_score`` function.

    Examples
    --------
    >>> from maxframe.learn.metrics import accuracy_score
    >>> y_pred = [0, 2, 1, 3]
    >>> y_true = [0, 1, 2, 3]
    >>> accuracy_score(y_true, y_pred).execute()
    0.5
    >>> accuracy_score(y_true, y_pred, normalize=False).execute()
    2

    In the multilabel case with binary label indicators:

    >>> import maxframe.tensor as mt
    >>> accuracy_score(mt.array([[0, 1], [1, 1]]), mt.ones((2, 2))).execute()
    0.5
    """
    # The operator validates the targets itself when it is called.
    accuracy_op = AccuracyScore(
        y_true=y_true,
        y_pred=y_pred,
        normalize=normalize,
        sample_weight=sample_weight,
    )
    result = accuracy_op(y_true, y_pred)
    if execute:
        return result.execute(session=session, **(run_kwargs or {}))
    return result
class LogLoss(Operator, LearnOperatorMixin):
    """Operator that declares a scalar log-loss tensor.

    The operator only describes the computation (inputs, output dtype and
    shape); the loss itself is evaluated when the resulting tileable is
    executed.
    """

    _op_type_ = opcodes.LOG_LOSS

    y_true = AnyField("y_true")
    y_pred = AnyField("y_pred")
    # Clipping bound applied to probabilities (see ``log_loss``).
    eps = Float64Field("eps", default=1e-15)
    # Mean loss per sample when true, summed loss otherwise.
    normalize = BoolField("normalize", default=True)
    # Optional values; only treated as inputs when they are entities.
    sample_weight = AnyField("sample_weight", default=None)
    labels = AnyField("labels", default=None)

    @classmethod
    def _set_inputs(cls, op: "LogLoss", inputs: List[EntityType]):
        """Rebind entity-valued fields of ``op`` to the given ``inputs``.

        Inputs are consumed in the order ``__call__`` emits them: ``y_true``,
        ``y_pred``, then ``sample_weight`` and ``labels`` when they are
        entities.
        """
        super()._set_inputs(op, inputs)
        inputs_iter = iter(op.inputs)
        op.y_true = next(inputs_iter)
        op.y_pred = next(inputs_iter)
        if isinstance(op.sample_weight, ENTITY_TYPE):
            op.sample_weight = next(inputs_iter)
        if isinstance(op.labels, ENTITY_TYPE):
            op.labels = next(inputs_iter)

    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
        """Create the scalar output tileable for the given targets.

        Parameters
        ----------
        y_true, y_pred
            Ground-truth labels and predicted probabilities; always used as
            the first two operator inputs.
        sample_weight, labels : optional
            Stored on the operator; appended to the inputs only when they
            are entities.

        Returns
        -------
        A zero-dimensional float tensor tileable.
        """
        self._output_types = [OutputType.tensor]
        self.sample_weight = sample_weight
        self.labels = labels
        inputs = [y_true, y_pred]
        if isinstance(self.sample_weight, ENTITY_TYPE):
            inputs.append(self.sample_weight)
        if isinstance(self.labels, ENTITY_TYPE):
            inputs.append(self.labels)

        # The loss is a floating-point quantity whether it is averaged or
        # summed.  Deriving the dtype from ``y_true`` is wrong because
        # ``y_true`` holds labels: for string labels, promoting a string
        # dtype with a float dtype gives a string dtype or fails outright,
        # depending on the NumPy version.
        return self.new_tileable(
            inputs, dtype=np.dtype(float), shape=(), order=TensorOrder.C_ORDER
        )
def log_loss(
    y_true,
    y_pred,
    *,
    eps=1e-15,
    normalize=True,
    sample_weight=None,
    labels=None,
    execute=False,
    session=None,
    run_kwargs=None,
):
    r"""Log loss, aka logistic loss or cross-entropy loss.

    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of a logistic model that returns ``y_pred`` probabilities
    for its training data ``y_true``.
    The log loss is only defined for two or more labels.
    For a single sample with true label :math:`y \in \{0,1\}` and
    a probability estimate :math:`p = \operatorname{Pr}(y = 1)`, the log
    loss is:

    .. math::
        L_{\log}(y, p) = -(y \log (p) + (1 - y) \log (1 - p))

    Read more in the :ref:`User Guide <log_loss>`.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.

    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
        Predicted probabilities, as returned by a classifier's
        predict_proba method. If ``y_pred.shape = (n_samples,)``
        the probabilities provided are assumed to be that of the
        positive class. The labels in ``y_pred`` are assumed to be
        ordered alphabetically, as done by
        :class:`preprocessing.LabelBinarizer`.

    eps : float, default=1e-15
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).

    normalize : bool, default=True
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    labels : array-like, default=None
        If not provided, labels will be inferred from y_true. If ``labels``
        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
        assumed to be binary and are inferred from ``y_true``.

    execute : bool, default=False
        If true, call ``execute`` on the result before returning it;
        otherwise the un-executed result is returned.

    session : optional
        Passed as ``session`` to ``execute`` when ``execute`` is true.

    run_kwargs : dict, optional
        Extra keyword arguments forwarded to ``execute`` when ``execute``
        is true.

    Returns
    -------
    loss : float

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    Examples
    --------
    >>> from maxframe.learn.metrics import log_loss
    >>> log_loss(["spam", "ham", "ham", "spam"],
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]]).execute()
    0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.
    """
    if not isinstance(y_true, (ENTITY_TYPE, np.ndarray)):
        y_true = mt.array(y_true)
    if not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
        y_pred = mt.array(y_pred)
    # The guard must inspect ``sample_weight`` itself.  Testing ``y_pred``
    # here is always true after the conversion above, so plain sequences of
    # weights would never be turned into tensors.
    if sample_weight is not None and not isinstance(
        sample_weight, (ENTITY_TYPE, np.ndarray)
    ):
        sample_weight = mt.array(sample_weight)
    if labels is not None and not isinstance(labels, (ENTITY_TYPE, np.ndarray)):
        labels = mt.array(labels)

    y_pred = check_array(y_pred, ensure_2d=False)
    y_pred, y_true, sample_weight = check_consistent_length(
        y_pred, y_true, sample_weight
    )

    op = LogLoss(eps=eps, normalize=normalize)
    res = op(
        y_true=y_true,
        y_pred=y_pred,
        sample_weight=sample_weight,
        labels=labels,
    )
    if execute:
        return res.execute(session=session, **(run_kwargs or {}))
    return res
class MultiLabelConfusionMatrix(Operator, LearnOperatorMixin):
    """Operator that declares a stack of 2x2 confusion matrices.

    The operator only describes the computation (inputs, output dtype and
    shape); the matrices are evaluated when the resulting tileable is
    executed.
    """

    _op_type_ = opcodes.MULTILABEL_CONFUSION_MATRIX

    y_true = KeyField("y_true", default=None)
    y_pred = KeyField("y_pred", default=None)
    # Optional values; only treated as inputs when they are entities.
    sample_weight = AnyField("sample_weight", default=None)
    labels = AnyField("labels", default=None)
    # One matrix per sample when true; otherwise the count is unknown up front.
    samplewise = BoolField("samplewise", default=False)

    @classmethod
    def _set_inputs(cls, op: "MultiLabelConfusionMatrix", inputs: List[EntityType]):
        """Rebind entity-valued fields of ``op`` to the given ``inputs``.

        Fields are visited in the order ``__call__`` emits them, and only
        those currently holding an entity consume an input.
        """
        super()._set_inputs(op, inputs)
        remaining = iter(op.inputs)
        for field_name in ("y_true", "y_pred", "sample_weight", "labels"):
            if isinstance(getattr(op, field_name), ENTITY_TYPE):
                setattr(op, field_name, next(remaining))

    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
        """Store the arguments and create the output tileable.

        Returns
        -------
        An integer tensor tileable of shape ``(n, 2, 2)``, where ``n`` is
        ``y_true.shape[0]`` when ``samplewise`` is set and ``nan`` (unknown)
        otherwise.
        """
        self._output_types = [OutputType.tensor]
        self.y_true = y_true
        self.y_pred = y_pred
        self.sample_weight = sample_weight
        self.labels = labels

        n_matrices = y_true.shape[0] if self.samplewise else np.nan

        # Only entity arguments become operator inputs.
        entity_inputs = [
            arg
            for arg in (y_true, y_pred, sample_weight, labels)
            if isinstance(arg, ENTITY_TYPE)
        ]
        return self.new_tileable(
            entity_inputs, shape=(n_matrices, 2, 2), dtype=np.dtype(int)
        )
def multilabel_confusion_matrix(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    labels=None,
    samplewise=False,
    execute=False,
    session=None,
    run_kwargs=None,
):
    """
    Compute a confusion matrix for each class or sample.

    Compute class-wise (default) or sample-wise (samplewise=True) multilabel
    confusion matrix to evaluate the accuracy of a classification, and output
    confusion matrices for each class or sample.

    In multilabel confusion matrix :math:`MCM`, the count of true negatives
    is :math:`MCM_{:,0,0}`, false negatives is :math:`MCM_{:,1,0}`,
    true positives is :math:`MCM_{:,1,1}` and false positives is
    :math:`MCM_{:,0,1}`.

    Multiclass data will be treated as if binarized under a one-vs-rest
    transformation. Returned confusion matrices will be in the order of
    sorted unique labels in the union of (y_true, y_pred).

    Read more in the :ref:`User Guide <multilabel_confusion_matrix>`.

    Parameters
    ----------
    y_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \
            (n_samples,)
        Ground truth (correct) target values.

    y_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \
            (n_samples,)
        Estimated targets as returned by a classifier.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    labels : array-like of shape (n_classes,), default=None
        A list of classes or column indices to select some (or to force
        inclusion of classes absent from the data).

    samplewise : bool, default=False
        In the multilabel case, this calculates a confusion matrix per sample.

    execute : bool, default=False
        If true, call ``execute`` on the result before returning it;
        otherwise the un-executed result is returned.

    session : optional
        Passed as ``session`` to ``execute`` when ``execute`` is true.

    run_kwargs : dict, optional
        Extra keyword arguments forwarded to ``execute`` when ``execute``
        is true.

    Returns
    -------
    multi_confusion : ndarray of shape (n_outputs, 2, 2)
        A 2x2 confusion matrix corresponding to each output in the input.
        When calculating class-wise multi_confusion (default), then
        n_outputs = n_labels; when calculating sample-wise multi_confusion
        (samplewise=True), n_outputs = n_samples. If ``labels`` is defined,
        the results will be returned in the order specified in ``labels``,
        otherwise the results will be returned in sorted order by default.

    See Also
    --------
    confusion_matrix : Compute confusion matrix to evaluate the accuracy of a
        classifier.

    Notes
    -----
    The `multilabel_confusion_matrix` calculates class-wise or sample-wise
    multilabel confusion matrices, and in multiclass tasks, labels are
    binarized under a one-vs-rest way; while
    :func:`~sklearn.metrics.confusion_matrix` calculates one confusion matrix
    for confusion between every two classes.

    Examples
    --------
    Multiclass case:

    >>> import maxframe.tensor as mt
    >>> from maxframe.learn.metrics import multilabel_confusion_matrix
    >>> y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
    >>> y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
    >>> multilabel_confusion_matrix(y_true, y_pred,
    ...                             labels=["ant", "bird", "cat"]).execute()
    array([[[3, 1],
            [0, 2]],
    <BLANKLINE>
           [[5, 0],
            [1, 0]],
    <BLANKLINE>
           [[2, 1],
            [1, 2]]])

    Multilabel-indicator case not implemented yet.
    """
    if not isinstance(y_true, (ENTITY_TYPE, np.ndarray)):
        y_true = mt.array(y_true)
    if not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
        y_pred = mt.array(y_pred)
    # The guard must inspect ``sample_weight`` itself.  Testing ``y_pred``
    # here is always true after the conversion above, so plain sequences of
    # weights would never be turned into tensors.
    if sample_weight is not None and not isinstance(
        sample_weight, (ENTITY_TYPE, np.ndarray)
    ):
        sample_weight = mt.array(sample_weight)
    if labels is not None and not isinstance(labels, (ENTITY_TYPE, np.ndarray)):
        labels = mt.array(labels)

    op = MultiLabelConfusionMatrix(samplewise=samplewise)
    res = op(
        y_true=y_true,
        y_pred=y_pred,
        sample_weight=sample_weight,
        labels=labels,
    )
    if execute:
        return res.execute(session=session, **(run_kwargs or {}))
    return res
def _check_zero_division(zero_division):  # pragma: no cover
    """Validate the ``zero_division`` argument.

    Accepts the string ``"warn"`` or an ``int``/``float`` equal to 0 or 1 and
    returns ``None``; any other value raises ``ValueError``.
    """
    accepted = (isinstance(zero_division, str) and zero_division == "warn") or (
        isinstance(zero_division, (int, float)) and zero_division in [0, 1]
    )
    if not accepted:
        raise ValueError(
            f'Got zero_division={zero_division}. Must be one of ["warn", 0, 1]'
        )


class PrecisionRecallFScoreSupport(Operator, LearnOperatorMixin):
    """Operator behind ``precision_recall_fscore_support``.

    Calling an instance records the targets, optional sample weights and
    labels on the operator and creates its outputs: three float outputs, plus
    one int output when ``average`` is not set.
    """

    _op_type_ = opcodes.PRECISION_RECALL_F_SCORE_SUPPORT

    y_true = KeyField("y_true", default=None)
    y_pred = KeyField("y_pred", default=None)
    beta = Float64Field("beta", default=1.0)
    labels = AnyField("labels", default=None)
    pos_label = Int64Field("pos_label", default=1)
    average = StringField("average", default=None)
    warn_for = ListField("warn_for", FieldTypes.string, default=None)
    sample_weight = KeyField("sample_weight", default=None)
    zero_division = AnyField("zero_division", default=None)

    @property
    def output_limit(self) -> int:
        """Number of outputs: 3 when ``average`` is set, otherwise 4."""
        if self.average:
            return 3
        return 4

    @classmethod
    def _set_inputs(cls, op: "PrecisionRecallFScoreSupport", inputs: List[EntityType]):
        """Rebind the operator's entity-valued fields from ``op.inputs``.

        Inputs are consumed in the same order ``__call__`` lists them:
        ``y_true``, ``y_pred``, ``sample_weight``, ``labels``.
        """
        super()._set_inputs(op, inputs)
        remaining = iter(op.inputs)
        # The targets are rebound whenever they are set at all.
        for field_name in ("y_true", "y_pred"):
            if getattr(op, field_name) is not None:
                setattr(op, field_name, next(remaining))
        # Weights and labels only occupy an input slot when they are entities.
        for field_name in ("sample_weight", "labels"):
            if isinstance(getattr(op, field_name), ENTITY_TYPE):
                setattr(op, field_name, next(remaining))

    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
        """Store the arguments on the operator and create its output tileables.

        Output shape is ``(nan,)`` without averaging, ``(y_true.shape[0],)``
        for ``average == "samples"`` and ``()`` for any other averaging mode.
        """
        self._output_types = [OutputType.tensor] * 4

        self.y_true = y_true
        self.y_pred = y_pred
        self.sample_weight = sample_weight
        self.labels = labels

        # Only entities become graph inputs; plain values stay as fields.
        inputs = [
            candidate
            for candidate in (y_true, y_pred, sample_weight, labels)
            if isinstance(candidate, ENTITY_TYPE)
        ]

        if not self.average:
            tensor_shape = (np.nan,)
        elif self.average == "samples":
            tensor_shape = (y_true.shape[0],)
        else:
            tensor_shape = ()

        # The three float outputs share one parameter dict, as before.
        score_params = {"dtype": np.dtype(float), "shape": tensor_shape}
        kws = [score_params] * 3
        if not self.average:
            # The extra int output is only produced without averaging.
            kws.append({"dtype": np.dtype(int), "shape": tensor_shape})
        return self.new_tileables(inputs, kws=kws)
def precision_recall_fscore_support(
    y_true,
    y_pred,
    *,
    beta=1.0,
    labels=None,
    pos_label=1,
    average=None,
    warn_for=("precision", "recall", "f-score"),
    sample_weight=None,
    zero_division="warn",
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Compute precision, recall, F-measure and support for each class

    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of
    true positives and ``fp`` the number of false positives. The precision is
    intuitively the ability of the classifier not to label as positive a sample
    that is negative.

    The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of
    true positives and ``fn`` the number of false negatives. The recall is
    intuitively the ability of the classifier to find all the positive samples.

    The F-beta score can be interpreted as a weighted harmonic mean of
    the precision and recall, where an F-beta score reaches its best
    value at 1 and worst score at 0.

    The F-beta score weights recall more than precision by a factor of
    ``beta``. ``beta == 1.0`` means recall and precision are equally important.

    The support is the number of occurrences of each class in ``y_true``.

    If ``pos_label is None`` and in binary classification, this function
    returns the average precision, recall and F-measure if ``average``
    is one of ``'micro'``, ``'macro'``, ``'weighted'`` or ``'samples'``.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) target values.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Estimated targets as returned by a classifier.

    beta : float, 1.0 by default
        The strength of recall versus precision in the F-score.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, 1 by default
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass or multilabel, this will be ignored;
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
        scores for that label only.

    average : string, [None (default), 'binary', 'micro', 'macro', 'samples', \
                       'weighted']
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    warn_for : tuple or set, for internal use
        This determines which warnings will be made in the case that this
        function is being used to return only one of its metrics.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    zero_division : "warn", 0 or 1, default="warn"
        Sets the value to return when there is a zero division:
           - recall: when there are no positive labels
           - precision: when there are no positive predictions
           - f-score: both

        If set to "warn", this acts as 0, but warnings are also raised.

    execute : bool, default=False
        If True, ``execute`` is called on the result tuple before it is
        returned; otherwise the unevaluated results are returned.

    session : optional
        Passed as ``session`` to ``execute`` when ``execute=True``.

    run_kwargs : dict, optional
        Extra keyword arguments passed to ``execute`` when ``execute=True``.

    Returns
    -------
    precision : float (if average is not None) or array of float, shape =\
        [n_unique_labels]

    recall : float (if average is not None) or array of float, shape =\
        [n_unique_labels]

    fbeta_score : float (if average is not None) or array of float, shape =\
        [n_unique_labels]

    support : None (if average is not None) or array of int, shape =\
        [n_unique_labels]
        The number of occurrences of each label in ``y_true``.

    Raises
    ------
    ValueError
        If ``beta`` is negative or ``zero_division`` is not one of
        ``"warn"``, 0 or 1.

    References
    ----------
    .. [1] `Wikipedia entry for the Precision and recall
           <https://en.wikipedia.org/wiki/Precision_and_recall>`_

    .. [2] `Wikipedia entry for the F1-score
           <https://en.wikipedia.org/wiki/F1_score>`_

    .. [3] `Discriminative Methods for Multi-labeled Classification Advances
           in Knowledge Discovery and Data Mining (2004), pp. 22-30 by Shantanu
           Godbole, Sunita Sarawagi
           <http://www.godbole.net/shantanu/pubs/multilabelsvm-pakdd04.pdf>`_

    Examples
    --------
    >>> import numpy as np
    >>> from maxframe.learn.metrics import precision_recall_fscore_support
    >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
    >>> precision_recall_fscore_support(y_true, y_pred, average='macro')
    (0.22..., 0.33..., 0.26..., None)
    >>> precision_recall_fscore_support(y_true, y_pred, average='micro')
    (0.33..., 0.33..., 0.33..., None)
    >>> precision_recall_fscore_support(y_true, y_pred, average='weighted')
    (0.22..., 0.33..., 0.26..., None)

    It is possible to compute per-label precisions, recalls, F1-scores and
    supports instead of averaging:

    >>> precision_recall_fscore_support(y_true, y_pred, average=None,
    ... labels=['pig', 'dog', 'cat'])
    (array([0.        , 0.        , 0.66...]),
     array([0., 0., 1.]), array([0. , 0. , 0.8]),
     array([2, 2, 2]))

    Notes
    -----
    When ``true positive + false positive == 0``, precision is undefined;
    When ``true positive + false negative == 0``, recall is undefined.
    In such cases, by default the metric will be set to 0, as will f-score,
    and ``UndefinedMetricWarning`` will be raised. This behavior can be
    modified with ``zero_division``.
    """
    _check_zero_division(zero_division)
    if beta < 0:
        raise ValueError("beta should be >=0 in the F-beta score")

    # Entities and numpy arrays are passed through untouched; anything else
    # (lists, tuples, ...) is wrapped into a tensor first.
    if not isinstance(y_true, (ENTITY_TYPE, np.ndarray)):
        y_true = mt.array(y_true)
    if not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
        y_pred = mt.array(y_pred)
    # The type check must look at ``sample_weight`` itself: ``y_pred`` has
    # already been normalised above, so testing it would never convert
    # list-like weights.
    if sample_weight is not None and not isinstance(
        sample_weight, (ENTITY_TYPE, np.ndarray)
    ):
        sample_weight = mt.array(sample_weight)
    if labels is not None and not isinstance(labels, (ENTITY_TYPE, np.ndarray)):
        labels = mt.array(labels)

    op = PrecisionRecallFScoreSupport(
        beta=beta,
        pos_label=pos_label,
        average=average,
        warn_for=warn_for,
        zero_division=zero_division,
    )
    res = ExecutableTuple(
        op(y_true, y_pred, sample_weight=sample_weight, labels=labels)
    )
    if execute:
        res = res.execute(session=session, **(run_kwargs or {}))
    # With averaging the operator yields only three outputs; pad with None so
    # callers can always unpack four values.
    return res if op.output_limit == 4 else res + (None,)
def precision_score(
    y_true,
    y_pred,
    *,
    labels=None,
    pos_label=1,
    average="binary",
    sample_weight=None,
    zero_division="warn",
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Compute the precision

    Precision is ``tp / (tp + fp)``, with ``tp`` the number of true positives
    and ``fp`` the number of false positives. Intuitively it measures the
    ability of the classifier not to label as positive a sample that is
    negative.

    The best value is 1 and the worst value is 0.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) target values.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, 1 by default
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass or multilabel, this will be ignored;
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
        scores for that label only.

    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
                       'weighted']
        This parameter is required for multiclass/multilabel targets.
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    zero_division : "warn", 0 or 1, default="warn"
        Sets the value to return when there is a zero division. If set to
        "warn", this acts as 0, but warnings are also raised.

    execute, session, run_kwargs
        Forwarded unchanged to :func:`precision_recall_fscore_support`.

    Returns
    -------
    precision : float (if average is not None) or array of float, shape =\
        [n_unique_labels]
        Precision of the positive class in binary classification or weighted
        average of the precision of each class for the multiclass task.

    See also
    --------
    precision_recall_fscore_support, multilabel_confusion_matrix

    Examples
    --------
    >>> from maxframe.learn.metrics import precision_score
    >>> y_true = [0, 1, 2, 0, 1, 2]
    >>> y_pred = [0, 2, 1, 0, 0, 1]
    >>> precision_score(y_true, y_pred, average='macro')
    0.22...
    >>> precision_score(y_true, y_pred, average='micro')
    0.33...
    >>> precision_score(y_true, y_pred, average='weighted')
    0.22...
    >>> precision_score(y_true, y_pred, average=None)
    array([0.66..., 0.        , 0.        ])
    >>> y_pred = [0, 0, 0, 0, 0, 0]
    >>> precision_score(y_true, y_pred, average=None)
    array([0.33..., 0.        , 0.        ])
    >>> precision_score(y_true, y_pred, average=None, zero_division=1)
    array([0.33..., 1.        , 1.        ])

    Notes
    -----
    When ``true positive + false positive == 0``, precision returns 0 and
    raises ``UndefinedMetricWarning``. This behavior can be
    modified with ``zero_division``.
    """
    # Delegate to the combined metric and keep only the first of its four
    # results; warnings are restricted to the precision metric.
    precision, _recall, _fscore, _support = precision_recall_fscore_support(
        y_true,
        y_pred,
        labels=labels,
        pos_label=pos_label,
        average=average,
        warn_for=("precision",),
        sample_weight=sample_weight,
        zero_division=zero_division,
        execute=execute,
        session=session,
        run_kwargs=run_kwargs,
    )
    return precision
def recall_score(
    y_true,
    y_pred,
    *,
    labels=None,
    pos_label=1,
    average="binary",
    sample_weight=None,
    zero_division="warn",
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Compute the recall

    Recall is ``tp / (tp + fn)``, with ``tp`` the number of true positives and
    ``fn`` the number of false negatives. Intuitively it measures the ability
    of the classifier to find all the positive samples.

    The best value is 1 and the worst value is 0.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) target values.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, 1 by default
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass or multilabel, this will be ignored;
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
        scores for that label only.

    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
                       'weighted']
        This parameter is required for multiclass/multilabel targets.
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    zero_division : "warn", 0 or 1, default="warn"
        Sets the value to return when there is a zero division. If set to
        "warn", this acts as 0, but warnings are also raised.

    execute, session, run_kwargs
        Forwarded unchanged to :func:`precision_recall_fscore_support`.

    Returns
    -------
    recall : float (if average is not None) or array of float, shape =\
        [n_unique_labels]
        Recall of the positive class in binary classification or weighted
        average of the recall of each class for the multiclass task.

    See also
    --------
    precision_recall_fscore_support, balanced_accuracy_score,
    multilabel_confusion_matrix

    Examples
    --------
    >>> from maxframe.learn.metrics import recall_score
    >>> y_true = [0, 1, 2, 0, 1, 2]
    >>> y_pred = [0, 2, 1, 0, 0, 1]
    >>> recall_score(y_true, y_pred, average='macro')
    0.33...
    >>> recall_score(y_true, y_pred, average='micro')
    0.33...
    >>> recall_score(y_true, y_pred, average='weighted')
    0.33...
    >>> recall_score(y_true, y_pred, average=None)
    array([1., 0., 0.])
    >>> y_true = [0, 0, 0, 0, 0, 0]
    >>> recall_score(y_true, y_pred, average=None)
    array([0.5, 0. , 0. ])
    >>> recall_score(y_true, y_pred, average=None, zero_division=1)
    array([0.5, 1. , 1. ])

    Notes
    -----
    When ``true positive + false negative == 0``, recall returns 0 and raises
    ``UndefinedMetricWarning``. This behavior can be modified with
    ``zero_division``.
    """
    # Delegate to the combined metric and keep only the second of its four
    # results; warnings are restricted to the recall metric.
    _precision, recall, _fscore, _support = precision_recall_fscore_support(
        y_true,
        y_pred,
        labels=labels,
        pos_label=pos_label,
        average=average,
        warn_for=("recall",),
        sample_weight=sample_weight,
        zero_division=zero_division,
        execute=execute,
        session=session,
        run_kwargs=run_kwargs,
    )
    return recall
def f1_score(
    y_true,
    y_pred,
    *,
    labels=None,
    pos_label=1,
    average="binary",
    sample_weight=None,
    zero_division="warn",
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Compute the F1 score, also known as balanced F-score or F-measure

    The F1 score combines precision and recall with equal relative
    contribution; it reaches its best value at 1 and its worst at 0.
    The formula for the F1 score is::

        F1 = 2 * (precision * recall) / (precision + recall)

    In the multi-class and multi-label case, this is the average of
    the F1 score of each class with weighting depending on the ``average``
    parameter.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) target values.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Estimated targets as returned by a classifier.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, 1 by default
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass or multilabel, this will be ignored;
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
        scores for that label only.

    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
                       'weighted']
        This parameter is required for multiclass/multilabel targets.
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    zero_division : "warn", 0 or 1, default="warn"
        Sets the value to return when there is a zero division, i.e. when all
        predictions and labels are negative. If set to "warn", this acts as 0,
        but warnings are also raised.

    execute, session, run_kwargs
        Forwarded unchanged to :func:`fbeta_score`.

    Returns
    -------
    f1_score : float or array of float, shape = [n_unique_labels]
        F1 score of the positive class in binary classification or weighted
        average of the F1 scores of each class for the multiclass task.

    See also
    --------
    fbeta_score, precision_recall_fscore_support, jaccard_score,
    multilabel_confusion_matrix

    References
    ----------
    .. [1] `Wikipedia entry for the F1-score
           <https://en.wikipedia.org/wiki/F1_score>`_

    Examples
    --------
    >>> from maxframe.learn.metrics import f1_score
    >>> y_true = [0, 1, 2, 0, 1, 2]
    >>> y_pred = [0, 2, 1, 0, 0, 1]
    >>> f1_score(y_true, y_pred, average='macro')
    0.26...
    >>> f1_score(y_true, y_pred, average='micro')
    0.33...
    >>> f1_score(y_true, y_pred, average='weighted')
    0.26...
    >>> f1_score(y_true, y_pred, average=None)
    array([0.8, 0. , 0. ])
    >>> y_true = [0, 0, 0, 0, 0, 0]
    >>> y_pred = [0, 0, 0, 0, 0, 0]
    >>> f1_score(y_true, y_pred, zero_division=1)
    1.0...

    Notes
    -----
    When ``true positive + false positive == 0``, precision is undefined;
    When ``true positive + false negative == 0``, recall is undefined.
    In such cases, by default the metric will be set to 0, as will f-score,
    and ``UndefinedMetricWarning`` will be raised. This behavior can be
    modified with ``zero_division``.
    """
    # F1 is the F-beta score evaluated with beta fixed to 1.
    f1 = fbeta_score(
        y_true,
        y_pred,
        beta=1,
        labels=labels,
        pos_label=pos_label,
        average=average,
        sample_weight=sample_weight,
        zero_division=zero_division,
        execute=execute,
        session=session,
        run_kwargs=run_kwargs,
    )
    return f1
def fbeta_score(
    y_true,
    y_pred,
    *,
    beta,
    labels=None,
    pos_label=1,
    average="binary",
    sample_weight=None,
    zero_division="warn",
    execute=False,
    session=None,
    run_kwargs=None,
):
    """Compute the F-beta score

    The F-beta score is the weighted harmonic mean of precision and recall,
    reaching its optimal value at 1 and its worst value at 0.

    The `beta` parameter determines the weight of recall in the combined
    score. ``beta < 1`` lends more weight to precision, while ``beta > 1``
    favors recall (``beta -> 0`` considers only precision, ``beta -> +inf``
    only recall).

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    y_true : 1d array-like, or label indicator array / sparse matrix
        Ground truth (correct) target values.

    y_pred : 1d array-like, or label indicator array / sparse matrix
        Estimated targets as returned by a classifier.

    beta : float
        Determines the weight of recall in the combined score.

    labels : list, optional
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, 1 by default
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass or multilabel, this will be ignored;
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
        scores for that label only.

    average : string, [None, 'binary' (default), 'micro', 'macro', 'samples', \
                       'weighted']
        This parameter is required for multiclass/multilabel targets.
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    zero_division : "warn", 0 or 1, default="warn"
        Sets the value to return when there is a zero division, i.e. when all
        predictions and labels are negative. If set to "warn", this acts as 0,
        but warnings are also raised.

    execute, session, run_kwargs
        Forwarded unchanged to :func:`precision_recall_fscore_support`.

    Returns
    -------
    fbeta_score : float (if average is not None) or array of float, shape =\
        [n_unique_labels]
        F-beta score of the positive class in binary classification or weighted
        average of the F-beta score of each class for the multiclass task.

    See also
    --------
    precision_recall_fscore_support, multilabel_confusion_matrix

    References
    ----------
    .. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011).
           Modern Information Retrieval. Addison Wesley, pp. 327-328.

    .. [2] `Wikipedia entry for the F1-score
           <https://en.wikipedia.org/wiki/F1_score>`_

    Examples
    --------
    >>> from maxframe.learn.metrics import fbeta_score
    >>> y_true = [0, 1, 2, 0, 1, 2]
    >>> y_pred = [0, 2, 1, 0, 0, 1]
    >>> fbeta_score(y_true, y_pred, average='macro', beta=0.5)
    0.23...
    >>> fbeta_score(y_true, y_pred, average='micro', beta=0.5)
    0.33...
    >>> fbeta_score(y_true, y_pred, average='weighted', beta=0.5)
    0.23...
    >>> fbeta_score(y_true, y_pred, average=None, beta=0.5)
    array([0.71..., 0.        , 0.        ])

    Notes
    -----
    When ``true positive + false positive == 0`` or
    ``true positive + false negative == 0``, f-score returns 0 and raises
    ``UndefinedMetricWarning``. This behavior can be
    modified with ``zero_division``.
    """
    # Delegate to the combined metric and keep only the third of its four
    # results; warnings are restricted to the f-score metric.
    _precision, _recall, fscore, _support = precision_recall_fscore_support(
        y_true,
        y_pred,
        beta=beta,
        labels=labels,
        pos_label=pos_label,
        average=average,
        warn_for=("f-score",),
        sample_weight=sample_weight,
        zero_division=zero_division,
        execute=execute,
        session=session,
        run_kwargs=run_kwargs,
    )
    return fscore