Source code for maxframe.learn.preprocessing._label._label_encoder

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from .... import tensor as mt
from ....udf import builtin_function
from ...core import BaseEstimator, TransformerMixin
from ...utils._encode import _encode, _unique
from ...utils.validation import _num_samples, column_or_1d

# scikit-learn is optional here: use its fitted-state validation when it can
# be imported, otherwise fall back to a stand-in that performs no validation.
try:
    from sklearn.utils.validation import check_is_fitted
except ImportError:

    def check_is_fitted(*_args, **_kwargs):
        """No-op replacement used when scikit-learn cannot be imported.

        Accepts and ignores any positional or keyword arguments, so call
        sites written against scikit-learn's ``check_is_fitted`` keep
        working; nothing is checked and ``None`` is always returned.
        """
        return None


class LabelEncoder(TransformerMixin, BaseEstimator):
    """Encode target labels with value between 0 and n_classes-1.

    This transformer should be used to encode target values, *i.e.* `y`, and
    not the input `X`.

    Read more in the :ref:`User Guide <preprocessing_targets>`.

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Holds the label for each class.

    See Also
    --------
    OrdinalEncoder : Encode categorical features using an ordinal encoding
        scheme.
    OneHotEncoder : Encode categorical features as a one-hot numeric array.

    Examples
    --------
    `LabelEncoder` can be used to normalize labels.

    >>> from maxframe.learn import preprocessing
    >>> le = preprocessing.LabelEncoder()
    >>> le.fit([1, 2, 2, 6]).execute()
    LabelEncoder()
    >>> le.classes_.to_numpy()
    array([1, 2, 6])
    >>> le.transform([1, 1, 2, 6]).to_numpy()
    array([0, 0, 1, 2]...)
    >>> le.inverse_transform([0, 0, 1, 2]).to_numpy()
    array([1, 1, 2, 6])

    It can also be used to transform non-numerical labels (as long as they are
    hashable and comparable) to numerical labels.

    >>> le = preprocessing.LabelEncoder()
    >>> le.fit(["paris", "paris", "tokyo", "amsterdam"]).execute()
    LabelEncoder()
    >>> list(le.classes_.to_numpy())
    ['amsterdam', 'paris', 'tokyo']
    >>> le.transform(["tokyo", "tokyo", "paris"]).to_numpy()
    array([2, 2, 1]...)
    >>> list(le.inverse_transform([2, 2, 1]).to_numpy())
    ['tokyo', 'tokyo', 'paris']
    """

    def fit(self, y, execute=False, session=None, run_kwargs=None):
        """Fit the encoder by collecting the distinct labels found in ``y``.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        execute : bool, default=False
            When true, ``self.execute`` is called before returning.
        session : optional
            Forwarded as the ``session`` keyword of ``self.execute``.
        run_kwargs : dict, optional
            Extra keyword arguments expanded into the ``self.execute`` call.

        Returns
        -------
        self : returns an instance of self.
            Fitted label encoder.
        """
        targets = column_or_1d(y, warn=True)
        self.classes_ = _unique(targets)
        if not execute:
            return self
        self.execute(session=session, **(run_kwargs or {}))
        return self

    def fit_transform(self, y, execute=False, session=None, run_kwargs=None):
        """Fit the encoder on ``y`` and return the encoded labels in one pass.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        execute : bool, default=False
            When true, ``self.execute`` is called with the encoded labels
            passed as ``extra_tileables`` before returning.
        session : optional
            Forwarded as the ``session`` keyword of ``self.execute``.
        run_kwargs : dict, optional
            Extra keyword arguments expanded into the ``self.execute`` call.

        Returns
        -------
        y : array-like of shape (n_samples,)
            Encoded labels.
        """
        targets = column_or_1d(y, warn=True)
        # A single _unique call yields both the classes and the per-sample
        # position of each label within those classes.
        uniques, codes = _unique(targets, return_inverse=True)
        self.classes_ = uniques
        if execute:
            extra = run_kwargs or {}
            self.execute(session=session, extra_tileables=codes, **extra)
        return codes

    def transform(self, y, execute=False, session=None, run_kwargs=None):
        """Encode labels using the classes stored on this encoder.

        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        execute : bool, default=False
            When true, the value returned is the result of calling
            ``execute`` on the encoded tensor. Ignored for empty input.
        session : optional
            Forwarded as the ``session`` keyword of that ``execute`` call.
        run_kwargs : dict, optional
            Extra keyword arguments expanded into that ``execute`` call.

        Returns
        -------
        y : array-like of shape (n_samples,)
            Labels as normalized encodings.
        """
        check_is_fitted(self)
        targets = column_or_1d(y, warn=True)
        # transform of empty array is empty array
        if _num_samples(targets) == 0:
            return mt.array([])

        encoded = _encode(targets, uniques=self.classes_)
        if not execute:
            return encoded
        return encoded.execute(session=session, **(run_kwargs or {}))

    @staticmethod
    @builtin_function
    def _class_checker(chunk_data, classes_data):
        """Map the codes in one chunk back to their class labels.

        Raises ``ValueError`` when ``chunk_data`` holds a value that is not
        one of ``0 .. len(classes_data) - 1``; otherwise returns
        ``classes_data`` indexed by ``chunk_data``.
        """
        valid_codes = np.arange(len(classes_data))
        unseen = np.setdiff1d(chunk_data, valid_codes)
        if len(unseen) > 0:
            raise ValueError("y contains previously unseen labels: %s" % str(unseen))
        return classes_data[chunk_data]

    def inverse_transform(self, y, execute=False, session=None, run_kwargs=None):
        """Convert encoded labels back to the original labels.

        Parameters
        ----------
        y : ndarray of shape (n_samples,)
            Target values.
        execute : bool, default=False
            When true, ``execute`` is called on the decoded tensor before it
            is returned; the tensor itself is what gets returned. Ignored
            for empty input.
        session : optional
            Forwarded as the ``session`` keyword of that ``execute`` call.
        run_kwargs : dict, optional
            Extra keyword arguments expanded into that ``execute`` call.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            Original encoding.
        """
        check_is_fitted(self)
        codes = column_or_1d(y, warn=True)
        # inverse transform of empty array is empty array
        if _num_samples(codes) == 0:
            return mt.array([])

        classes = self.classes_
        # Range validation and label lookup both happen per chunk inside
        # _class_checker.
        decoded = mt.asarray(codes).mf.apply_chunk(
            self._class_checker, args=(classes,), dtype=classes.dtype
        )
        if execute:
            decoded.execute(session=session, **(run_kwargs or {}))
        return decoded

    def _more_tags(self):
        """Return the estimator tags declared by this encoder."""
        tags = {"X_types": ["1dlabels"]}
        return tags