Source code for maxframe.learn.preprocessing._data.normalize

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from .... import opcodes
from ....core import ExecutableTuple
from ....serialization.serializables import BoolField, Int32Field, KeyField, StringField
from ....tensor.core import TensorOrder
from ....tensor.operators import TensorOperator, TensorOperatorMixin
from ...utils.validation import check_array


class TensorNormalize(TensorOperator, TensorOperatorMixin):
    _op_module_ = "learn"
    _op_type_ = opcodes.NORMALIZE

    input = KeyField("input")
    norm = StringField("norm", default=None)
    axis = Int32Field("axis", default=None)
    return_norm = BoolField("return_norm", default=None)
    # for test purpose
    use_sklearn = BoolField("use_sklearn", default=None)

    def __init__(self, **kw):
        super().__init__(**kw)
        if self.use_sklearn is None:
            # force to use sklearn if not specified
            self.use_sklearn = True

    @classmethod
    def _set_inputs(cls, op, inputs):
        super()._set_inputs(op, inputs)
        op.input = op.inputs[0]

    @property
    def output_limit(self):
        return 2 if self.return_norm else 1

    def __call__(self, x, copy=True):
        x = check_array(
            x,
            accept_sparse=True,
            estimator="the normalize function",
            dtype=(np.float64, np.float32, np.float16),
        )

        normed = None
        if not self.return_norm:
            res = self.new_tensor([x], shape=x.shape, order=x.order)
        else:
            kws = [
                {"shape": x.shape, "order": x.order},
                {
                    "shape": (x.shape[0] if self.axis == 1 else x.shape[1],),
                    "order": TensorOrder.C_ORDER,
                },
            ]
            res, normed = self.new_tensors([x], kws=kws, output_limit=2)

        if not copy and self.axis == 1:
            # follow the behaviour of sklearn
            x.data = res.data

        if normed is None:
            return res
        return ExecutableTuple([res, normed])



[docs]
def normalize(X, norm="l2", axis=1, copy=True, return_norm=False):
    """
    Scale input vectors individually to unit norm (vector length).

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data to normalize, element by element.
        scipy.sparse matrices should be in CSR format to avoid an
        un-necessary copy.

    norm : 'l1', 'l2', or 'max', optional ('l2' by default)
        The norm to use to normalize each non zero sample (or each non-zero
        feature if axis is 0).

    axis : 0 or 1, optional (1 by default)
        axis used to normalize the data along. If 1, independently normalize
        each sample, otherwise (if 0) normalize each feature.

    copy : boolean, optional, default True
        set to False to perform inplace row normalization and avoid a
        copy (if the input is already a tensor and if axis is 1).

    return_norm : boolean, default False
        whether to return the computed norms

    Returns
    -------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        Normalized input X.

    norms : Tensor, shape [n_samples] if axis=1 else [n_features]
        A tensor of norms along given axis for X.
        When X is sparse, a NotImplementedError will be raised
        for norm 'l1' or 'l2'.

    See also
    --------
    Normalizer: Performs normalization using the ``Transformer`` API
        (e.g. as part of a preprocessing :class:`maxframe.learn.pipeline.Pipeline`).
    """
    if norm not in ("l1", "l2", "max"):
        raise ValueError(f"'{norm}' is not a supported norm")
    if axis not in (0, 1):
        raise ValueError(f"'{axis}' is not a supported axis")
    op = TensorNormalize(norm=norm, axis=axis, return_norm=return_norm, dtype=X.dtype)
    return op(X, copy=copy)