# Source code for maxframe.learn.preprocessing._data.normalize

# Copyright 1999-2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from maxframe import opcodes
from maxframe.core import ExecutableTuple
from maxframe.learn.utils.validation import check_array
from maxframe.serialization.serializables import (
    BoolField,
    Int32Field,
    KeyField,
    StringField,
)
from maxframe.tensor.core import TensorOrder
from maxframe.tensor.operators import TensorOperator, TensorOperatorMixin


class TensorNormalize(TensorOperator, TensorOperatorMixin):
    """
    Operator describing the normalization of a tensor along one axis.

    It yields the normalized tensor and, when ``return_norm`` is set, a
    second output holding the norms computed along ``axis``.
    """

    _op_module_ = "learn"
    _op_type_ = opcodes.NORMALIZE

    # tensor being normalized (bound to the first operator input)
    input = KeyField("input")
    # name of the norm, as passed by the caller
    norm = StringField("norm", default=None)
    # axis the normalization runs along
    axis = Int32Field("axis", default=None)
    # whether the norms are emitted as an additional output
    return_norm = BoolField("return_norm", default=None)
    # for test purpose
    use_sklearn = BoolField("use_sklearn", default=None)

    def __init__(self, **kw):
        super().__init__(**kw)
        # an unspecified backend flag falls back to sklearn
        if self.use_sklearn is None:
            self.use_sklearn = True

    @classmethod
    def _set_inputs(cls, op, inputs):
        """Bind ``op.input`` to the first of the registered inputs."""
        super()._set_inputs(op, inputs)
        op.input = op.inputs[0]

    @property
    def output_limit(self):
        """Number of outputs: two when norms are requested, otherwise one."""
        if self.return_norm:
            return 2
        return 1

    def __call__(self, x, copy=True):
        """
        Validate ``x`` and create the output tensor(s) of this operator.

        Parameters
        ----------
        x : array-like
            Data to normalize; it is passed through ``check_array`` first.
        copy : bool, default True
            When False and ``axis`` is 1, the validated input is re-pointed
            at the result data.

        Returns
        -------
        The normalized tensor, or an ``ExecutableTuple`` of
        ``(normalized, norms)`` when ``return_norm`` is set.
        """
        x = check_array(
            x,
            dtype=(np.float64, np.float32, np.float16),
            accept_sparse=True,
            estimator="the normalize function",
        )

        if self.return_norm:
            # one norm per row for axis=1, one per column otherwise
            norm_len = x.shape[0] if self.axis == 1 else x.shape[1]
            output_params = [
                {"shape": x.shape, "order": x.order},
                {"shape": (norm_len,), "order": TensorOrder.C_ORDER},
            ]
            res, normed = self.new_tensors([x], kws=output_params, output_limit=2)
        else:
            normed = None
            res = self.new_tensor([x], shape=x.shape, order=x.order)

        if not copy and self.axis == 1:
            # follow the behaviour of sklearn
            x.data = res.data

        if normed is not None:
            return ExecutableTuple([res, normed])
        return res



# [docs]
def normalize(X, norm="l2", axis=1, copy=True, return_norm=False):
    """
    Scale input vectors individually to unit norm (vector length).

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        The data to normalize, element by element.
        scipy.sparse matrices should be in CSR format to avoid an
        unnecessary copy.

    norm : 'l1', 'l2', or 'max', optional ('l2' by default)
        The norm to use to normalize each non-zero sample (or each non-zero
        feature if axis is 0).

    axis : 0 or 1, optional (1 by default)
        Axis used to normalize the data along. If 1, independently normalize
        each sample, otherwise (if 0) normalize each feature.

    copy : boolean, optional, default True
        Set to False to perform inplace row normalization and avoid a
        copy (if the input is already a tensor and if axis is 1).

    return_norm : boolean, default False
        Whether to return the computed norms.

    Returns
    -------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        Normalized input X.

    norms : Tensor, shape [n_samples] if axis=1 else [n_features]
        A tensor of norms along given axis for X. Only returned when
        ``return_norm`` is True.
        When X is sparse, a NotImplementedError will be raised
        for norm 'l1' or 'l2'.

    Raises
    ------
    ValueError
        If ``norm`` is not one of 'l1', 'l2', 'max', or ``axis`` is not
        0 or 1.

    See also
    --------
    Normalizer: Performs normalization using the ``Transformer`` API
        (e.g. as part of a preprocessing :class:`maxframe.learn.pipeline.Pipeline`).
    """
    if norm not in ("l1", "l2", "max"):
        raise ValueError(f"'{norm}' is not a supported norm")
    if axis not in (0, 1):
        raise ValueError(f"'{axis}' is not a supported axis")

    # X has not been validated yet: a plain array-like (e.g. a nested list)
    # has no ``dtype`` attribute, and integer/boolean data is not a valid
    # result dtype because the operator validates its input against float
    # dtypes only. Keep an accepted float dtype and use float64 otherwise.
    # NOTE(review): float64 assumes check_array promotes to the first accepted
    # dtype, as scikit-learn does -- confirm against maxframe's check_array.
    float_dtypes = (np.float64, np.float32, np.float16)
    in_dtype = getattr(X, "dtype", None)
    if in_dtype is not None and in_dtype in float_dtypes:
        out_dtype = in_dtype
    else:
        out_dtype = np.dtype(np.float64)

    op = TensorNormalize(
        norm=norm, axis=axis, return_norm=return_norm, dtype=out_dtype
    )
    return op(X, copy=copy)