Source code for maxframe.tensor.misc.unique

# Copyright 1999-2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

from maxframe import opcodes
from maxframe.serialization.serializables import (
    BoolField,
    Int32Field,
    Int64Field,
    StringField,
)
from maxframe.tensor.core import TensorOrder
from maxframe.tensor.operators import TensorHasInput, TensorOperatorMixin
from maxframe.tensor.utils import validate_axis


class TensorUnique(TensorHasInput, TensorOperatorMixin):
    """Operator describing a "unique" computation over one input tensor.

    Calling an instance builds the output tensor(s): the unique values
    first, then optionally the indices, inverse and counts tensors, in
    that order, depending on the ``return_*`` flags.
    """

    _op_type_ = opcodes.UNIQUE

    # Flags selecting which optional outputs accompany the unique values.
    return_index = BoolField("return_index", default=False)
    return_inverse = BoolField("return_inverse", default=False)
    return_counts = BoolField("return_counts", default=False)
    # Axis to operate on; ``__call__`` replaces it with a concrete value.
    axis = Int32Field("axis", default=None)

    # The fields below are only declared here, never read in this class.
    # NOTE(review): presumably consumed by the execution backend - confirm.
    method = StringField("method", default=None)
    aggregate_size = Int32Field("aggregate_size", default=None)
    start_pos = Int64Field("start_pos", default=None)
    sort = BoolField("sort", default=True)
    use_na_sentinel = BoolField("use_na_sentinel", default=True)
    na_position = StringField("na_position", default=None)

    @property
    def output_limit(self):
        """Number of outputs: one, plus one per enabled ``return_*`` flag."""
        optional_flags = (self.return_index, self.return_inverse, self.return_counts)
        return sum(optional_flags, 1)

    @classmethod
    def _gen_kws(cls, op: "TensorUnique", input_obj, chunk=False, chunk_index=None):
        """Build one keyword dict per output of ``op`` applied to ``input_obj``.

        Parameters
        ----------
        op : TensorUnique
            Operator whose ``axis`` and ``return_*`` flags drive the result.
            ``op.axis`` must already be a concrete integer.
        input_obj
            Input object; its ``shape``, ``dtype`` and ``op.gpu`` are read.
        chunk : bool, optional
            When true, every dict also receives an ``"index"`` entry.
        chunk_index : int or None, optional
            Position written into the ``"index"`` entries; a falsy value
            is treated as 0.

        Returns
        -------
        list of dict
            The dict for the unique values, followed by the dicts for the
            indices, inverse and counts outputs that were requested.
        """
        axis = op.axis
        on_gpu = input_obj.op.gpu
        position = chunk_index or 0

        # Unique values keep the input layout, except that the extent along
        # ``axis`` is unknown until execution.
        unique_shape = list(input_obj.shape)
        unique_shape[axis] = np.nan
        unique_kw = {
            "shape": tuple(unique_shape),
            "dtype": input_obj.dtype,
            "gpu": on_gpu,
        }
        if chunk:
            unique_index = [0] * len(unique_shape)
            unique_index[axis] = position
            unique_kw["index"] = tuple(unique_index)
        result = [unique_kw]

        # Optional 1-d outputs as (requested, type tag, length, scalar type).
        # Only the inverse has a known length: the input extent along ``axis``.
        optional_outputs = (
            (op.return_index, "indices", np.nan, np.intp),
            (op.return_inverse, "inverse", input_obj.shape[axis], np.intp),
            (op.return_counts, "counts", np.nan, int),
        )
        for requested, kind, length, scalar_type in optional_outputs:
            if not requested:
                continue
            extra_kw = {
                "shape": (length,),
                "dtype": np.dtype(scalar_type),
                "gpu": on_gpu,
                "type": kind,
            }
            if chunk:
                extra_kw["index"] = (position,)
            result.append(extra_kw)

        return result

    def __call__(self, ar):
        """Create the output tensor(s) for input ``ar``.

        ``ar`` is first passed through ``atleast_1d``. When ``self.axis`` is
        None the input is flattened (if it has more than one dimension) and
        the axis is set to 0; otherwise the axis goes through
        ``validate_axis``. Note that ``self.axis`` is updated in place.

        Returns
        -------
        A single tensor when only one output is produced, otherwise the
        collection returned by ``new_tensors``.
        """
        from maxframe.tensor.misc.atleast_1d import atleast_1d

        tensor = atleast_1d(ar)
        if self.axis is not None:
            self.axis = validate_axis(tensor.ndim, self.axis)
        else:
            if tensor.ndim > 1:
                tensor = tensor.flatten()
            self.axis = 0

        outputs = self.new_tensors(
            [tensor],
            kws=self._gen_kws(self, tensor),
            order=TensorOrder.C_ORDER,
        )
        return outputs[0] if len(outputs) == 1 else outputs


def unique(
    ar,
    return_index=False,
    return_inverse=False,
    return_counts=False,
    axis=None,
    method="auto",
    aggregate_size=None,
    sort=True,
    use_na_sentinel=False,
    na_position=None,
):
    """
    Find the unique elements of a tensor.

    Returns the sorted unique elements of a tensor. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input tensor that give the unique values
    * the indices of the unique tensor that reconstruct the input tensor
    * the number of times each unique value comes up in the input tensor

    Parameters
    ----------
    ar : array_like
        Input tensor. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened tensor) that result in the unique
        tensor.
    return_inverse : bool, optional
        If True, also return the indices of the unique tensor (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an
        integer, the subarrays indexed by the given axis will be flattened
        and treated as the elements of a 1-D tensor with the dimension of
        the given axis, see the notes for more details. Object tensors or
        structured tensors that contain objects are not supported if the
        `axis` kwarg is used. The default is None.
    method : str, optional
        Passed unchanged to the ``TensorUnique`` operator. Defaults to
        ``"auto"``; the accepted values are not defined in this module.
    aggregate_size : int or None, optional
        Passed unchanged to the ``TensorUnique`` operator. Its effect is
        not defined in this module.
    sort : bool, optional
        Passed unchanged to the ``TensorUnique`` operator. Defaults to True.
        Presumably controls whether the unique values come back sorted
        (to be confirmed against the execution backend).
    use_na_sentinel : bool, optional
        Passed unchanged to the ``TensorUnique`` operator. Defaults to False
        here, whereas the operator field itself defaults to True.
    na_position : str or None, optional
        Passed unchanged to the ``TensorUnique`` operator. The accepted
        values are not defined in this module.

    Returns
    -------
    unique : Tensor
        The sorted unique values.
    unique_indices : Tensor, optional
        The indices of the first occurrences of the unique values in the
        original tensor. Only provided if `return_index` is True.
    unique_inverse : Tensor, optional
        The indices to reconstruct the original tensor from the
        unique tensor. Only provided if `return_inverse` is True.
    unique_counts : Tensor, optional
        The number of times each of the unique values comes up in the
        original tensor. Only provided if `return_counts` is True.

    Examples
    --------
    >>> import maxframe.tensor as mt

    >>> mt.unique([1, 1, 2, 2, 3, 3]).execute()
    array([1, 2, 3])
    >>> a = mt.array([[1, 1], [2, 3]])
    >>> mt.unique(a).execute()
    array([1, 2, 3])

    Return the unique rows of a 2D tensor

    >>> a = mt.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> mt.unique(a, axis=0).execute()
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original tensor that give the unique values:

    >>> a = mt.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = mt.unique(a, return_index=True)
    >>> u.execute()
    array(['a', 'b', 'c'],
           dtype='|S1')
    >>> indices.execute()
    array([0, 1, 3])
    >>> a[indices].execute()
    array(['a', 'b', 'c'],
           dtype='|S1')

    Reconstruct the input array from the unique values:

    >>> a = mt.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = mt.unique(a, return_inverse=True)
    >>> u.execute()
    array([1, 2, 3, 4, 6])
    >>> indices.execute()
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices].execute()
    array([1, 2, 6, 4, 2, 3, 2])
    """
    # Every option is handed to the operator as-is; axis normalisation and
    # flattening of the input happen inside TensorUnique.__call__.
    op = TensorUnique(
        return_index=return_index,
        return_inverse=return_inverse,
        return_counts=return_counts,
        axis=axis,
        method=method,
        aggregate_size=aggregate_size,
        sort=sort,
        use_na_sentinel=use_na_sentinel,
        na_position=na_position,
    )
    return op(ar)