Source code for maxframe.tensor.statistics.bincount

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List, Optional

import numpy as np

from ... import opcodes
from ...serialization.serializables import Int64Field, ReferenceField
from ...typing_ import EntityType
from ..datasource import tensor as astensor
from ..operators import TensorMapReduceOperator, TensorOperatorMixin

_DEFAULT_CHUNK_SIZE_LIMIT = 1e8


class TensorBinCount(TensorMapReduceOperator, TensorOperatorMixin):
    _op_type_ = opcodes.BINCOUNT

    weights = ReferenceField("weights", default=None)
    minlength: Optional[int] = Int64Field("minlength", default=0)
    chunk_size_limit: int = Int64Field("chunk_size_limit")

    chunk_count: Optional[int] = Int64Field("chunk_count")
    tileable_right_bound: Optional[int] = Int64Field("tileable_right_bound")

    def __call__(self, x, weights=None):
        inputs = [x]
        self.weights = weights
        dtype = np.dtype(np.int_)
        if weights is not None:
            inputs.append(weights)
            dtype = weights.dtype
        return self.new_tensor(inputs, dtype=dtype, shape=(np.nan,))

    @classmethod
    def _set_inputs(cls, op: "TensorBinCount", inputs: List[EntityType]):
        super()._set_inputs(op, inputs)
        if len(inputs) > 1:
            op.weights = inputs[1]


[docs] def bincount(x, weights=None, minlength=0, chunk_size_limit=None): """ Count number of occurrences of each value in array of non-negative ints. The number of bins (of size 1) is one larger than the largest value in `x`. If `minlength` is specified, there will be at least this number of bins in the output array (though it will be longer if necessary, depending on the contents of `x`). Each bin gives the number of occurrences of its index value in `x`. If `weights` is specified the input array is weighted by it, i.e. if a value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead of ``out[n] += 1``. Parameters ---------- x : tensor or array_like, 1 dimension, nonnegative ints Input array. weights : tensor or array_like, optional Weights, array of the same shape as `x`. minlength : int, optional A minimum number of bins for the output array. Returns ------- out : tensor of ints The result of binning the input array. The length of `out` is equal to ``np.amax(x)+1``. Raises ------ ValueError If the input is not 1-dimensional, or contains elements with negative values, or if `minlength` is negative. TypeError If the type of the input is float or complex. See Also -------- histogram, digitize, unique Examples -------- >>> import maxframe.tensor as mt >>> mt.bincount(mt.arange(5)).execute() array([1, 1, 1, 1, 1]) >>> mt.bincount(mt.tensor([0, 1, 1, 3, 2, 1, 7])).execute() array([1, 3, 1, 1, 0, 0, 0, 1]) The input array needs to be of integer dtype, otherwise a TypeError is raised: >>> mt.bincount(mt.arange(5, dtype=float)).execute() Traceback (most recent call last): ....execute() TypeError: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe' A possible use of ``bincount`` is to perform sums over variable-size chunks of an array, using the ``weights`` keyword. >>> w = mt.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights >>> x = mt.array([0, 1, 1, 2, 2, 2]) >>> mt.bincount(x, weights=w).execute() array([ 0.3, 0.7, 1.1]) """ x = astensor(x) weights = astensor(weights) if weights is not None else None if not np.issubdtype(x.dtype, np.int_): raise TypeError(f"Cannot cast array data from {x.dtype} to {np.dtype(np.int_)}") if x.ndim != 1: raise ValueError("'x' must be 1 dimension") if minlength < 0: raise ValueError("'minlength' must not be negative") chunk_size_limit = ( chunk_size_limit if chunk_size_limit is not None else _DEFAULT_CHUNK_SIZE_LIMIT ) op = TensorBinCount(minlength=minlength, chunk_size_limit=chunk_size_limit) return op(x, weights=weights)