# Copyright 1999-2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional
import numpy as np
from maxframe import opcodes
from maxframe.serialization.serializables import Int64Field, ReferenceField
from maxframe.tensor.datasource import tensor as astensor
from maxframe.tensor.operators import TensorMapReduceOperator, TensorOperatorMixin
from maxframe.typing_ import EntityType
# Fallback handed to ``TensorBinCount.chunk_size_limit`` when ``bincount`` is
# called without an explicit ``chunk_size_limit``. Written as an integer
# literal because the operator stores it in an ``Int64Field``.
_DEFAULT_CHUNK_SIZE_LIMIT = 100_000_000
class TensorBinCount(TensorMapReduceOperator, TensorOperatorMixin):
    """
    Operator node backing ``bincount``.

    Calling an instance with ``x`` (and optionally ``weights``) creates a
    1-d output tensor of unknown length whose dtype is the platform integer
    when no weights are given, or the dtype of ``weights`` otherwise.
    """

    _op_type_ = opcodes.BINCOUNT

    # Optional weights tensor; mirrors the second operator input when present.
    weights = ReferenceField("weights", default=None)
    # Minimum number of bins requested by the caller.
    minlength: Optional[int] = Int64Field("minlength", default=0)
    # Supplied by ``bincount``; how it is consumed is not visible in this file.
    chunk_size_limit: int = Int64Field("chunk_size_limit")
    # NOTE(review): the two fields below are never assigned in this file;
    # presumably they are filled in at a later stage -- confirm.
    chunk_count: Optional[int] = Int64Field("chunk_count")
    tileable_right_bound: Optional[int] = Int64Field("tileable_right_bound")

    def __call__(self, x, weights=None):
        """
        Build the output tensor for ``x`` and optional ``weights``.

        The output shape is ``(nan,)`` because the number of bins depends on
        the values of ``x``.
        """
        self.weights = weights
        if weights is None:
            operands = [x]
            out_dtype = np.dtype(np.int_)
        else:
            operands = [x, weights]
            out_dtype = weights.dtype
        return self.new_tensor(operands, dtype=out_dtype, shape=(np.nan,))

    @classmethod
    def _set_inputs(cls, op: "TensorBinCount", inputs: List[EntityType]):
        """Re-bind ``op.weights`` to the second input whenever one exists."""
        super()._set_inputs(op, inputs)
        has_weights = len(inputs) > 1
        if has_weights:
            op.weights = inputs[1]
[docs]
def bincount(x, weights=None, minlength=0, chunk_size_limit=None):
    """
    Count number of occurrences of each value in array of non-negative ints.

    The number of bins (of size 1) is one larger than the largest value in
    `x`. If `minlength` is specified, there will be at least this number
    of bins in the output array (though it will be longer if necessary,
    depending on the contents of `x`).
    Each bin gives the number of occurrences of its index value in `x`.
    If `weights` is specified the input array is weighted by it, i.e. if a
    value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead
    of ``out[n] += 1``.

    Parameters
    ----------
    x : tensor or array_like, 1 dimension, nonnegative ints
        Input array.
    weights : tensor or array_like, optional
        Weights, array of the same shape as `x`.
    minlength : int, optional
        A minimum number of bins for the output array.
    chunk_size_limit : int, optional
        Value forwarded to the underlying ``TensorBinCount`` operator as its
        ``chunk_size_limit`` field. When omitted, the module-level default
        ``_DEFAULT_CHUNK_SIZE_LIMIT`` is used.

    Returns
    -------
    out : tensor of ints
        The result of binning the input array.
        The length of `out` is equal to ``np.amax(x)+1``.

    Raises
    ------
    ValueError
        If the input is not 1-dimensional, or contains elements with negative
        values, or if `minlength` is negative.
    TypeError
        If the dtype of the input cannot be safely cast to the platform
        integer (e.g. float or complex input).

    See Also
    --------
    histogram, digitize, unique

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> mt.bincount(mt.arange(5)).execute()
    array([1, 1, 1, 1, 1])
    >>> mt.bincount(mt.tensor([0, 1, 1, 3, 2, 1, 7])).execute()
    array([1, 3, 1, 1, 0, 0, 0, 1])

    The input array needs to be of integer dtype, otherwise a
    TypeError is raised:

    >>> mt.bincount(mt.arange(5, dtype=float)).execute()
    Traceback (most recent call last):
    ....execute()
    TypeError: Cannot cast array data from dtype('float64') to dtype('int64')
    according to the rule 'safe'

    A possible use of ``bincount`` is to perform sums over
    variable-size chunks of an array, using the ``weights`` keyword.

    >>> w = mt.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights
    >>> x = mt.array([0, 1, 1, 2, 2, 2])
    >>> mt.bincount(x, weights=w).execute()
    array([ 0.3, 0.7, 1.1])
    """
    x = astensor(x)
    weights = astensor(weights) if weights is not None else None

    # Accept every dtype that casts safely to the platform integer (int8,
    # int16, int32, small unsigned ints, ...). Testing with
    # ``np.issubdtype(dtype, np.int_)`` would only let the platform integer
    # itself through, because ``np.int_`` is a concrete type rather than an
    # abstract category.
    if not np.can_cast(x.dtype, np.dtype(np.int_), casting="safe"):
        raise TypeError(f"Cannot cast array data from {x.dtype} to {np.dtype(np.int_)}")
    if x.ndim != 1:
        raise ValueError("'x' must be 1 dimension")
    if minlength < 0:
        raise ValueError("'minlength' must not be negative")

    if chunk_size_limit is None:
        chunk_size_limit = _DEFAULT_CHUNK_SIZE_LIMIT
    # The operator keeps this value in an Int64Field, so coerce float-valued
    # limits (e.g. ``1e8``) to a real integer before handing it over.
    op = TensorBinCount(minlength=minlength, chunk_size_limit=int(chunk_size_limit))
    return op(x, weights=weights)