Source code for maxframe.dataframe.missing.checkna

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any

import numpy as np
import pandas as pd

from ... import opcodes
from ... import tensor as mt
from ...core import ENTITY_TYPE, OutputType
from ...serialization.serializables import BoolField
from ...tensor.core import TENSOR_TYPE
from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE, MultiIndex
from ..operators import DataFrameOperator, DataFrameOperatorMixin


class DataFrameCheckNA(DataFrameOperator, DataFrameOperatorMixin):
    _op_type_ = opcodes.CHECK_NA

    positive = BoolField("positive", default=None)

    def __call__(self, df):
        if isinstance(df, DATAFRAME_TYPE):
            self.output_types = [OutputType.dataframe]
        elif isinstance(df, SERIES_TYPE):
            self.output_types = [OutputType.series]
        elif isinstance(df, TENSOR_TYPE):
            self.output_types = [OutputType.tensor]
        elif isinstance(df, INDEX_TYPE):
            self.output_types = [OutputType.index]
        else:
            raise TypeError(
                f"Expecting maxframe dataframe, series, index, or tensor, got {type(df)}"
            )

        params = df.params.copy()
        if self.output_types[0] == OutputType.dataframe:
            params["dtypes"] = pd.Series(
                [np.dtype("bool")] * len(df.dtypes), index=df.columns_value.to_pandas()
            )
        else:
            params["dtype"] = np.dtype("bool")
        return self.new_tileable([df], **params)


def _from_pandas(obj: Any):
    if isinstance(obj, pd.DataFrame):
        from ..datasource.dataframe import from_pandas

        return from_pandas(obj)
    elif isinstance(obj, pd.Series):
        from ..datasource.series import from_pandas

        return from_pandas(obj)
    elif isinstance(obj, np.ndarray):
        return mt.tensor(obj)
    else:
        return obj


[docs] def isna(obj): """ Detect missing values. Return a boolean same-sized object indicating if the values are NA. NA values, such as None or :attr:`numpy.NaN`, gets mapped to True values. Everything else gets mapped to False values. Characters such as empty strings ``''`` or :attr:`numpy.inf` are not considered NA values (unless you set ``pandas.options.mode.use_inf_as_na = True``). Returns ------- DataFrame Mask of bool values for each element in DataFrame that indicates whether an element is not an NA value. See Also -------- DataFrame.isnull : Alias of isna. DataFrame.notna : Boolean inverse of isna. DataFrame.dropna : Omit axes labels with missing values. isna : Top-level isna. Examples -------- Show which entries in a DataFrame are NA. >>> import numpy as np >>> import maxframe.dataframe as md >>> df = md.DataFrame({'age': [5, 6, np.NaN], ... 'born': [md.NaT, md.Timestamp('1939-05-27'), ... md.Timestamp('1940-04-25')], ... 'name': ['Alfred', 'Batman', ''], ... 'toy': [None, 'Batmobile', 'Joker']}) >>> df.execute() age born name toy 0 5.0 NaT Alfred None 1 6.0 1939-05-27 Batman Batmobile 2 NaN 1940-04-25 Joker >>> df.isna().execute() age born name toy 0 False True False True 1 False False False False 2 True False False False Show which entries in a Series are NA. >>> ser = md.Series([5, 6, np.NaN]) >>> ser.execute() 0 5.0 1 6.0 2 NaN dtype: float64 >>> ser.isna().execute() 0 False 1 False 2 True dtype: bool """ if isinstance(obj, MultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, ENTITY_TYPE): if isinstance(obj, TENSOR_TYPE): return mt.isnan(obj) else: op = DataFrameCheckNA(positive=True) return op(obj) else: return _from_pandas(pd.isna(obj))
[docs] def notna(obj): """ Detect existing (non-missing) values. Return a boolean same-sized object indicating if the values are not NA. Non-missing values get mapped to True. Characters such as empty strings ``''`` or :attr:`numpy.inf` are not considered NA values (unless you set ``pandas.options.mode.use_inf_as_na = True``). NA values, such as None or :attr:`numpy.NaN`, get mapped to False values. Returns ------- DataFrame Mask of bool values for each element in DataFrame that indicates whether an element is not an NA value. See Also -------- DataFrame.notnull : Alias of notna. DataFrame.isna : Boolean inverse of notna. DataFrame.dropna : Omit axes labels with missing values. notna : Top-level notna. Examples -------- Show which entries in a DataFrame are not NA. >>> import numpy as np >>> import maxframe.dataframe as md >>> df = md.DataFrame({'age': [5, 6, np.NaN], ... 'born': [md.NaT, md.Timestamp('1939-05-27'), ... md.Timestamp('1940-04-25')], ... 'name': ['Alfred', 'Batman', ''], ... 'toy': [None, 'Batmobile', 'Joker']}) >>> df.execute() age born name toy 0 5.0 NaT Alfred None 1 6.0 1939-05-27 Batman Batmobile 2 NaN 1940-04-25 Joker >>> df.notna().execute() age born name toy 0 True False True False 1 True True True True 2 False True True True Show which entries in a Series are not NA. >>> ser = md.Series([5, 6, np.NaN]) >>> ser.execute() 0 5.0 1 6.0 2 NaN dtype: float64 >>> ser.notna().execute() 0 True 1 True 2 False dtype: bool """ if isinstance(obj, MultiIndex): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, ENTITY_TYPE): if isinstance(obj, TENSOR_TYPE): return ~mt.isnan(obj) else: op = DataFrameCheckNA(positive=False) return op(obj) else: return _from_pandas(pd.notna(obj))
isnull = isna notnull = notna