# Source code for maxframe.learn.metrics.pairwise.cosine

# Copyright 1999-2025 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List

import numpy as np

from .... import opcodes
from ....core import EntityData
from ....serialization.serializables import KeyField
from ....tensor.core import TensorOrder
from ...preprocessing import normalize
from .core import PairwiseDistances


class CosineDistances(PairwiseDistances):
    """Operator node for pairwise cosine distances between two inputs.

    The two operands are kept in the ``x`` and ``y`` fields. Calling the
    operator declares an output tensor of shape
    ``(x.shape[0], y.shape[0])`` in C order.
    """

    _op_type_ = opcodes.PAIRWISE_COSINE_DISTANCES

    # Operand fields of the operator.
    x = KeyField("x")
    y = KeyField("y")

    @classmethod
    def _set_inputs(cls, op: "CosineDistances", inputs: List[EntityData]):
        """Let the parent class register ``inputs``, then bind ``x`` and ``y``.

        The first two entries of ``inputs`` are assigned to ``op.x`` and
        ``op.y`` respectively.
        """
        super()._set_inputs(op, inputs)
        first, second = inputs[:2]
        op.x = first
        op.y = second

    def __call__(self, x, y=None):
        """Build the output tensor for the operands ``x`` and ``y``.

        Both operands are first passed through ``check_pairwise_arrays``
        (which also decides what a ``None`` ``y`` becomes); the values it
        returns are the inputs of the new tensor.
        """
        x, y = self.check_pairwise_arrays(x, y)
        out_shape = (x.shape[0], y.shape[0])
        return self.new_tensor([x, y], shape=out_shape, order=TensorOrder.C_ORDER)


def cosine_similarity(X, Y=None, dense_output=True):
    """Compute cosine similarity between samples in X and Y.

    Cosine similarity, or the cosine kernel, computes similarity as the
    normalized dot product of X and Y:

        K(X, Y) = <X, Y> / (||X||*||Y||)

    On L2-normalized data, this function is equivalent to linear_kernel.

    Read more in the :ref:`User Guide <cosine_similarity>`.

    Parameters
    ----------
    X : Tensor or sparse tensor, shape: (n_samples_X, n_features)
        Input data.

    Y : Tensor or sparse tensor, shape: (n_samples_Y, n_features)
        Input data. If ``None``, the output will be the pairwise
        similarities between all samples in ``X``.

    dense_output : boolean (optional), default True
        Whether to return dense output even when the input is sparse. If
        ``False``, the output is sparse if both input tensors are sparse.

    Returns
    -------
    kernel matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    x_unit = normalize(X, copy=True)
    # When both operands are the same object, reuse the normalized X
    # instead of normalizing a second time.
    y_unit = x_unit if X is Y else normalize(Y, copy=True)

    similarity = x_unit.dot(y_unit.T)
    if dense_output:
        return similarity.todense()
    return similarity
def cosine_distances(X, Y=None):
    """Compute cosine distance between samples in X and Y.

    Cosine distance is defined as 1.0 minus the cosine similarity.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).

    See also
    --------
    maxframe.learn.metrics.pairwise.cosine_similarity
    maxframe.tensor.spatial.distance.cosine : dense matrices only
    """
    # The operator is always created with a float64 dtype.
    float64 = np.dtype(np.float64)
    distance_op = CosineDistances(x=X, y=Y, dtype=float64)
    return distance_op(X, y=Y)