# Source code for maxframe.learn.metrics.pairwise.cosine
# Copyright 1999-2026 Alibaba Group Holding Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
import numpy as np
from maxframe import opcodes
from maxframe.core import EntityData
from maxframe.learn.metrics.pairwise.core import PairwiseDistances
from maxframe.learn.preprocessing import normalize
from maxframe.serialization.serializables import KeyField
from maxframe.tensor.core import TensorOrder
class CosineDistances(PairwiseDistances):
    """Operator that builds the pairwise cosine-distance tensor of two inputs.

    The two input tensors are kept on the operator as the key fields ``x``
    and ``y``. Calling the operator yields a C-ordered tensor with one row
    per row of ``x`` and one column per row of ``y``.
    """

    _op_type_ = opcodes.PAIRWISE_COSINE_DISTANCES

    # Input tensors of the operator.
    x = KeyField("x")
    y = KeyField("y")

    @classmethod
    def _set_inputs(cls, op: "CosineDistances", inputs: List[EntityData]):
        """Register ``inputs`` on ``op`` and bind the first two to ``x``/``y``."""
        super()._set_inputs(op, inputs)
        # Unpacking the two-element slice requires at least two inputs.
        leading = inputs[:2]
        op.x, op.y = leading

    def __call__(self, x, y=None):
        """Check the inputs and create the output tensor of this operator.

        Parameters
        ----------
        x : Tensor
            First input; its first dimension gives the number of output rows.
        y : Tensor, optional
            Second input; its first dimension (after checking) gives the
            number of output columns. How ``None`` is resolved is decided by
            ``check_pairwise_arrays``.

        Returns
        -------
        Tensor
            C-ordered tensor of shape ``(x.shape[0], y.shape[0])``.
        """
        x, y = self.check_pairwise_arrays(x, y)
        out_shape = (x.shape[0], y.shape[0])
        return self.new_tensor([x, y], shape=out_shape, order=TensorOrder.C_ORDER)
# [docs]
def cosine_similarity(X, Y=None, dense_output=True):
    """Compute the cosine similarity between the samples of X and Y.

    The cosine similarity (also called the cosine kernel) is the dot product
    of the normalized inputs:

        K(X, Y) = <X, Y> / (||X||*||Y||)

    For data that is already L2-normalized the result is the same as a
    linear kernel.

    Read more in the :ref:`User Guide <cosine_similarity>`.

    Parameters
    ----------
    X : Tensor or sparse tensor, shape: (n_samples_X, n_features)
        Input data.

    Y : Tensor or sparse tensor, shape: (n_samples_Y, n_features)
        Input data. When ``None``, similarities are computed between all
        pairs of samples in ``X``.

    dense_output : boolean (optional), default True
        If true, the result is converted with ``todense()`` before being
        returned, so a dense tensor is produced even for sparse input. If
        ``False``, the output is sparse if both input tensors are sparse.

    Returns
    -------
    kernel matrix : Tensor
        A tensor with shape (n_samples_X, n_samples_Y).
    """
    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    lhs = normalize(X, copy=True)
    # When both operands are the very same object, normalize only once.
    rhs = lhs if X is Y else normalize(Y, copy=True)

    similarity = lhs.dot(rhs.T)
    return similarity.todense() if dense_output else similarity
# [docs]
def cosine_distances(X, Y=None):
    """Compute the cosine distance between the samples of X and Y.

    The cosine distance equals 1.0 minus the cosine similarity.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features).

    Returns
    -------
    distance matrix : Tensor
        A float64 tensor with shape (n_samples_X, n_samples_Y).

    See also
    --------
    maxframe.learn.metrics.pairwise.cosine_similarity
    maxframe.tensor.spatial.distance.cosine : dense matrices only
    """
    # The operator is always created with a float64 dtype.
    result_dtype = np.dtype(np.float64)
    distance_op = CosineDistances(x=X, y=Y, dtype=result_dtype)
    return distance_op(X, y=Y)