"""Tools for the Maximal Marginal Relevance (MMR) reranking.Duplicated from langchain_community to avoid cross-dependencies.Functions "maximal_marginal_relevance" and "cosine_similarity"are duplicated in this utility respectively from modules: - "libs/community/langchain_community/vectorstores/utils.py" - "libs/community/langchain_community/utils/math.py""""from__future__importannotationsimportloggingfromtypingimportList,Unionimportnumpyasnplogger=logging.getLogger(__name__)Matrix=Union[List[List[float]],List[np.ndarray],np.ndarray]
def cosine_similarity(x: Matrix, y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices.

    Args:
        x: Matrix whose rows are the first set of vectors.
        y: Matrix whose rows are the second set of vectors. It must have the
            same number of columns as ``x``.

    Returns:
        A 2-D array in which entry ``[i, j]`` is the cosine similarity between
        row ``i`` of ``x`` and row ``j`` of ``y``. When either input is empty,
        an empty 1-D array is returned instead.

    Raises:
        ValueError: If either input is not two-dimensional, or if the two
            inputs do not have the same number of columns.
    """
    if len(x) == 0 or len(y) == 0:
        return np.array([])

    x = np.array(x)
    y = np.array(y)

    # Reject non-matrix input explicitly; otherwise ``shape[1]`` below fails
    # with an uninformative IndexError for 1-D input.
    if x.ndim != 2 or y.ndim != 2:
        msg = (
            f"X and Y must both be 2-dimensional. X has shape {x.shape} "
            f"and Y has shape {y.shape}."
        )
        raise ValueError(msg)

    if x.shape[1] != y.shape[1]:
        msg = (
            f"Number of columns in X and Y must be the same. X has shape {x.shape} "
            f"and Y has shape {y.shape}."
        )
        raise ValueError(msg)

    try:
        import simsimd as simd  # type: ignore[import]
    except ImportError:
        logger.info(
            "Unable to import simsimd, defaulting to NumPy implementation. If you want "
            "to use simsimd please install with `pip install simsimd`."
        )
        x_norm = np.linalg.norm(x, axis=1)
        y_norm = np.linalg.norm(y, axis=1)
        # Ignore divide by zero errors run time warnings as those are handled below.
        with np.errstate(divide="ignore", invalid="ignore"):
            similarity = np.dot(x, y.T) / np.outer(x_norm, y_norm)
        # Pairs involving a zero-norm row produce NaN/inf; report them as 0.0.
        similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0
        return similarity

    x = np.array(x, dtype=np.float32)
    y = np.array(y, dtype=np.float32)
    # Depending on the simsimd version, ``cdist`` may hand back a bare float
    # (single pair) or a buffer-backed tensor object rather than an ndarray.
    # Coerce to an ndarray and force the (rows of x, rows of y) shape so the
    # result has the same layout as the NumPy fallback above.
    distances = np.asarray(simd.cdist(x, y, metric="cosine"))
    return 1 - distances.reshape(x.shape[0], y.shape[0])