class SearchType(str, Enum):
    """Search strategies a retriever can request from its vectorstore.

    Each member is also a plain ``str`` equal to its own name, so a member
    compares equal to the matching string literal.
    """

    similarity = "similarity"
    """Plain similarity search."""

    similarity_score_threshold = "similarity_score_threshold"
    """Similarity search filtered by a score threshold."""

    mmr = "mmr"
    """Similarity search reranked with Maximal Marginal Relevance."""
class MultiVectorRetriever(BaseRetriever):
    """Retrieve from a set of multiple embeddings for the same document.

    A query is run against ``vectorstore`` to find matching sub-documents.
    The value stored under ``id_key`` in each hit's metadata is then used to
    look up the corresponding parent documents in ``docstore``, and those
    parent documents are what the retriever returns.
    """

    vectorstore: VectorStore
    """The underlying vectorstore to use to store small chunks
    and their embedding vectors"""
    byte_store: Optional[ByteStore] = None
    """The lower-level backing storage layer for the parent documents.

    When given, it is wrapped with ``create_kv_docstore`` and the result
    replaces any ``docstore`` that was passed."""
    docstore: BaseStore[str, Document]
    """The storage interface for the parent documents"""
    id_key: str = "doc_id"
    """Metadata key on each sub-document whose value is the id used to fetch
    the parent document from ``docstore``."""
    search_kwargs: dict = Field(default_factory=dict)
    """Keyword arguments to pass to the search function."""
    search_type: SearchType = SearchType.similarity
    """Type of search to perform
    (similarity / similarity_score_threshold / mmr)"""

    @model_validator(mode="before")
    @classmethod
    def shim_docstore(cls, values: Dict) -> Any:
        """Ensure ``values`` carries a ``docstore`` before field validation.

        Args:
            values: Raw constructor values; updated in place.

        Returns:
            The same ``values`` mapping with ``"docstore"`` set.

        Raises:
            Exception: If neither ``byte_store`` nor ``docstore`` is supplied.
        """
        byte_store = values.get("byte_store")
        docstore = values.get("docstore")
        if byte_store is not None:
            # A byte store takes precedence over an explicitly passed docstore.
            docstore = create_kv_docstore(byte_store)
        elif docstore is None:
            # Exception type and message are kept as-is: callers may catch or
            # match on them.
            raise Exception("You must pass a `byte_store` parameter.")
        values["docstore"] = docstore
        return values

    def _unique_parent_ids(self, sub_docs: List[Document]) -> List[Any]:
        """Return the ``id_key`` values of ``sub_docs``, de-duplicated.

        Sub-documents without ``id_key`` in their metadata are skipped. The
        order of first appearance is preserved so parents come back in the
        same order as the search hits that referenced them.
        """
        # dict keys keep insertion order, giving an ordered de-duplication in
        # O(n) instead of repeated list membership tests. Ids are docstore
        # keys (typed ``str``), so they are assumed hashable.
        return list(
            dict.fromkeys(
                doc.metadata[self.id_key]
                for doc in sub_docs
                if self.id_key in doc.metadata
            )
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant to a query.

        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use (not referenced here)

        Returns:
            List of relevant documents; ids for which the docstore yields
            ``None`` are omitted.
        """
        if self.search_type == SearchType.mmr:
            sub_docs = self.vectorstore.max_marginal_relevance_search(
                query, **self.search_kwargs
            )
        elif self.search_type == SearchType.similarity_score_threshold:
            sub_docs_and_similarities = (
                self.vectorstore.similarity_search_with_relevance_scores(
                    query, **self.search_kwargs
                )
            )
            # Only the documents are needed; the scores are discarded.
            sub_docs = [sub_doc for sub_doc, _ in sub_docs_and_similarities]
        else:
            sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)

        ids = self._unique_parent_ids(sub_docs)
        docs = self.docstore.mget(ids)
        return [d for d in docs if d is not None]

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Asynchronously get documents relevant to a query.

        Args:
            query: String to find relevant documents for
            run_manager: The callbacks handler to use (not referenced here)

        Returns:
            List of relevant documents; ids for which the docstore yields
            ``None`` are omitted.
        """
        if self.search_type == SearchType.mmr:
            sub_docs = await self.vectorstore.amax_marginal_relevance_search(
                query, **self.search_kwargs
            )
        elif self.search_type == SearchType.similarity_score_threshold:
            sub_docs_and_similarities = (
                await self.vectorstore.asimilarity_search_with_relevance_scores(
                    query, **self.search_kwargs
                )
            )
            # Only the documents are needed; the scores are discarded.
            sub_docs = [sub_doc for sub_doc, _ in sub_docs_and_similarities]
        else:
            sub_docs = await self.vectorstore.asimilarity_search(
                query, **self.search_kwargs
            )

        ids = self._unique_parent_ids(sub_docs)
        docs = await self.docstore.amget(ids)
        return [d for d in docs if d is not None]