@deprecated(
    since="0.2.16",
    alternative=(
        "Qdrant vector store now supports sparse retrievals natively. "
        "Use langchain_qdrant.QdrantVectorStore#as_retriever() instead. "
        "Reference: "
        "https://python.langchain.com/docs/integrations/vectorstores/qdrant/#sparse-vector-search"
    ),
    removal="0.5.0",
)
class QdrantSparseVectorRetriever(BaseRetriever):
    """Qdrant sparse vector retriever.

    Deprecated since 0.2.16 and scheduled for removal in 0.5.0; see the
    decorator's ``alternative`` message for the suggested replacement.
    Unknown fields are rejected (``extra="forbid"``) and arbitrary field
    types are allowed so that a client object can be stored on the model.
    """

    client: Any = None
    """'qdrant_client' instance used to talk to Qdrant."""

    collection_name: str
    """Name of the Qdrant collection."""

    sparse_vector_name: str
    """Name of the sparse vector to use."""

    sparse_encoder: Callable[[str], Tuple[List[int], List[float]]]
    """Function that encodes a text into an ``(indices, values)`` sparse pair."""

    k: int = 4
    """Number of documents to return per query. Defaults to 4."""

    filter: Optional[Any] = None
    """Qdrant qdrant_client.models.Filter to use for queries. Defaults to None."""

    content_payload_key: str = "content"
    """Payload field containing the document content. Defaults to 'content'."""

    metadata_payload_key: str = "metadata"
    """Payload field containing the document metadata. Defaults to 'metadata'."""

    search_options: Dict[str, Any] = {}
    """Additional search options to pass to qdrant_client.QdrantClient.search()."""

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
    )
@pre_init
def validate_environment(cls, values: Dict) -> Dict:
    """Validate the client, the filter and the target collection.

    Checks, in order, that:

    * ``grpc`` and the ``qdrant_client`` package can be imported;
    * ``values["client"]`` is a ``qdrant_client.QdrantClient``;
    * ``values["filter"]`` is ``None`` or a ``qdrant_client.models.Filter``;
    * the collection ``values["collection_name"]`` can be fetched and
      declares a sparse vector named ``values["sparse_vector_name"]``.

    Args:
        values: Field values of the model being constructed.

    Returns:
        The same ``values`` mapping, unmodified.

    Raises:
        ImportError: If the required packages cannot be imported.
        ValueError: If ``client`` or ``filter`` has an unexpected type.
        QdrantException: If the collection cannot be retrieved, or if it
            does not contain the requested sparse vector.
    """
    try:
        from grpc import RpcError
        from qdrant_client import QdrantClient, models
        from qdrant_client.http.exceptions import UnexpectedResponse
    except ImportError as e:
        raise ImportError(
            "Could not import qdrant-client python package. "
            "Please install it with `pip install qdrant-client`."
        ) from e

    client = values["client"]
    if not isinstance(client, QdrantClient):
        raise ValueError(
            f"client should be an instance of qdrant_client.QdrantClient, "
            f"got {type(client)}"
        )

    # Local name avoids shadowing the ``filter`` builtin.
    query_filter = values["filter"]
    if query_filter is not None and not isinstance(query_filter, models.Filter):
        raise ValueError(
            f"filter should be an instance of qdrant_client.models.Filter, "
            f"got {type(query_filter)}"
        )

    client = cast(QdrantClient, client)
    collection_name = values["collection_name"]
    sparse_vector_name = values["sparse_vector_name"]

    # Only the lookup itself is guarded, so that the "missing sparse vector"
    # error raised below can never be mistaken for a missing collection.
    try:
        collection_info = client.get_collection(collection_name)
    except (UnexpectedResponse, RpcError, ValueError) as e:
        raise QdrantException(
            f"Qdrant collection {collection_name} does not exist."
        ) from e

    # ``sparse_vectors`` is optional in the collection params and may be None
    # when the collection defines no sparse vectors; treat that as "no names"
    # instead of failing with a TypeError on the membership test.
    sparse_vectors_config = collection_info.config.params.sparse_vectors or {}
    if sparse_vector_name not in sparse_vectors_config:
        available = ", ".join(sparse_vectors_config.keys())
        hint = f" Did you mean one of {available}?" if available else ""
        raise QdrantException(
            f"Existing Qdrant collection {collection_name} does not "
            f"contain sparse vector named {sparse_vector_name}.{hint}"
        )

    return values
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
    """Add documents to the vectorstore by delegating to ``add_texts``.

    Args:
        documents (List[Document]): Documents to add. Each document's
            ``page_content`` and ``metadata`` are forwarded to ``add_texts``.
        **kwargs: Extra keyword arguments passed through to ``add_texts``.

    Returns:
        List[str]: The value returned by ``add_texts`` (IDs of the added texts).
    """
    # Contents are collected first, then metadata, in two separate passes.
    return self.add_texts(
        [document.page_content for document in documents],
        [document.metadata for document in documents],
        **kwargs,
    )
def _generate_rest_batches(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[Sequence[str]] = None,
    batch_size: int = 64,
) -> Generator[Tuple[List[str], List[Any]], None, None]:
    """Yield batches of point IDs and Qdrant points built from ``texts``.

    Each text is encoded with ``self.sparse_encoder`` and wrapped in a
    ``PointStruct`` whose vector is stored under ``self.sparse_vector_name``.
    Payloads come from ``Qdrant._build_payloads`` called with the batch texts,
    the batch metadatas and the configured payload keys.

    Args:
        texts: Texts to encode. May be any iterable, including a one-shot
            iterator or generator.
        metadatas: Optional metadata dicts, consumed in step with ``texts``.
        ids: Optional point IDs, consumed in step with ``texts``. When
            missing or empty, a random ``uuid4`` hex ID is generated per text.
        batch_size: Maximum number of texts per yielded batch.

    Yields:
        Tuples ``(batch_ids, points)`` for each batch.
    """
    from qdrant_client import models as rest

    texts_iterator = iter(texts)
    metadatas_iterator = iter(metadatas or [])
    # IDs are generated per batch when none are supplied. Building them up
    # front by iterating ``texts`` would exhaust a one-shot iterator (for which
    # ``iter(texts)`` is the very same object as ``texts_iterator``) and leave
    # nothing to batch.
    ids_iterator = iter(ids) if ids else None

    while batch_texts := list(islice(texts_iterator, batch_size)):
        # Take the corresponding metadata and id for each text in a batch
        batch_metadatas = list(islice(metadatas_iterator, batch_size)) or None
        if ids_iterator is None:
            batch_ids = [uuid.uuid4().hex for _ in batch_texts]
        else:
            batch_ids = list(islice(ids_iterator, batch_size))

        # Generate the sparse embeddings for all the texts in a batch
        batch_embeddings: List[Tuple[List[int], List[float]]] = [
            self.sparse_encoder(text) for text in batch_texts
        ]

        batch_payloads = Qdrant._build_payloads(
            batch_texts,
            batch_metadatas,
            self.content_payload_key,
            self.metadata_payload_key,
        )

        points = [
            rest.PointStruct(
                id=point_id,
                vector={
                    self.sparse_vector_name: rest.SparseVector(
                        indices=sparse_vector[0],
                        values=sparse_vector[1],
                    )
                },
                payload=payload,
            )
            for point_id, sparse_vector, payload in zip(
                batch_ids, batch_embeddings, batch_payloads
            )
        ]

        yield batch_ids, points