[docs]classBagel(VectorStore):"""``Bagel.net`` Inference platform. To use, you should have the ``bagelML`` python package installed. Example: .. code-block:: python from langchain_community.vectorstores import Bagel vectorstore = Bagel(cluster_name="langchain_store") """_LANGCHAIN_DEFAULT_CLUSTER_NAME:str="langchain"
[docs]def__init__(self,cluster_name:str=_LANGCHAIN_DEFAULT_CLUSTER_NAME,client_settings:Optional[bagel.config.Settings]=None,embedding_function:Optional[Embeddings]=None,cluster_metadata:Optional[Dict]=None,client:Optional[bagel.Client]=None,relevance_score_fn:Optional[Callable[[float],float]]=None,)->None:"""Initialize with bagel client"""try:importbagelimportbagel.configexceptImportError:raiseImportError("Please install bagel `pip install bagelML`.")ifclientisnotNone:self._client_settings=client_settingsself._client=clientelse:ifclient_settings:_client_settings=client_settingselse:_client_settings=bagel.config.Settings(bagel_api_impl="rest",bagel_server_host="api.bageldb.ai",)self._client_settings=_client_settingsself._client=bagel.Client(_client_settings)self._cluster=self._client.get_or_create_cluster(name=cluster_name,metadata=cluster_metadata,)self.override_relevance_score_fn=relevance_score_fnself._embedding_function=embedding_function
@propertydefembeddings(self)->Optional[Embeddings]:returnself._embedding_function@xor_args(("query_texts","query_embeddings"))def__query_cluster(self,query_texts:Optional[List[str]]=None,query_embeddings:Optional[List[List[float]]]=None,n_results:int=4,where:Optional[Dict[str,str]]=None,**kwargs:Any,)->List[Document]:"""Query the Bagel cluster based on the provided parameters."""try:importbagel# noqa: F401exceptImportError:raiseImportError("Please install bagel `pip install bagelML`.")ifself._embedding_functionandquery_embeddingsisNoneandquery_texts:texts=list(query_texts)query_embeddings=self._embedding_function.embed_documents(texts)query_texts=Nonereturnself._cluster.find(query_texts=query_texts,query_embeddings=query_embeddings,n_results=n_results,where=where,**kwargs,)
[docs]defadd_texts(self,texts:Iterable[str],metadatas:Optional[List[dict]]=None,ids:Optional[List[str]]=None,embeddings:Optional[List[List[float]]]=None,**kwargs:Any,)->List[str]:""" Add texts along with their corresponding embeddings and optional metadata to the Bagel cluster. Args: texts (Iterable[str]): Texts to be added. embeddings (Optional[List[float]]): List of embeddingvectors metadatas (Optional[List[dict]]): Optional list of metadatas. ids (Optional[List[str]]): List of unique ID for the texts. Returns: List[str]: List of unique ID representing the added texts. """# creating unique ids if NoneifidsisNone:ids=[str(uuid.uuid4())for_intexts]texts=list(texts)ifself._embedding_functionandembeddingsisNoneandtexts:embeddings=self._embedding_function.embed_documents(texts)ifmetadatas:length_diff=len(texts)-len(metadatas)iflength_diff:metadatas=metadatas+[{}]*length_diffempty_ids=[]non_empty_ids=[]foridx,metadatainenumerate(metadatas):ifmetadata:non_empty_ids.append(idx)else:empty_ids.append(idx)ifnon_empty_ids:metadatas=[metadatas[idx]foridxinnon_empty_ids]texts_with_metadatas=[texts[idx]foridxinnon_empty_ids]embeddings_with_metadatas=([embeddings[idx]foridxinnon_empty_ids]ifembeddingselseNone)ids_with_metadata=[ids[idx]foridxinnon_empty_ids]self._cluster.upsert(embeddings=embeddings_with_metadatas,metadatas=metadatas,documents=texts_with_metadatas,ids=ids_with_metadata,)ifempty_ids:texts_without_metadatas=[texts[j]forjinempty_ids]embeddings_without_metadatas=([embeddings[j]forjinempty_ids]ifembeddingselseNone)ids_without_metadatas=[ids[j]forjinempty_ids]self._cluster.upsert(embeddings=embeddings_without_metadatas,documents=texts_without_metadatas,ids=ids_without_metadatas,)else:metadatas=[{}]*len(texts)self._cluster.upsert(embeddings=embeddings,documents=texts,metadatas=metadatas,ids=ids,)returnids
[docs]defsimilarity_search(self,query:str,k:int=DEFAULT_K,where:Optional[Dict[str,str]]=None,**kwargs:Any,)->List[Document]:""" Run a similarity search with Bagel. Args: query (str): The query text to search for similar documents/texts. k (int): The number of results to return. where (Optional[Dict[str, str]]): Metadata filters to narrow down. Returns: List[Document]: List of documents objects representing the documents most similar to the query text. """docs_and_scores=self.similarity_search_with_score(query,k,where=where)return[docfordoc,_indocs_and_scores]
[docs]defsimilarity_search_with_score(self,query:str,k:int=DEFAULT_K,where:Optional[Dict[str,str]]=None,**kwargs:Any,)->List[Tuple[Document,float]]:""" Run a similarity search with Bagel and return documents with their corresponding similarity scores. Args: query (str): The query text to search for similar documents. k (int): The number of results to return. where (Optional[Dict[str, str]]): Filter using metadata. Returns: List[Tuple[Document, float]]: List of tuples, each containing a Document object representing a similar document and its corresponding similarity score. """results=self.__query_cluster(query_texts=[query],n_results=k,where=where)return_results_to_docs_and_scores(results)
[docs]@classmethoddeffrom_texts(cls:Type[Bagel],texts:List[str],embedding:Optional[Embeddings]=None,metadatas:Optional[List[dict]]=None,ids:Optional[List[str]]=None,cluster_name:str=_LANGCHAIN_DEFAULT_CLUSTER_NAME,client_settings:Optional[bagel.config.Settings]=None,cluster_metadata:Optional[Dict]=None,client:Optional[bagel.Client]=None,text_embeddings:Optional[List[List[float]]]=None,**kwargs:Any,)->Bagel:""" Create and initialize a Bagel instance from list of texts. Args: texts (List[str]): List of text content to be added. cluster_name (str): The name of the Bagel cluster. client_settings (Optional[bagel.config.Settings]): Client settings. cluster_metadata (Optional[Dict]): Metadata of the cluster. embeddings (Optional[Embeddings]): List of embedding. metadatas (Optional[List[dict]]): List of metadata. ids (Optional[List[str]]): List of unique ID. Defaults to None. client (Optional[bagel.Client]): Bagel client instance. Returns: Bagel: Bagel vectorstore. """bagel_cluster=cls(cluster_name=cluster_name,embedding_function=embedding,client_settings=client_settings,client=client,cluster_metadata=cluster_metadata,**kwargs,)_=bagel_cluster.add_texts(texts=texts,embeddings=text_embeddings,metadatas=metadatas,ids=ids)returnbagel_cluster
[docs]defdelete_cluster(self)->None:"""Delete the cluster."""self._client.delete_cluster(self._cluster.name)
[docs]defsimilarity_search_by_vector_with_relevance_scores(self,query_embeddings:List[float],k:int=DEFAULT_K,where:Optional[Dict[str,str]]=None,**kwargs:Any,)->List[Tuple[Document,float]]:""" Return docs most similar to embedding vector and similarity score. """results=self.__query_cluster(query_embeddings=query_embeddings,n_results=k,where=where)return_results_to_docs_and_scores(results)
[docs]defsimilarity_search_by_vector(self,embedding:List[float],k:int=DEFAULT_K,where:Optional[Dict[str,str]]=None,**kwargs:Any,)->List[Document]:"""Return docs most similar to embedding vector."""results=self.__query_cluster(query_embeddings=embedding,n_results=k,where=where)return_results_to_docs(results)
def_select_relevance_score_fn(self)->Callable[[float],float]:""" Select and return the appropriate relevance score function based on the distance metric used in the Bagel cluster. """ifself.override_relevance_score_fn:returnself.override_relevance_score_fndistance="l2"distance_key="hnsw:space"metadata=self._cluster.metadataifmetadataanddistance_keyinmetadata:distance=metadata[distance_key]ifdistance=="cosine":returnself._cosine_relevance_score_fnelifdistance=="l2":returnself._euclidean_relevance_score_fnelifdistance=="ip":returnself._max_inner_product_relevance_score_fnelse:raiseValueError("No supported normalization function for distance"f" metric of type: {distance}. Consider providing"" relevance_score_fn to Bagel constructor.")
[docs]@classmethoddeffrom_documents(cls:Type[Bagel],documents:List[Document],embedding:Optional[Embeddings]=None,ids:Optional[List[str]]=None,cluster_name:str=_LANGCHAIN_DEFAULT_CLUSTER_NAME,client_settings:Optional[bagel.config.Settings]=None,client:Optional[bagel.Client]=None,cluster_metadata:Optional[Dict]=None,**kwargs:Any,)->Bagel:""" Create a Bagel vectorstore from a list of documents. Args: documents (List[Document]): List of Document objects to add to the Bagel vectorstore. embedding (Optional[List[float]]): List of embedding. ids (Optional[List[str]]): List of IDs. Defaults to None. cluster_name (str): The name of the Bagel cluster. client_settings (Optional[bagel.config.Settings]): Client settings. client (Optional[bagel.Client]): Bagel client instance. cluster_metadata (Optional[Dict]): Metadata associated with the Bagel cluster. Defaults to None. Returns: Bagel: Bagel vectorstore. """texts=[doc.page_contentfordocindocuments]metadatas=[doc.metadatafordocindocuments]returncls.from_texts(texts=texts,embedding=embedding,metadatas=metadatas,ids=ids,cluster_name=cluster_name,client_settings=client_settings,client=client,cluster_metadata=cluster_metadata,**kwargs,)
[docs]defupdate_document(self,document_id:str,document:Document)->None:"""Update a document in the cluster. Args: document_id (str): ID of the document to update. document (Document): Document to update. """text=document.page_contentmetadata=document.metadataself._cluster.update(ids=[document_id],documents=[text],metadatas=[metadata],)
[docs]defget(self,ids:Optional[OneOrMany[ID]]=None,where:Optional[Where]=None,limit:Optional[int]=None,offset:Optional[int]=None,where_document:Optional[WhereDocument]=None,include:Optional[List[str]]=None,)->Dict[str,Any]:"""Gets the collection."""kwargs={"ids":ids,"where":where,"limit":limit,"offset":offset,"where_document":where_document,}ifincludeisnotNone:kwargs["include"]=includereturnself._cluster.get(**kwargs)
[docs]defdelete(self,ids:Optional[List[str]]=None,**kwargs:Any)->None:""" Delete by IDs. Args: ids: List of ids to delete. """self._cluster.delete(ids=ids)