# Source code for langchain_community.vectorstores.epsilla
"""Wrapper around Epsilla vector database."""

from __future__ import annotations

import logging
import uuid
from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

if TYPE_CHECKING:
    # Only needed for annotations; the runtime import happens lazily in
    # ``Epsilla.__init__`` so that ``pyepsilla`` stays an optional dependency.
    from pyepsilla import vectordb

# Use a module-scoped logger (not the root logger) so that applications can
# filter or configure this module's output independently.
logger = logging.getLogger(__name__)
class Epsilla(VectorStore):
    """Wrapper around Epsilla vector database.

    As a prerequisite, you need to install the ``pyepsilla`` package and have
    a running Epsilla vector database (for example, through our docker image).
    See the following documentation for how to run an Epsilla vector database:
    https://epsilla-inc.gitbook.io/epsilladb/quick-start

    Args:
        client (Any): Epsilla client to connect to.
        embeddings (Embeddings): Function used to embed the texts.
        db_path (Optional[str]): The path where the database will be persisted.
            Defaults to "/tmp/langchain-epsilla".
        db_name (Optional[str]): Give a name to the loaded database.
            Defaults to "langchain_store".

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Epsilla
            from pyepsilla import vectordb

            client = vectordb.Client()
            embeddings = OpenAIEmbeddings()
            db_path = "/tmp/vectorstore"
            db_name = "langchain_store"
            epsilla = Epsilla(client, embeddings, db_path, db_name)
    """

    _LANGCHAIN_DEFAULT_DB_NAME: str = "langchain_store"
    _LANGCHAIN_DEFAULT_DB_PATH: str = "/tmp/langchain-epsilla"
    _LANGCHAIN_DEFAULT_TABLE_NAME: str = "langchain_collection"

    def __init__(
        self,
        client: Any,
        embeddings: Embeddings,
        db_path: Optional[str] = _LANGCHAIN_DEFAULT_DB_PATH,
        db_name: Optional[str] = _LANGCHAIN_DEFAULT_DB_NAME,
    ) -> None:
        """Initialize with necessary components.

        Validates the client type, stores the embedding function, then loads
        and selects the database ``db_name`` on the client.

        Raises:
            ImportError: If ``pyepsilla`` is not installed.
            TypeError: If ``client`` is not one of the supported client classes.
        """
        try:
            import pyepsilla
        except ImportError as e:
            raise ImportError(
                "Could not import pyepsilla python package. "
                "Please install pyepsilla package with `pip install pyepsilla`."
            ) from e

        if not isinstance(
            client, (pyepsilla.vectordb.Client, pyepsilla.cloud.client.Vectordb)
        ):
            raise TypeError(
                "client should be an instance of pyepsilla.vectordb.Client or "
                f"pyepsilla.cloud.client.Vectordb, got {type(client)}"
            )

        self._client: vectordb.Client = client
        self._db_name = db_name
        self._embeddings = embeddings
        self._collection_name = Epsilla._LANGCHAIN_DEFAULT_TABLE_NAME
        self._client.load_db(db_name=db_name, db_path=db_path)
        self._client.use_db(db_name=db_name)

    def use_collection(self, collection_name: str) -> None:
        """Set default collection to use.

        Args:
            collection_name (str): The name of the collection.
        """
        self._collection_name = collection_name

    def clear_data(self, collection_name: str = "") -> None:
        """Clear data in a collection.

        Args:
            collection_name (Optional[str]): The name of the collection.
                If not provided, the default collection will be used.
        """
        if not collection_name:
            collection_name = self._collection_name
        self._client.drop_table(collection_name)

    def get(
        self,
        collection_name: str = "",
        response_fields: Optional[List[str]] = None,
    ) -> List[dict]:
        """Get the collection.

        Args:
            collection_name (Optional[str]): The name of the collection
                to retrieve data from.
                If not provided, the default collection will be used.
            response_fields (Optional[List[str]]): List of field names in the
                result. If not specified, all available fields will be
                responded.

        Returns:
            A list of the retrieved data.

        Raises:
            Exception: If the client reports a status code other than 200.
        """
        if not collection_name:
            collection_name = self._collection_name
        status_code, response = self._client.get(
            table_name=collection_name, response_fields=response_fields
        )
        if status_code != 200:
            logger.error("Failed to get records: %s", response["message"])
            raise Exception("Error: {}.".format(response["message"]))
        return response["result"]

    def _create_collection(
        self,
        table_name: str,
        embeddings: list,
        metadatas: Optional[List[dict]] = None,
    ) -> None:
        """Create the table ``table_name`` with a schema derived from the data.

        The schema always has ``id``, ``text`` and ``embeddings`` fields; the
        vector dimension is taken from the first embedding. Every metadata key
        seen in ``metadatas`` adds one more field whose type is inferred from
        the first value encountered for that key.

        Raises:
            ValueError: If ``embeddings`` is empty or a metadata value has an
                unsupported type.
            Exception: If table creation fails with a status other than
                200 or 409.
        """
        if not embeddings:
            raise ValueError("Embeddings list is empty.")

        dim = len(embeddings[0])
        fields: List[dict] = [
            {"name": "id", "dataType": "INT"},
            {"name": "text", "dataType": "STRING"},
            {"name": "embeddings", "dataType": "VECTOR_FLOAT", "dimensions": dim},
        ]
        if metadatas is not None:
            field_names = [field["name"] for field in fields]
            for metadata in metadatas:
                for key, value in metadata.items():
                    if key in field_names:
                        continue
                    d_type: str
                    if isinstance(value, str):
                        d_type = "STRING"
                    # bool must be tested before int: bool is a subclass of
                    # int, so the int check would otherwise swallow booleans.
                    elif isinstance(value, bool):
                        d_type = "BOOL"
                    elif isinstance(value, int):
                        d_type = "INT"
                    elif isinstance(value, float):
                        d_type = "FLOAT"
                    else:
                        raise ValueError(f"Unsupported data type for {key}.")
                    fields.append({"name": key, "dataType": d_type})
                    field_names.append(key)

        status_code, response = self._client.create_table(
            table_name, table_fields=fields
        )
        if status_code != 200:
            if status_code == 409:
                # 409 is handled as "table already exists": keep using it.
                logger.info("Continuing with the existing table %s.", table_name)
            else:
                logger.error(
                    "Failed to create collection %s: %s",
                    table_name,
                    response["message"],
                )
                raise Exception("Error: {}.".format(response["message"]))

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        collection_name: Optional[str] = "",
        drop_old: Optional[bool] = False,
        **kwargs: Any,
    ) -> List[str]:
        """Embed texts and add them to the database.

        Args:
            texts (Iterable[str]): The texts to embed.
            metadatas (Optional[List[dict]]): Metadata dicts
                attached to each of the texts. Defaults to None.
            collection_name (Optional[str]): Which collection to use.
                Defaults to "langchain_collection".
                If provided, default collection name will be set as well.
            drop_old (Optional[bool]): Whether to drop the previous collection
                and create a new one. Defaults to False.

        Returns:
            List of ids of the added texts.

        Raises:
            Exception: If the insert fails with a status code other than 200.
        """
        if not collection_name:
            collection_name = self._collection_name
        else:
            self._collection_name = collection_name

        if drop_old:
            # Drop the collection (table) itself, matching ``clear_data``;
            # the result is ignored so a missing table does not abort the add.
            self._client.drop_table(collection_name)

        texts = list(texts)
        try:
            embeddings = self._embeddings.embed_documents(texts)
        except NotImplementedError:
            # Fall back to per-text embedding when batch embedding is missing.
            embeddings = [self._embeddings.embed_query(x) for x in texts]

        if len(embeddings) == 0:
            logger.debug("Nothing to insert, skipping.")
            return []

        self._create_collection(
            table_name=collection_name, embeddings=embeddings, metadatas=metadatas
        )

        ids = [hash(uuid.uuid4()) for _ in texts]
        records = []
        for index, record_id in enumerate(ids):
            record: dict = {
                "id": record_id,
                "text": texts[index],
                "embeddings": embeddings[index],
            }
            if metadatas is not None:
                # Metadata keys are stored as top-level record fields.
                record.update(metadatas[index])
            records.append(record)

        status_code, response = self._client.insert(
            table_name=collection_name, records=records
        )
        if status_code != 200:
            logger.error(
                "Failed to add records to %s: %s",
                collection_name,
                response["message"],
            )
            raise Exception("Error: {}.".format(response["message"]))
        return [str(record_id) for record_id in ids]

    def similarity_search(
        self, query: str, k: int = 4, collection_name: str = "", **kwargs: Any
    ) -> List[Document]:
        """Return the documents that are semantically most relevant to the query.

        Args:
            query (str): String to query the vectorstore with.
            k (Optional[int]): Number of documents to return. Defaults to 4.
            collection_name (Optional[str]): Collection to use.
                Defaults to "langchain_collection" or the one provided before.

        Returns:
            List of documents that are semantically most relevant to the query.

        Raises:
            Exception: If the query fails with a status code other than 200.
        """
        if not collection_name:
            collection_name = self._collection_name
        query_vector = self._embeddings.embed_query(query)
        status_code, response = self._client.query(
            table_name=collection_name,
            query_field="embeddings",
            query_vector=query_vector,
            limit=k,
        )
        if status_code != 200:
            logger.error("Search failed: %s.", response["message"])
            raise Exception("Error: {}.".format(response["message"]))

        # Every field except the built-in ones is returned as metadata.
        exclude_keys = ("id", "text", "embeddings")
        return [
            Document(
                page_content=item["text"],
                metadata={
                    key: item[key] for key in item if key not in exclude_keys
                },
            )
            for item in response["result"]
        ]

    @classmethod
    def from_texts(
        cls: Type[Epsilla],
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        client: Any = None,
        db_path: Optional[str] = _LANGCHAIN_DEFAULT_DB_PATH,
        db_name: Optional[str] = _LANGCHAIN_DEFAULT_DB_NAME,
        collection_name: Optional[str] = _LANGCHAIN_DEFAULT_TABLE_NAME,
        drop_old: Optional[bool] = False,
        **kwargs: Any,
    ) -> Epsilla:
        """Create an Epsilla vectorstore from raw documents.

        Args:
            texts (List[str]): List of text data to be inserted.
            embedding (Embeddings): Embedding function.
            metadatas (Optional[List[dict]]): Metadata for each text.
                Defaults to None.
            client (pyepsilla.vectordb.Client): Epsilla client to connect to.
            db_path (Optional[str]): The path where the database will be
                persisted. Defaults to "/tmp/langchain-epsilla".
            db_name (Optional[str]): Give a name to the loaded database.
                Defaults to "langchain_store".
            collection_name (Optional[str]): Which collection to use.
                Defaults to "langchain_collection".
                If provided, default collection name will be set as well.
            drop_old (Optional[bool]): Whether to drop the previous collection
                and create a new one. Defaults to False.

        Returns:
            Epsilla: Epsilla vector store.
        """
        # Instantiate through ``cls`` so subclasses get their own type back.
        instance = cls(client, embedding, db_path=db_path, db_name=db_name)
        instance.add_texts(
            texts,
            metadatas=metadatas,
            collection_name=collection_name,
            drop_old=drop_old,
            **kwargs,
        )
        return instance

    @classmethod
    def from_documents(
        cls: Type[Epsilla],
        documents: List[Document],
        embedding: Embeddings,
        client: Any = None,
        db_path: Optional[str] = _LANGCHAIN_DEFAULT_DB_PATH,
        db_name: Optional[str] = _LANGCHAIN_DEFAULT_DB_NAME,
        collection_name: Optional[str] = _LANGCHAIN_DEFAULT_TABLE_NAME,
        drop_old: Optional[bool] = False,
        **kwargs: Any,
    ) -> Epsilla:
        """Create an Epsilla vectorstore from a list of documents.

        The page content of each document is embedded and its metadata is
        stored alongside it.

        Args:
            documents (List[Document]): Documents to be inserted.
            embedding (Embeddings): Embedding function.
            client (pyepsilla.vectordb.Client): Epsilla client to connect to.
            db_path (Optional[str]): The path where the database will be
                persisted. Defaults to "/tmp/langchain-epsilla".
            db_name (Optional[str]): Give a name to the loaded database.
                Defaults to "langchain_store".
            collection_name (Optional[str]): Which collection to use.
                Defaults to "langchain_collection".
                If provided, default collection name will be set as well.
            drop_old (Optional[bool]): Whether to drop the previous collection
                and create a new one. Defaults to False.

        Returns:
            Epsilla: Epsilla vector store.
        """
        texts = [doc.page_content for doc in documents]
        metadatas = [doc.metadata for doc in documents]

        return cls.from_texts(
            texts,
            embedding,
            metadatas=metadatas,
            client=client,
            db_path=db_path,
            db_name=db_name,
            collection_name=collection_name,
            drop_old=drop_old,
            **kwargs,
        )