class AtlasDB(VectorStore):
    """`Atlas` vector store.

    Atlas is the `Nomic's` neural database and `rhizomatic` instrument.

    To use, you should have the ``nomic`` python package installed.

    Example:
        .. code-block:: python

                from langchain_community.vectorstores import AtlasDB
                from langchain_community.embeddings.openai import OpenAIEmbeddings

                embeddings = OpenAIEmbeddings()
                vectorstore = AtlasDB(
                    "my_project", embeddings, api_key="<your nomic api key>"
                )
    """

    # Name of the per-row field that carries the document id in uploaded data.
    _ATLAS_DEFAULT_ID_FIELD: str = "atlas_id"

    def __init__(
        self,
        name: str,
        embedding_function: Optional[Embeddings] = None,
        api_key: Optional[str] = None,
        description: str = "A description for your project",
        is_public: bool = True,
        reset_project_if_exists: bool = False,
    ) -> None:
        """
        Initialize the Atlas Client

        Args:
            name (str): The name of your project. If the project already exists,
                it will be loaded.
            embedding_function (Optional[Embeddings]): An optional embeddings
                object used for embedding your data. If None, the project is
                created with the "text" modality and raw text is uploaded.
            api_key (str): Your nomic API key. Required despite the ``None``
                default.
            description (str): A description for your project.
            is_public (bool): Whether your project is publicly accessible.
                True by default.
            reset_project_if_exists (bool): Whether to reset this project if it
                already exists. Default False.
                Generally useful during development and testing.

        Raises:
            ImportError: If the ``nomic`` package is not installed.
            ValueError: If ``api_key`` is None.
        """
        try:
            import nomic
            from nomic import AtlasProject
        except ImportError as e:
            raise ImportError(
                "Could not import nomic python package. "
                "Please install it with `pip install nomic`."
            ) from e

        if api_key is None:
            raise ValueError("No API key provided. Sign up at atlas.nomic.ai!")
        nomic.login(api_key)

        self._embedding_function = embedding_function
        # With an embedding function we upload vectors; otherwise raw text.
        modality = "embedding" if self._embedding_function is not None else "text"

        # Check if the project exists, create it if not
        self.project = AtlasProject(
            name=name,
            description=description,
            modality=modality,
            is_public=is_public,
            reset_project_if_exists=reset_project_if_exists,
            unique_id_field=AtlasDB._ATLAS_DEFAULT_ID_FIELD,
        )
        self.project._latest_project_state()

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        refresh: bool = True,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Args:
            texts (Iterable[str]): Texts to add to the vectorstore.
            metadatas (Optional[List[dict]], optional): Optional list of
                metadatas, one per text. The dicts are copied, not modified.
            ids (Optional[List[str]]): An optional list of ids, one per text.
                Random UUID4 strings are generated when omitted.
            refresh(bool): Whether or not to refresh indices with the updated
                data. Default True.

        Returns:
            List[str]: List of IDs of the added texts.

        Raises:
            ValueError: If the first metadata dict contains the reserved key
                ``text``, or if ``metadatas`` / ``ids`` do not have exactly one
                entry per text.
        """
        if (
            metadatas is not None
            and len(metadatas) > 0
            and "text" in metadatas[0].keys()
        ):
            raise ValueError("Cannot accept key text in metadata!")

        texts = list(texts)
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in texts]

        # Fail early with a clear message instead of an IndexError or
        # silently misaligned rows further down.
        if len(ids) != len(texts):
            raise ValueError(
                f"Got {len(ids)} ids for {len(texts)} texts; lengths must match."
            )
        if metadatas is not None and len(metadatas) != len(texts):
            raise ValueError(
                f"Got {len(metadatas)} metadatas for {len(texts)} texts; "
                "lengths must match."
            )

        id_field = AtlasDB._ATLAS_DEFAULT_ID_FIELD
        if metadatas is None:
            data = [
                {id_field: doc_id, "text": text} for doc_id, text in zip(ids, texts)
            ]
        else:
            # Build fresh dicts so the caller's metadata (e.g. Document.metadata)
            # is not polluted with the injected id and text fields.
            data = [
                {**metadata, id_field: doc_id, "text": text}
                for metadata, doc_id, text in zip(metadatas, ids, texts)
            ]

        self.project._validate_map_data_inputs([], id_field=id_field, data=data)

        if self._embedding_function is not None:
            # Embedding upload case
            _embeddings = self._embedding_function.embed_documents(texts)
            embeddings = np.stack(_embeddings)
            with self.project.wait_for_project_lock():
                self.project.add_embeddings(embeddings=embeddings, data=data)
        else:
            # Text upload case
            with self.project.wait_for_project_lock():
                self.project.add_text(data)

        # Only rebuild when the project already has at least one index.
        if refresh and len(self.project.indices) > 0:
            with self.project.wait_for_project_lock():
                self.project.rebuild_maps()

        return ids

    def create_index(self, **kwargs: Any) -> Any:
        """Creates an index in your project.

        See
        https://docs.nomic.ai/atlas_api.html#nomic.project.AtlasProject.create_index
        for full detail.

        Args:
            **kwargs: Forwarded unchanged to ``self.project.create_index``.

        Returns:
            Whatever ``self.project.create_index`` returns.
        """
        with self.project.wait_for_project_lock():
            return self.project.create_index(**kwargs)

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        **kwargs: Any,
    ) -> List[Document]:
        """Run similarity search with AtlasDB

        Args:
            query (str): Query text to search for.
            k (int): Number of results to return. Defaults to 4.

        Returns:
            List[Document]: List of documents most similar to the query text.

        Raises:
            NotImplementedError: If no embedding function was supplied at
                construction time.
        """
        if self._embedding_function is None:
            raise NotImplementedError(
                "AtlasDB requires an embedding_function for text similarity search!"
            )

        _embedding = self._embedding_function.embed_documents([query])[0]
        # vector_search is given a 2-D array: one row per query.
        embedding = np.array(_embedding).reshape(1, -1)
        with self.project.wait_for_project_lock():
            neighbors, _ = self.project.projections[0].vector_search(
                queries=embedding, k=k
            )
            # Row 0 holds the neighbor ids for our single query.
            data = self.project.get_data(ids=neighbors[0])

        # Build one Document per fetched datum. Iterating over `neighbors`
        # (one row per query) would yield only a single result.
        return [Document(page_content=datum["text"], metadata=datum) for datum in data]

    @classmethod
    def from_texts(
        cls: Type[AtlasDB],
        texts: List[str],
        embedding: Optional[Embeddings] = None,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        name: Optional[str] = None,
        api_key: Optional[str] = None,
        description: str = "A description for your project",
        is_public: bool = True,
        reset_project_if_exists: bool = False,
        index_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ) -> AtlasDB:
        """Create an AtlasDB vectorstore from raw texts.

        Args:
            texts (List[str]): The list of texts to ingest.
            name (str): Name of the project to create.
            api_key (str): Your nomic API key,
            embedding (Optional[Embeddings]): Embedding function. Defaults to None.
            metadatas (Optional[List[dict]]): List of metadatas. Defaults to None.
            ids (Optional[List[str]]): Optional list of document IDs. If None,
                ids will be auto created
            description (str): A description for your project.
            is_public (bool): Whether your project is publicly accessible.
                True by default.
            reset_project_if_exists (bool): Whether to reset this project if it
                already exists. Default False.
                Generally useful during development and testing.
            index_kwargs (Optional[dict]): Dict of kwargs for index creation.
                See https://docs.nomic.ai/atlas_api.html. Entries override the
                defaults ``name=<name>_index`` and ``indexed_field="text"``.

        Returns:
            AtlasDB: Nomic's neural database and finest rhizomatic instrument

        Raises:
            ValueError: If ``name`` or ``api_key`` is None.
        """
        if name is None or api_key is None:
            raise ValueError("`name` and `api_key` cannot be None.")

        # Inject relevant kwargs; caller-supplied values win over the defaults.
        all_index_kwargs = {"name": name + "_index", "indexed_field": "text"}
        if index_kwargs is not None:
            all_index_kwargs.update(index_kwargs)

        # Build project
        atlasDB = cls(
            name,
            embedding_function=embedding,
            api_key=api_key,
            description=description,
            is_public=is_public,
            reset_project_if_exists=reset_project_if_exists,
        )
        with atlasDB.project.wait_for_project_lock():
            atlasDB.add_texts(texts=texts, metadatas=metadatas, ids=ids)
            atlasDB.create_index(**all_index_kwargs)
        return atlasDB

    @classmethod
    def from_documents(
        cls: Type[AtlasDB],
        documents: List[Document],
        embedding: Optional[Embeddings] = None,
        ids: Optional[List[str]] = None,
        name: Optional[str] = None,
        api_key: Optional[str] = None,
        persist_directory: Optional[str] = None,
        description: str = "A description for your project",
        is_public: bool = True,
        reset_project_if_exists: bool = False,
        index_kwargs: Optional[dict] = None,
        **kwargs: Any,
    ) -> AtlasDB:
        """Create an AtlasDB vectorstore from a list of documents.

        Args:
            name (str): Name of the collection to create.
            api_key (str): Your nomic API key,
            documents (List[Document]): List of documents to add to the vectorstore.
            embedding (Optional[Embeddings]): Embedding function. Defaults to None.
            ids (Optional[List[str]]): Optional list of document IDs. If None,
                ids will be auto created
            persist_directory (Optional[str]): Accepted but not used by this
                method.
            description (str): A description for your project.
            is_public (bool): Whether your project is publicly accessible.
                True by default.
            reset_project_if_exists (bool): Whether to reset this project if it
                already exists. Default False.
                Generally useful during development and testing.
            index_kwargs (Optional[dict]): Dict of kwargs for index creation.
                See https://docs.nomic.ai/atlas_api.html

        Returns:
            AtlasDB: Nomic's neural database and finest rhizomatic instrument

        Raises:
            ValueError: If ``name`` or ``api_key`` is None.
        """
        if name is None or api_key is None:
            raise ValueError("`name` and `api_key` cannot be None.")

        texts = [doc.page_content for doc in documents]
        metadatas = [doc.metadata for doc in documents]
        return cls.from_texts(
            name=name,
            api_key=api_key,
            texts=texts,
            embedding=embedding,
            metadatas=metadatas,
            ids=ids,
            description=description,
            is_public=is_public,
            reset_project_if_exists=reset_project_if_exists,
            index_kwargs=index_kwargs,
        )