@deprecated(
    since="0.0.25",
    removal="1.0",
    alternative_import="langchain_mongodb.MongoDBAtlasVectorSearch",
)
class MongoDBAtlasVectorSearch(VectorStore):
    """`MongoDB Atlas Vector Search` vector store.

    To use, you should have both:
    - the ``pymongo`` python package installed
    - a connection string associated with a MongoDB Atlas Cluster having deployed an
        Atlas Search index

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import MongoDBAtlasVectorSearch
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from pymongo import MongoClient

            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
            collection = mongo_client["<db_name>"]["<collection_name>"]
            embeddings = OpenAIEmbeddings()
            vectorstore = MongoDBAtlasVectorSearch(collection, embeddings)
    """

    def __init__(
        self,
        collection: Collection[MongoDBDocumentType],
        embedding: Embeddings,
        *,
        index_name: str = "default",
        text_key: str = "text",
        embedding_key: str = "embedding",
        relevance_score_fn: str = "cosine",
    ):
        """Store the collection, embedding model and field/index configuration.

        Args:
            collection: MongoDB collection to add the texts to.
            embedding: Text embedding model to use.
            index_name: Name of the Atlas Search index.
            text_key: MongoDB field that will contain the text for each
                document.
            embedding_key: MongoDB field that will contain the embedding for
                each document.
            relevance_score_fn: The similarity score used for the index.
                The values recognised by ``_select_relevance_score_fn`` are
                ``"euclidean"``, ``"cosine"`` and ``"dotProduct"``.
        """
        self._collection = collection
        self._embedding = embedding
        self._index_name = index_name
        self._text_key = text_key
        self._embedding_key = embedding_key
        self._relevance_score_fn = relevance_score_fn

    @property
    def embeddings(self) -> Embeddings:
        """Return the embedding model this store was constructed with."""
        return self._embedding

    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        """Return the score-normalisation function for the configured metric.

        Raises:
            NotImplementedError: If ``relevance_score_fn`` is not one of
                ``"euclidean"``, ``"dotProduct"`` or ``"cosine"``.
        """
        # The three helpers are not defined in this class; presumably they are
        # inherited from ``VectorStore``.
        if self._relevance_score_fn == "euclidean":
            return self._euclidean_relevance_score_fn
        if self._relevance_score_fn == "dotProduct":
            return self._max_inner_product_relevance_score_fn
        if self._relevance_score_fn == "cosine":
            return self._cosine_relevance_score_fn
        # The message previously contained a stray ``$`` (JS template syntax
        # inside a Python f-string), which leaked into the error text.
        raise NotImplementedError(
            f"No relevance score function for {self._relevance_score_fn}"
        )

    @classmethod
    def from_connection_string(
        cls,
        connection_string: str,
        namespace: str,
        embedding: Embeddings,
        **kwargs: Any,
    ) -> MongoDBAtlasVectorSearch:
        """Construct a `MongoDB Atlas Vector Search` vector store
        from a MongoDB connection URI.

        Args:
            connection_string: A valid MongoDB connection URI.
            namespace: A valid MongoDB namespace (database and collection),
                written as ``"<db_name>.<collection_name>"``.
            embedding: The text embedding model to use for the vector store.
            **kwargs: Forwarded unchanged to the class constructor.

        Returns:
            A new MongoDBAtlasVectorSearch instance.

        Raises:
            ImportError: If the required imports are unavailable.
            ValueError: If ``namespace`` does not contain a ``"."``.
        """
        try:
            from importlib.metadata import version

            from pymongo import MongoClient
            from pymongo.driver_info import DriverInfo
        except ImportError as err:
            # Chain the original error so the real cause stays visible.
            raise ImportError(
                "Could not import pymongo, please install it with "
                "`pip install pymongo`."
            ) from err
        client: MongoClient = MongoClient(
            connection_string,
            driver=DriverInfo(name="Langchain", version=version("langchain")),
        )
        # Split on the first dot only: everything after it is the collection
        # name, which may itself contain dots.
        db_name, collection_name = namespace.split(".", 1)
        collection = client[db_name][collection_name]
        return cls(collection, embedding, **kwargs)

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[str, Any]]] = None,
        **kwargs: Any,
    ) -> List:
        """Run more texts through the embeddings and add to the vectorstore.

        Texts are embedded and inserted in batches. The batch size is read
        from ``kwargs["batch_size"]`` and defaults to
        ``DEFAULT_INSERT_BATCH_SIZE``.

        Args:
            texts: Iterable of strings to add to the vectorstore. It is
                consumed in a single pass, so one-shot iterators are safe.
            metadatas: Optional list of metadatas associated with the texts.
                When missing or empty, every text gets an empty metadata
                dict. When shorter than ``texts``, pairing stops at the
                shorter of the two.

        Returns:
            List of ids from adding the texts into the vectorstore.
        """
        batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE)
        if metadatas:
            pairs: Iterable[Tuple[str, Dict[str, Any]]] = zip(texts, metadatas)
        else:
            # Build the filler metadata from the same pass over ``texts``.
            # The old ``({} for _ in texts)`` filler re-iterated ``texts``
            # inside ``zip`` and dropped every other item of a generator.
            pairs = ((text, {}) for text in texts)

        texts_batch: List[str] = []
        metadatas_batch: List[Dict[str, Any]] = []
        result_ids: List = []
        for i, (text, metadata) in enumerate(pairs):
            texts_batch.append(text)
            metadatas_batch.append(metadata)
            if (i + 1) % batch_size == 0:
                result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
                texts_batch = []
                metadatas_batch = []
        # Flush whatever is left over after the last full batch.
        if texts_batch:
            result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
        return result_ids

    def _insert_texts(self, texts: List[str], metadatas: List[Dict[str, Any]]) -> List:
        """Embed one batch of texts and insert the resulting documents.

        Args:
            texts: Texts to embed and store.
            metadatas: Metadata dicts paired positionally with ``texts``;
                their keys are merged into the stored document.

        Returns:
            The ``inserted_ids`` reported by ``insert_many``, or an empty
            list when ``texts`` is empty.
        """
        if not texts:
            return []
        # Embed and create the documents
        embeddings = self._embedding.embed_documents(texts)
        to_insert = [
            {self._text_key: t, self._embedding_key: embedding, **m}
            for t, m, embedding in zip(texts, metadatas, embeddings)
        ]
        # insert the documents in MongoDB Atlas
        insert_result = self._collection.insert_many(to_insert)  # type: ignore
        return insert_result.inserted_ids

    def _similarity_search_with_score(
        self,
        embedding: List[float],
        k: int = 4,
        pre_filter: Optional[Dict] = None,
        post_filter_pipeline: Optional[List[Dict]] = None,
    ) -> List[Tuple[Document, float]]:
        """Run a ``$vectorSearch`` aggregation for an already-computed embedding.

        Args:
            embedding: Query vector.
            k: Number of results requested via the stage's ``limit``.
            pre_filter: Optional filter placed in the ``$vectorSearch`` stage.
            post_filter_pipeline: Optional aggregation stages appended after
                the vector search and score stages.

        Returns:
            ``(Document, score)`` tuples in cursor order. Each document's
            metadata holds every returned field except the text and score.
        """
        params: Dict[str, Any] = {
            "queryVector": embedding,
            "path": self._embedding_key,
            # Consider ten times as many candidates as results requested.
            "numCandidates": k * 10,
            "limit": k,
            "index": self._index_name,
        }
        if pre_filter:
            params["filter"] = pre_filter
        query = {"$vectorSearch": params}

        pipeline = [
            query,
            {"$set": {"score": {"$meta": "vectorSearchScore"}}},
        ]
        if post_filter_pipeline is not None:
            pipeline.extend(post_filter_pipeline)
        cursor = self._collection.aggregate(pipeline)  # type: ignore[arg-type]
        docs = []
        for res in cursor:
            # Pop text and score so that only the remaining fields end up in
            # the document metadata.
            text = res.pop(self._text_key)
            score = res.pop("score")
            docs.append((Document(page_content=text, metadata=res), score))
        return docs

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        pre_filter: Optional[Dict] = None,
        post_filter_pipeline: Optional[List[Dict]] = None,
    ) -> List[Tuple[Document, float]]:
        """Return MongoDB documents most similar to the given query and their scores.

        Uses the vectorSearch operator available in MongoDB Atlas Search.
        For more: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/

        Args:
            query: Text to look up documents similar to.
            k: (Optional) number of documents to return. Defaults to 4.
            pre_filter: (Optional) dictionary of argument(s) to prefilter document
                fields on.
            post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
                following the vectorSearch stage.

        Returns:
            List of documents most similar to the query and their scores.
        """
        embedding = self._embedding.embed_query(query)
        return self._similarity_search_with_score(
            embedding,
            k=k,
            pre_filter=pre_filter,
            post_filter_pipeline=post_filter_pipeline,
        )

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        pre_filter: Optional[Dict] = None,
        post_filter_pipeline: Optional[List[Dict]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return MongoDB documents most similar to the given query.

        Uses the vectorSearch operator available in MongoDB Atlas Search.
        For more: https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-stage/

        Args:
            query: Text to look up documents similar to.
            k: (Optional) number of documents to return. Defaults to 4.
            pre_filter: (Optional) dictionary of argument(s) to prefilter document
                fields on.
            post_filter_pipeline: (Optional) Pipeline of MongoDB aggregation stages
                following the vectorSearch stage.
            **kwargs: Only ``additional`` is read. If it contains
                ``"similarity_score"``, each document's score is stored in
                its metadata under the ``"score"`` key.

        Returns:
            List of documents most similar to the query.
        """
        additional = kwargs.get("additional")
        docs_and_scores = self.similarity_search_with_score(
            query,
            k=k,
            pre_filter=pre_filter,
            post_filter_pipeline=post_filter_pipeline,
        )

        if additional and "similarity_score" in additional:
            for doc, score in docs_and_scores:
                doc.metadata["score"] = score
        return [doc for doc, _ in docs_and_scores]

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        pre_filter: Optional[Dict] = None,
        post_filter_pipeline: Optional[List[Dict]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return documents selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: (Optional) number of documents to return. Defaults to 4.
            fetch_k: (Optional) number of documents to fetch before passing to MMR
                algorithm. Defaults to 20.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            pre_filter: (Optional) dictionary of argument(s) to prefilter on document
                fields.
            post_filter_pipeline: (Optional) pipeline of MongoDB aggregation stages
                following the vectorSearch stage.

        Returns:
            List of documents selected by maximal marginal relevance.
        """
        query_embedding = self._embedding.embed_query(query)
        docs = self._similarity_search_with_score(
            query_embedding,
            k=fetch_k,
            pre_filter=pre_filter,
            post_filter_pipeline=post_filter_pipeline,
        )
        # The stored embedding is read back from each document's metadata, so
        # the returned documents must still carry the embedding field.
        mmr_doc_indexes = maximal_marginal_relevance(
            np.array(query_embedding),
            [doc.metadata[self._embedding_key] for doc, _ in docs],
            k=k,
            lambda_mult=lambda_mult,
        )
        return [docs[i][0] for i in mmr_doc_indexes]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[Dict]] = None,
        collection: Optional[Collection[MongoDBDocumentType]] = None,
        **kwargs: Any,
    ) -> MongoDBAtlasVectorSearch:
        """Construct a `MongoDB Atlas Vector Search` vector store from raw documents.

        This is a user-friendly interface that:
            1. Embeds documents.
            2. Adds the documents to a provided MongoDB Atlas Vector Search index
                (Lucene)

        This is intended to be a quick way to get started.

        Example:
            .. code-block:: python

                from pymongo import MongoClient

                from langchain_community.vectorstores import MongoDBAtlasVectorSearch
                from langchain_community.embeddings import OpenAIEmbeddings

                mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
                collection = mongo_client["<db_name>"]["<collection_name>"]
                embeddings = OpenAIEmbeddings()
                vectorstore = MongoDBAtlasVectorSearch.from_texts(
                    texts,
                    embeddings,
                    metadatas=metadatas,
                    collection=collection
                )

        Raises:
            ValueError: If ``collection`` is not provided.
        """
        if collection is None:
            raise ValueError("Must provide 'collection' named parameter.")
        vectorstore = cls(collection, embedding, **kwargs)
        vectorstore.add_texts(texts, metadatas=metadatas)
        return vectorstore