Source code for langchain_azure_ai.vectorstores.azure_cosmos_db_no_sql
"""Vector Store for CosmosDB NoSql."""from__future__importannotationsimportuuidimportwarningsfromtypingimport(TYPE_CHECKING,Any,ClassVar,Collection,Dict,Iterable,List,Optional,Tuple,)importnumpyasnpfromlangchain_core.callbacksimportCallbackManagerForRetrieverRunfromlangchain_core.documentsimportDocumentfromlangchain_core.embeddingsimportEmbeddingsfromlangchain_core.vectorstoresimportVectorStore,VectorStoreRetrieverfrompydanticimportConfigDict,model_validatorfromlangchain_azure_ai.vectorstores.utilsimportmaximal_marginal_relevanceifTYPE_CHECKING:fromazure.cosmosimportContainerProxy,CosmosClientfromazure.identityimportDefaultAzureCredentialUSER_AGENT=("LangChain-CDBNoSql-VectorStore-Python",)
[docs]classAzureCosmosDBNoSqlVectorSearch(VectorStore):"""`Azure Cosmos DB for NoSQL` vector store. To use, you should have both: - the ``azure-cosmos`` python package installed You can read more about vector search, full text search and hybrid search using AzureCosmosDBNoSQL here: https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/vector-search https://learn.microsoft.com/en-us/azure/cosmos-db/gen-ai/full-text-search https://learn.microsoft.com/en-us/azure/cosmos-db/gen-ai/hybrid-search """
[docs]def__init__(self,*,cosmos_client:CosmosClient,embedding:Embeddings,vector_embedding_policy:Dict[str,Any],indexing_policy:Dict[str,Any],cosmos_container_properties:Dict[str,Any],cosmos_database_properties:Dict[str,Any],vector_search_fields:Dict[str,Any],full_text_policy:Optional[Dict[str,Any]]=None,database_name:str="vectorSearchDB",container_name:str="vectorSearchContainer",search_type:str="vector",metadata_key:str="metadata",create_container:bool=True,full_text_search_enabled:bool=False,table_alias:str="c",):"""Constructor for AzureCosmosDBNoSqlVectorSearch. Args: cosmos_client: Client used to connect to azure cosmosdb no sql account. database_name: Name of the database to be created. container_name: Name of the container to be created. embedding: Text embedding model to use. vector_embedding_policy: Vector Embedding Policy for the container. full_text_policy: Full Text Policy for the container. indexing_policy: Indexing Policy for the container. cosmos_container_properties: Container Properties for the container. cosmos_database_properties: Database Properties for the container. vector_search_fields: Vector Search Fields for the container. search_type: CosmosDB Search Type to be performed. metadata_key: Metadata key to use for data schema. create_container: Set to true if the container does not exist. full_text_search_enabled: Set to true if the full text search is enabled. table_alias: Alias for the table to use in the WHERE clause. """self._cosmos_client=cosmos_clientself._database_name=database_nameself._container_name=container_nameself._embedding=embeddingself._vector_embedding_policy=vector_embedding_policyself._full_text_policy=full_text_policyself._indexing_policy=indexing_policyself._cosmos_container_properties=cosmos_container_propertiesself._cosmos_database_properties=cosmos_database_propertiesself._vector_search_fields=vector_search_fieldsself._metadata_key=metadata_keyself._create_container=create_containerself._full_text_search_enabled=full_text_search_enabledself._search_type=search_typeself._table_alias=table_aliasifself._create_container:if(self._indexing_policy["vectorIndexes"]isNoneorlen(self._indexing_policy["vectorIndexes"])==0):raiseValueError("vectorIndexes cannot be null or empty in the indexing_policy.")if(self._vector_embedding_policyisNoneorlen(vector_embedding_policy["vectorEmbeddings"])==0):raiseValueError("vectorEmbeddings cannot be null ""or empty in the vector_embedding_policy.")ifself._cosmos_container_properties["partition_key"]isNone:raiseValueError("partition_key cannot be null or empty for a container.")ifself._full_text_search_enabled:if(self._indexing_policy["fullTextIndexes"]isNoneorlen(self._indexing_policy["fullTextIndexes"])==0):raiseValueError("fullTextIndexes cannot be null or empty in the ""indexing_policy if full text search is enabled.")if(self._full_text_policyisNoneorlen(self._full_text_policy["fullTextPaths"])==0):raiseValueError("fullTextPaths cannot be null or empty in the ""full_text_policy if full text search is enabled.")ifself._vector_search_fieldsisNone:raiseValueError("vectorSearchFields cannot be null or empty in the vector_search_fields."# noqa:E501)# Create the database if it already doesn't existself._database=self._cosmos_client.create_database_if_not_exists(id=self._database_name,offer_throughput=self._cosmos_database_properties.get("offer_throughput"),session_token=self._cosmos_database_properties.get("session_token"),initial_headers=self._cosmos_database_properties.get("initial_headers"),etag=self._cosmos_database_properties.get("etag"),match_condition=self._cosmos_database_properties.get("match_condition"),)# Create the collection if it already doesn't existself._container=self._database.create_container_if_not_exists(id=self._container_name,partition_key=self._cosmos_container_properties["partition_key"],indexing_policy=self._indexing_policy,default_ttl=self._cosmos_container_properties.get("default_ttl"),offer_throughput=self._cosmos_container_properties.get("offer_throughput"),unique_key_policy=self._cosmos_container_properties.get("unique_key_policy"),conflict_resolution_policy=self._cosmos_container_properties.get("conflict_resolution_policy"),analytical_storage_ttl=self._cosmos_container_properties.get("analytical_storage_ttl"),computed_properties=self._cosmos_container_properties.get("computed_properties"),etag=self._cosmos_container_properties.get("etag"),match_condition=self._cosmos_container_properties.get("match_condition"),session_token=self._cosmos_container_properties.get("session_token"),initial_headers=self._cosmos_container_properties.get("initial_headers"),vector_embedding_policy=self._vector_embedding_policy,full_text_policy=self._full_text_policy,)
[docs]defadd_texts(self,texts:Iterable[str],metadatas:Optional[List[dict]]=None,**kwargs:Any,)->List[str]:"""Run more texts through the embeddings and add to the vectorstore. Args: texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. **kwargs: Additional keyword arguments to pass to the embedding method. Returns: List of ids from adding the texts into the vectorstore. """_metadatas=list(metadatasifmetadatasisnotNoneelse({}for_intexts))returnself._insert_texts(list(texts),_metadatas)
def_insert_texts(self,texts:List[str],metadatas:List[Dict[str,Any]])->List[str]:"""Used to Load Documents into the collection. Args: texts: The list of documents strings to load metadatas: The list of metadata objects associated with each document Returns: List of ids from adding the texts into the vectorstore. """# If the texts is empty, throw an errorifnottexts:raiseException("Texts can not be null or empty")# Embed and create the documentsembeddings=self._embedding.embed_documents(texts)text_key=self._vector_search_fields["text_field"]embedding_key=self._vector_search_fields["embedding_field"]to_insert=[{"id":str(uuid.uuid4()),text_key:t,embedding_key:embedding,"metadata":m,}fort,m,embeddinginzip(texts,metadatas,embeddings)]# insert the documents in CosmosDB No Sqldoc_ids:List[str]=[]foriteminto_insert:created_doc=self._container.create_item(item)doc_ids.append(created_doc["id"])returndoc_ids@classmethoddef_from_kwargs(cls,embedding:Embeddings,*,cosmos_client:CosmosClient,vector_embedding_policy:Dict[str,Any],indexing_policy:Dict[str,Any],cosmos_container_properties:Dict[str,Any],cosmos_database_properties:Dict[str,Any],vector_search_fields:Dict[str,Any],full_text_policy:Optional[Dict[str,Any]]=None,database_name:str="vectorSearchDB",container_name:str="vectorSearchContainer",metadata_key:str="metadata",create_container:bool=True,full_text_search_enabled:bool=False,search_type:str="vector",**kwargs:Any,)->AzureCosmosDBNoSqlVectorSearch:ifkwargs:warnings.warn("Method 'from_texts' of AzureCosmosDBNoSql vector ""store invoked with "f"unsupported arguments "f"({', '.join(sorted(kwargs))}), ""which will be ignored.")returncls(embedding=embedding,cosmos_client=cosmos_client,vector_embedding_policy=vector_embedding_policy,full_text_policy=full_text_policy,indexing_policy=indexing_policy,cosmos_container_properties=cosmos_container_properties,cosmos_database_properties=cosmos_database_properties,database_name=database_name,container_name=container_name,vector_search_fields=vector_search_fields,metadata_key=metadata_key,create_container=create_container,full_text_search_enabled=full_text_search_enabled,search_type=search_type,)
[docs]@classmethoddeffrom_texts(cls,texts:List[str],embedding:Embeddings,metadatas:Optional[List[dict]]=None,**kwargs:Any,)->AzureCosmosDBNoSqlVectorSearch:"""Create an AzureCosmosDBNoSqlVectorSearch vectorstore from raw texts. Args: texts: the texts to insert. embedding: the embedding function to use in the store. metadatas: metadata dicts for the texts. **kwargs: you can pass any argument that you would to :meth:`~add_texts` and/or to the 'AstraDB' constructor (see these methods for details). These arguments will be routed to the respective methods as they are. Returns: an `AzureCosmosDBNoSqlVectorSearch` vectorstore. """vectorstore=AzureCosmosDBNoSqlVectorSearch._from_kwargs(embedding,**kwargs)vectorstore.add_texts(texts=texts,metadatas=metadatas,)returnvectorstore
[docs]@classmethoddeffrom_connection_string_and_aad(cls,connection_string:str,defaultAzureCredential:DefaultAzureCredential,texts:List[str],embedding:Embeddings,metadatas:Optional[List[dict]]=None,**kwargs:Any,)->AzureCosmosDBNoSqlVectorSearch:"""Initialize an AzureCosmosDBNoSqlVectorSearch vectorstore."""cosmos_client=CosmosClient(connection_string,defaultAzureCredential,user_agent=USER_AGENT)kwargs["cosmos_client"]=cosmos_clientvectorstore=cls._from_kwargs(embedding,**kwargs)vectorstore.add_texts(texts=texts,metadatas=metadatas,)returnvectorstore
[docs]@classmethoddeffrom_connection_string_and_key(cls,connection_string:str,key:str,texts:List[str],embedding:Embeddings,metadatas:Optional[List[dict]]=None,**kwargs:Any,)->AzureCosmosDBNoSqlVectorSearch:"""Initialize an AzureCosmosDBNoSqlVectorSearch vectorstore."""cosmos_client=CosmosClient(connection_string,key,user_agent=USER_AGENT)kwargs["cosmos_client"]=cosmos_clientvectorstore=cls._from_kwargs(embedding,**kwargs)vectorstore.add_texts(texts=texts,metadatas=metadatas,)returnvectorstore
[docs]defdelete(self,ids:Optional[List[str]]=None,**kwargs:Any)->Optional[bool]:"""Removes the documents based on ids."""ifidsisNone:raiseValueError("No document ids provided to delete.")fordocument_idinids:self._container.delete_item(document_id,self._cosmos_container_properties["partition_key"])# noqa: E501returnTrue
[docs]defdelete_document_by_id(self,document_id:Optional[str]=None)->None:"""Removes a Specific Document by id. Args: document_id: The document identifier """ifdocument_idisNone:raiseValueError("No document ids provided to delete.")self._container.delete_item(document_id,partition_key=document_id)
[docs]defsimilarity_search(self,query:str,k:int=4,with_embedding:bool=False,search_type:Optional[str]="vector",offset_limit:Optional[str]=None,projection_mapping:Optional[Dict[str,Any]]=None,full_text_rank_filter:Optional[List[Dict[str,str]]]=None,where:Optional[str]=None,**kwargs:Any,)->List[Document]:"""Return docs most similar to query."""search_type=search_typeorself._search_typedocs_and_scores=self.similarity_search_with_score(query,k=k,with_embedding=with_embedding,search_type=search_type,offset_limit=offset_limit,projection_mapping=projection_mapping,full_text_rank_filter=full_text_rank_filter,where=where,kwargs=kwargs,)return[docfordoc,_indocs_and_scores]
[docs]defsimilarity_search_with_score(self,query:str,k:int=4,with_embedding:bool=False,search_type:Optional[str]="vector",offset_limit:Optional[str]=None,full_text_rank_filter:Optional[List[Dict[str,str]]]=None,projection_mapping:Optional[Dict[str,Any]]=None,where:Optional[str]=None,**kwargs:Any,)->List[Tuple[Document,float]]:"""Run similarity search with distance."""docs_and_scores=[]search_type=search_typeorself._search_typeifsearch_type=="vector":embeddings=self._embedding.embed_query(query)docs_and_scores=self.vector_search_with_score(search_type=search_type,embeddings=embeddings,k=k,with_embedding=with_embedding,offset_limit=offset_limit,projection_mapping=projection_mapping,where=where,)elifsearch_type=="full_text_search":docs_and_scores=self.full_text_search(k=k,search_type=search_type,offset_limit=offset_limit,projection_mapping=projection_mapping,where=where,)elifsearch_type=="full_text_ranking":docs_and_scores=self.full_text_ranking(k=k,search_type=search_type,offset_limit=offset_limit,full_text_rank_filter=full_text_rank_filter,projection_mapping=projection_mapping,where=where,)elifsearch_type=="hybrid":embeddings=self._embedding.embed_query(query)docs_and_scores=self.hybrid_search_with_score(search_type=search_type,embeddings=embeddings,k=k,with_embedding=with_embedding,offset_limit=offset_limit,full_text_rank_filter=full_text_rank_filter,projection_mapping=projection_mapping,where=where,)returndocs_and_scores
[docs]defmax_marginal_relevance_search_by_vector(self,embedding:List[float],k:int=4,fetch_k:int=20,lambda_mult:float=0.5,search_type:str="vector",with_embedding:bool=False,where:Optional[str]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance."""# noqa:E501docs=self.vector_search_with_score(embeddings=embedding,k=fetch_k,search_type=search_type,with_embedding=with_embedding,where=where,)# Re-ranks the docs using MMRmmr_doc_indexes=maximal_marginal_relevance(np.array(embedding),[doc.metadata[self._vector_search_fields["embedding_field"]]fordoc,_indocs],k=k,lambda_mult=lambda_mult,)mmr_docs=[docs[i][0]foriinmmr_doc_indexes]returnmmr_docs
[docs]defmax_marginal_relevance_search(self,query:str,k:int=4,fetch_k:int=20,lambda_mult:float=0.5,search_type:str="vector",with_embedding:bool=False,where:Optional[str]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance."""# noqa:E501embeddings=self._embedding.embed_query(query)docs=self.max_marginal_relevance_search_by_vector(embeddings,k=k,fetch_k=fetch_k,lambda_mult=lambda_mult,search_type=search_type,with_embedding=with_embedding,where=where,)returndocs
[docs]defvector_search_with_score(self,search_type:str,embeddings:List[float],k:int=4,with_embedding:bool=False,offset_limit:Optional[str]=None,*,projection_mapping:Optional[Dict[str,Any]]=None,where:Optional[str]=None,**kwargs:Any,)->List[Tuple[Document,float]]:"""Returns the most similar indexed documents to the embeddings."""# noqa:E501query,parameters=self._construct_query(k=k,search_type=search_type,embeddings=embeddings,offset_limit=offset_limit,projection_mapping=projection_mapping,with_embedding=with_embedding,where=where,)returnself._execute_query(query=query,search_type=search_type,parameters=parameters,with_embedding=with_embedding,projection_mapping=projection_mapping,)
[docs]deffull_text_search(self,search_type:str,k:int=4,offset_limit:Optional[str]=None,*,projection_mapping:Optional[Dict[str,Any]]=None,where:Optional[str]=None,)->List[Tuple[Document,float]]:"""Returns the documents based on the search text provided in the filters."""# noqa:E501query,parameters=self._construct_query(k=k,search_type=search_type,offset_limit=offset_limit,projection_mapping=projection_mapping,where=where,)returnself._execute_query(query=query,search_type=search_type,parameters=parameters,with_embedding=False,projection_mapping=projection_mapping,)
[docs]deffull_text_ranking(self,search_type:str,k:int=4,offset_limit:Optional[str]=None,*,projection_mapping:Optional[Dict[str,Any]]=None,full_text_rank_filter:Optional[List[Dict[str,str]]]=None,where:Optional[str]=None,)->List[Tuple[Document,float]]:"""Returns the documents based on the search text provided full text rank filters."""# noqa:E501query,parameters=self._construct_query(k=k,search_type=search_type,offset_limit=offset_limit,projection_mapping=projection_mapping,full_text_rank_filter=full_text_rank_filter,where=where,)returnself._execute_query(query=query,search_type=search_type,parameters=parameters,with_embedding=False,projection_mapping=projection_mapping,)
[docs]defhybrid_search_with_score(self,search_type:str,embeddings:List[float],k:int=4,with_embedding:bool=False,offset_limit:Optional[str]=None,*,projection_mapping:Optional[Dict[str,Any]]=None,full_text_rank_filter:Optional[List[Dict[str,str]]]=None,where:Optional[str]=None,)->List[Tuple[Document,float]]:"""Returns the documents based on the embeddings and text provided full text rank filters."""# noqa:E501query,parameters=self._construct_query(k=k,search_type=search_type,embeddings=embeddings,offset_limit=offset_limit,projection_mapping=projection_mapping,full_text_rank_filter=full_text_rank_filter,where=where,)returnself._execute_query(query=query,search_type=search_type,parameters=parameters,with_embedding=with_embedding,projection_mapping=projection_mapping,)
def_construct_query(self,k:int,search_type:str,embeddings:Optional[List[float]]=None,full_text_rank_filter:Optional[List[Dict[str,str]]]=None,offset_limit:Optional[str]=None,projection_mapping:Optional[Dict[str,Any]]=None,with_embedding:bool=False,where:Optional[str]=None,)->Tuple[str,List[Dict[str,Any]]]:ifsearch_type=="full_text_ranking"orsearch_type=="hybrid":query=f"SELECT {'TOP '+str(k)+' 'ifnotoffset_limitelse''}"else:query=f"""SELECT {'TOP @limit 'ifnotoffset_limitelse''}"""query+=self._generate_projection_fields(projection_mapping,search_type,embeddings,full_text_rank_filter,with_embedding,)table=self._table_aliasquery+=f" FROM {table} "# Add where_clause if specifiedifwhere:query+=f"WHERE {where}"# TODO: Update the code to use parameters once parametrized queries# are allowed for these query functionsifsearch_type=="full_text_ranking":iffull_text_rank_filterisNone:raiseValueError("full_text_rank_filter cannot be None for FULL_TEXT_RANK queries.")iflen(full_text_rank_filter)==1:query+=f""" ORDER BY RANK FullTextScore({table}.{full_text_rank_filter[0]["search_field"]}, [{", ".join(f"'{term}'"forterminfull_text_rank_filter[0]["search_text"].split())}])"""# noqa:E501else:rank_components=[f"FullTextScore({table}.{search_item['search_field']}, ["+", ".join(f"'{term}'"forterminsearch_item["search_text"].split())+"])"forsearch_iteminfull_text_rank_filter]query=f" ORDER BY RANK RRF({', '.join(rank_components)})"elifsearch_type=="vector":query+=" ORDER BY VectorDistance(c[@embeddingKey], @embeddings)"elifsearch_type=="hybrid":iffull_text_rank_filterisNone:raiseValueError("full_text_rank_filter cannot be None for HYBRID queries.")rank_components=[f"FullTextScore({table}.{search_item['search_field']}, ["+", ".join(f"'{term}'"forterminsearch_item["search_text"].split())+"])"forsearch_iteminfull_text_rank_filter]query+=f""" ORDER BY RANK RRF({', '.join(rank_components)}, VectorDistance({table}.{self._vector_search_fields["embedding_field"]}, {embeddings}))"""# noqa:E501else:query+=""# Add limit_offset_clause if specifiedifoffset_limitisnotNone:query+=f""" {offset_limit}"""# TODO: Remove this if check once parametrized queries# are allowed for these query functionsparameters=[]ifsearch_type=="full_text_search"orsearch_type=="vector":parameters=self._build_parameters(k=k,search_type=search_type,embeddings=embeddings,projection_mapping=projection_mapping,)returnquery,parametersdef_generate_projection_fields(self,projection_mapping:Optional[Dict[str,Any]],search_type:str,embeddings:Optional[List[float]]=None,full_text_rank_filter:Optional[List[Dict[str,str]]]=None,with_embedding:bool=False,)->str:# TODO: Remove the if check, lines 704-726 once parametrized queries# are supported for these query functions.table=self._table_aliasifsearch_type=="full_text_ranking"orsearch_type=="hybrid":ifprojection_mapping:projection=", ".join(f"{table}.{key} as {alias}"forkey,aliasinprojection_mapping.items())eliffull_text_rank_filter:projection=(table+".id, "+", ".join(f"{table}.{search_item['search_field']} "f"as {search_item['search_field']}"forsearch_iteminfull_text_rank_filter))else:projection=(f"{table}.id, {table}.{self._vector_search_fields['text_field']} ")f"as text, {table}.{self._metadata_key} as metadata"ifsearch_type=="hybrid":ifwith_embedding:projection+=f", {table}.{self._vector_search_fields['embedding_field']} as embedding"# noqa:E501projection+=(f", VectorDistance({table}.{self._vector_search_fields['embedding_field']}, "# noqa:E501f"{embeddings}) as SimilarityScore")else:ifprojection_mapping:projection=", ".join(f"{table}[@{key}] as {alias}"forkey,aliasinprojection_mapping.items())eliffull_text_rank_filter:projection=f"{table}.id"+", ".join(f"{table}.{search_item['search_field']} as {search_item['search_field']}"# noqa: E501forsearch_iteminfull_text_rank_filter)else:projection=f"{table}.id, {table}[@textKey] as text, {table}[@metadataKey] as metadata"# noqa: E501ifsearch_type=="vector":ifwith_embedding:projection+=f", {table}[@embeddingKey] as embedding"projection+=(f", VectorDistance({table}[@embeddingKey], ""@embeddings) as SimilarityScore")returnprojectiondef_build_parameters(self,k:int,search_type:str,embeddings:Optional[List[float]],projection_mapping:Optional[Dict[str,Any]]=None,)->List[Dict[str,Any]]:parameters:List[Dict[str,Any]]=[{"name":"@limit","value":k},]ifprojection_mapping:forkeyinprojection_mapping.keys():parameters.append({"name":f"@{key}","value":key})else:parameters.append({"name":"@textKey","value":self._vector_search_fields["text_field"]})parameters.append({"name":"@metadataKey","value":self._metadata_key})ifsearch_type=="vector":parameters.append({"name":"@embeddingKey","value":self._vector_search_fields["embedding_field"],})parameters.append({"name":"@embeddings","value":embeddings})returnparametersdef_execute_query(self,query:str,search_type:str,parameters:List[Dict[str,Any]],with_embedding:bool,projection_mapping:Optional[Dict[str,Any]],)->List[Tuple[Document,float]]:docs_and_scores=[]items=list(self._container.query_items(query=query,parameters=parameters,enable_cross_partition_query=True))foriteminitems:text=item[self._vector_search_fields["text_field"]]metadata=item.pop(self._metadata_key,{})score=0.0ifprojection_mapping:forkey,aliasinprojection_mapping.items():ifkey==self._vector_search_fields["text_field"]:continuemetadata[alias]=item[alias]else:metadata["id"]=item["id"]ifsearch_type=="vector"orsearch_type=="hybrid":score=item["SimilarityScore"]ifwith_embedding:metadata[self._vector_search_fields["embedding_field"]]=item[self._vector_search_fields["embedding_field"]]docs_and_scores.append((Document(page_content=text,metadata=metadata),score))returndocs_and_scores
[docs]defget_container(self)->ContainerProxy:"""Gets the container for the vector store."""returnself._container
[docs]defas_retriever(self,**kwargs:Any)->AzureCosmosDBNoSqlVectorStoreRetriever:"""Return AzureCosmosDBNoSqlVectorStoreRetriever initialized from this VectorStore. Args: search_type (Optional[str]): Overrides the type of search that the Retriever should perform. Defaults to `self._search_type`. Can be "vector", "hybrid", "full_text_ranking", "full_text_search". search_kwargs (Optional[Dict]): Keyword arguments to pass to the search function. Can include things like: score_threshold: Minimum relevance threshold for similarity_score_threshold fetch_k: Amount of documents to pass to MMR algorithm (Default: 20) lambda_mult: Diversity of results returned by MMR; 1 for minimum diversity and 0 for maximum. (Default: 0.5) filter: Filter by document metadata **kwargs: Additional keyword arguments to pass to the Returns: AzureCosmosDBNoSqlVectorStoreRetriever: Retriever class for VectorStore. """# noqa:E501search_type=kwargs.get("search_type",self._search_type)kwargs["search_type"]=search_typetags=kwargs.pop("tags",None)or[]tags.extend(self._get_retriever_tags())returnAzureCosmosDBNoSqlVectorStoreRetriever(vectorstore=self,**kwargs,tags=tags)
[docs]classAzureCosmosDBNoSqlVectorStoreRetriever(VectorStoreRetriever):"""Retriever that uses `Azure CosmosDB No Sql Search`."""vectorstore:AzureCosmosDBNoSqlVectorSearch"""Azure Search instance used to find similar documents."""search_type:str="vector""""Type of search to perform. Options are "vector", "hybrid", "full_text_ranking", "full_text_search"."""k:int=4"""Number of documents to return."""search_kwargs:dict={}"""Search params. score_threshold: Minimum relevance threshold for similarity_score_threshold fetch_k: Amount of documents to pass to MMR algorithm (Default: 20) lambda_mult: Diversity of results returned by MMR; 1 for minimum diversity and 0 for maximum. (Default: 0.5) filter: Filter by document metadata """allowed_search_types:ClassVar[Collection[str]]=("vector","hybrid","full_text_ranking","full_text_search",)model_config=ConfigDict(arbitrary_types_allowed=True,)@model_validator(mode="before")@classmethoddefvalidate_search_type(cls,values:Dict)->Any:"""Validate search type."""if"search_type"invalues:search_type=values["search_type"]ifsearch_typenotincls.allowed_search_types:raiseValueError(f"search_type of {search_type} not allowed. Valid values are: "f"{cls.allowed_search_types}")returnvaluesdef_get_relevant_documents(self,query:str,run_manager:CallbackManagerForRetrieverRun,**kwargs:Any,)->List[Document]:params={**self.search_kwargs,**kwargs}ifself.search_type=="vector":docs=self.vectorstore.similarity_search(query,k=self.k,**params)elifself.search_type=="hybrid":docs=self.vectorstore.similarity_search(query,k=self.k,search_type="hybrid",**params)elifself.search_type=="full_text_ranking":docs=self.vectorstore.similarity_search(query,k=self.k,search_type="full_text_ranking",**params)elifself.search_type=="full_text_search":docs=self.vectorstore.similarity_search(query,k=self.k,search_type="full_text_search",**params)else:raiseValueError(f"Query type of {self.search_type} is not allowed.")returndocs