Source code for langchain_community.retrievers.azure_ai_search
from__future__importannotationsimportjsonfromtypingimportAny,Dict,List,Optionalimportaiohttpimportrequestsfromlangchain_core.callbacksimport(AsyncCallbackManagerForRetrieverRun,CallbackManagerForRetrieverRun,)fromlangchain_core.documentsimportDocumentfromlangchain_core.retrieversimportBaseRetrieverfromlangchain_core.utilsimportget_from_dict_or_env,get_from_envfrompydanticimportConfigDict,model_validatorDEFAULT_URL_SUFFIX="search.windows.net""""Default URL Suffix for endpoint connection - commercial cloud"""
class AzureAISearchRetriever(BaseRetriever):
    """`Azure AI Search` service retriever.

    Setup:
        See here for more detail: https://python.langchain.com/docs/integrations/retrievers/azure_ai_search/

        We will need to install the below dependencies and set the required
        environment variables:

        .. code-block:: bash

            pip install -U langchain-community azure-identity azure-search-documents
            export AZURE_AI_SEARCH_SERVICE_NAME="<YOUR_SEARCH_SERVICE_NAME>"
            export AZURE_AI_SEARCH_INDEX_NAME="<YOUR_SEARCH_INDEX_NAME>"
            export AZURE_AI_SEARCH_API_KEY="<YOUR_API_KEY>"
            or
            export AZURE_AI_SEARCH_AD_TOKEN="<YOUR_BEARER_TOKEN>"

    Key init args:
        content_key: str
        top_k: int
        index_name: str

    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import AzureAISearchRetriever

            retriever = AzureAISearchRetriever(
                content_key="content", top_k=1, index_name="langchain-vector-demo"
            )

    Usage:
        .. code-block:: python

            retriever.invoke("here is my unstructured query string")

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import AzureChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("...")

    """  # noqa: E501

    service_name: str = ""
    """Name of Azure AI Search service"""
    index_name: str = ""
    """Name of Index inside Azure AI Search service"""
    api_key: str = ""
    """API Key. Both Admin and Query keys work, but for reading data it's
    recommended to use a Query key."""
    api_version: str = "2023-11-01"
    """API version"""
    aiosession: Optional[aiohttp.ClientSession] = None
    """ClientSession, in case we want to reuse connection for better performance."""
    azure_ad_token: str = ""
    """Your Azure Active Directory token.

    Automatically inferred from env var `AZURE_AI_SEARCH_AD_TOKEN` if not provided.

    For more:
    https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
    """
    content_key: str = "content"
    """Key in a retrieved result to set as the Document page_content."""
    top_k: Optional[int] = None
    """Number of results to retrieve. Set to None to retrieve all results."""
    filter: Optional[str] = None
    """OData $filter expression to apply to the search query."""

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that service name, index name and api key exists in environment.

        Each setting is taken from ``values`` when present, otherwise from the
        matching ``AZURE_AI_SEARCH_*`` environment variable.

        Raises:
            ValueError: If neither an API key nor an Azure AD token is available.
        """
        values["service_name"] = get_from_dict_or_env(
            values, "service_name", "AZURE_AI_SEARCH_SERVICE_NAME"
        )
        values["index_name"] = get_from_dict_or_env(
            values, "index_name", "AZURE_AI_SEARCH_INDEX_NAME"
        )
        # Both credentials default to "" so that either one alone is sufficient.
        values["azure_ad_token"] = get_from_dict_or_env(
            values, "azure_ad_token", "AZURE_AI_SEARCH_AD_TOKEN", default=""
        )
        values["api_key"] = get_from_dict_or_env(
            values, "api_key", "AZURE_AI_SEARCH_API_KEY", default=""
        )
        if values["azure_ad_token"] == "" and values["api_key"] == "":
            raise ValueError(
                "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, "
                "or the `AZURE_AI_SEARCH_API_KEY` or `AZURE_AI_SEARCH_AD_TOKEN` "
                "environment variables."
            )
        return values

    def _build_search_url(self, query: str) -> str:
        """Build the GET URL for a document search against the configured index.

        ``service_name`` may be a bare service name, a host name that already
        contains the URL suffix, or a full ``https://`` URL; the missing parts
        are filled in. The suffix comes from ``AZURE_AI_SEARCH_URL_SUFFIX``
        (falling back to ``DEFAULT_URL_SUFFIX``).

        Args:
            query: Raw search text. It is percent-encoded before being placed
                in the URL.

        Returns:
            The complete search URL including ``api-version``, ``search`` and
            the optional ``$top`` / ``$filter`` parameters.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        url_suffix = get_from_env("", "AZURE_AI_SEARCH_URL_SUFFIX", DEFAULT_URL_SUFFIX)

        # Decide both properties from the original service_name so the two
        # adjustments below stay independent of each other.
        has_suffix = url_suffix in self.service_name
        has_scheme = "https://" in self.service_name

        base_url = self.service_name
        if not has_suffix:
            base_url = f"{base_url}.{url_suffix}"
        if not has_scheme:
            base_url = f"https://{base_url}"
        base_url = f"{base_url}/"

        endpoint_path = f"indexes/{self.index_name}/docs?api-version={self.api_version}"
        # Without encoding, characters such as "&", "#", "+" or "=" in the
        # query would truncate the search text or inject extra parameters.
        search_param = f"&search={quote(query, safe='')}"
        top_param = f"&$top={self.top_k}" if self.top_k else ""
        # The filter is configuration supplied by the developer and is passed
        # through verbatim, as before.
        filter_param = f"&$filter={self.filter}" if self.filter else ""
        return base_url + endpoint_path + search_param + top_param + filter_param

    @property
    def _headers(self) -> Dict[str, str]:
        """HTTP headers for a search request.

        The Azure AD bearer token takes precedence over the API key when both
        are set.
        """
        headers = {
            "Content-Type": "application/json",
        }
        if self.azure_ad_token:
            headers["Authorization"] = f"Bearer {self.azure_ad_token}"
        elif self.api_key:
            headers["api-key"] = f"{self.api_key}"
        return headers

    def _search(self, query: str) -> List[dict]:
        """Run a blocking search and return the raw result dicts.

        Raises:
            Exception: If the service responds with a non-200 status.
        """
        search_url = self._build_search_url(query)
        response = requests.get(search_url, headers=self._headers)
        if response.status_code != 200:
            raise Exception(f"Error in search request: {response}")

        return json.loads(response.text)["value"]

    async def _read_async_results(self, response: aiohttp.ClientResponse) -> List[dict]:
        """Validate an aiohttp response and return its ``value`` list.

        Mirrors the status handling of ``_search`` so that a failed request
        raises a descriptive error instead of a ``KeyError`` on ``"value"``.
        """
        if response.status != 200:
            raise Exception(f"Error in search request: {response}")
        response_json = await response.json()
        return response_json["value"]

    async def _asearch(self, query: str) -> List[dict]:
        """Run an async search and return the raw result dicts.

        Uses ``aiosession`` when one was provided; otherwise a temporary
        ``aiohttp.ClientSession`` is opened for this single request.

        Raises:
            Exception: If the service responds with a non-200 status.
        """
        search_url = self._build_search_url(query)
        if not self.aiosession:
            async with aiohttp.ClientSession() as session:
                async with session.get(search_url, headers=self._headers) as response:
                    return await self._read_async_results(response)

        async with self.aiosession.get(
            search_url, headers=self._headers
        ) as response:
            return await self._read_async_results(response)

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search synchronously and wrap each hit in a ``Document``.

        The field named by ``content_key`` is removed from each result and
        becomes ``page_content``; the remaining fields become ``metadata``.
        """
        search_results = self._search(query)

        return [
            Document(page_content=result.pop(self.content_key), metadata=result)
            for result in search_results
        ]

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search asynchronously and wrap each hit in a ``Document``.

        The field named by ``content_key`` is removed from each result and
        becomes ``page_content``; the remaining fields become ``metadata``.
        """
        search_results = await self._asearch(query)

        return [
            Document(page_content=result.pop(self.content_key), metadata=result)
            for result in search_results
        ]
# For backwards compatibility
class AzureCognitiveSearchRetriever(AzureAISearchRetriever):
    """`Azure Cognitive Search` service retriever.

    Subclass of AzureAISearchRetriever that adds no behavior of its own and
    is kept under the previous name.

    This version of the retriever will soon be deprecated.
    Please switch to AzureAISearchRetriever
    """