Source code for langchain_community.retrievers.azure_ai_search
from __future__ import annotations

import json
from typing import Any, Dict, List, Optional
from urllib.parse import quote

import aiohttp
import requests
from langchain_core.callbacks import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain_core.utils import get_from_dict_or_env, get_from_env
from pydantic import ConfigDict, model_validator

DEFAULT_URL_SUFFIX = "search.windows.net"
"""Default URL Suffix for endpoint connection - commercial cloud"""


class AzureAISearchRetriever(BaseRetriever):
    """`Azure AI Search` service retriever.

    Setup:
        See here for more detail: https://python.langchain.com/docs/integrations/retrievers/azure_ai_search/

        We will need to install the below dependencies and set the required
        environment variables:

        .. code-block:: bash

            pip install -U langchain-community azure-identity azure-search-documents
            export AZURE_AI_SEARCH_SERVICE_NAME="<YOUR_SEARCH_SERVICE_NAME>"
            export AZURE_AI_SEARCH_INDEX_NAME="<YOUR_SEARCH_INDEX_NAME>"
            export AZURE_AI_SEARCH_API_KEY="<YOUR_API_KEY>"

    Key init args:
        content_key: str
        top_k: int
        index_name: str

    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import AzureAISearchRetriever

            retriever = AzureAISearchRetriever(
                content_key="content", top_k=1, index_name="langchain-vector-demo"
            )

    Usage:
        .. code-block:: python

            retriever.invoke("here is my unstructured query string")

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import AzureChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("...")

    """  # noqa: E501

    service_name: str = ""
    """Name of Azure AI Search service"""
    index_name: str = ""
    """Name of Index inside Azure AI Search service"""
    api_key: str = ""
    """API Key. Both Admin and Query keys work, but for reading data it's
    recommended to use a Query key."""
    api_version: str = "2023-11-01"
    """API version"""
    aiosession: Optional[aiohttp.ClientSession] = None
    """ClientSession, in case we want to reuse connection for better performance."""
    content_key: str = "content"
    """Key in a retrieved result to set as the Document page_content."""
    top_k: Optional[int] = None
    """Number of results to retrieve. Set to None to retrieve all results."""
    filter: Optional[str] = None
    """OData $filter expression to apply to the search query."""

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that service name, index name and api key exists in environment.

        Each value is taken from ``values`` when present there, otherwise from
        the matching ``AZURE_AI_SEARCH_*`` environment variable.

        Args:
            values: Raw init arguments, updated in place.

        Returns:
            The same ``values`` mapping with the three settings filled in.
        """
        values["service_name"] = get_from_dict_or_env(
            values, "service_name", "AZURE_AI_SEARCH_SERVICE_NAME"
        )
        values["index_name"] = get_from_dict_or_env(
            values, "index_name", "AZURE_AI_SEARCH_INDEX_NAME"
        )
        values["api_key"] = get_from_dict_or_env(
            values, "api_key", "AZURE_AI_SEARCH_API_KEY"
        )
        return values

    def _build_search_url(self, query: str) -> str:
        """Build the full GET URL for a search against the configured index.

        ``service_name`` may be a bare service name, a host name that already
        contains the URL suffix, or either of those prefixed with
        ``https://``; the missing parts are added here.

        Args:
            query: Raw (un-encoded) search text.

        Returns:
            The request URL, with the search text and the optional filter
            expression percent-encoded.
        """
        url_suffix = get_from_env("", "AZURE_AI_SEARCH_URL_SUFFIX", DEFAULT_URL_SUFFIX)

        # Two independent decisions: append the suffix when it is missing,
        # and prepend the scheme when it is missing.
        has_suffix = url_suffix in self.service_name
        has_scheme = "https://" in self.service_name
        host = self.service_name if has_suffix else f"{self.service_name}.{url_suffix}"
        base_url = f"{host}/" if has_scheme else f"https://{host}/"

        endpoint_path = f"indexes/{self.index_name}/docs?api-version={self.api_version}"
        top_param = f"&$top={self.top_k}" if self.top_k else ""
        # Percent-encode user-supplied text so characters such as "&", "#",
        # "+" or "=" cannot terminate or alter the query-string parameters.
        filter_param = f"&$filter={quote(self.filter, safe='')}" if self.filter else ""
        search_param = f"&search={quote(query, safe='')}"
        return base_url + endpoint_path + search_param + top_param + filter_param

    @property
    def _headers(self) -> Dict[str, str]:
        """HTTP headers sent with every search request (content type and API key)."""
        return {
            "Content-Type": "application/json",
            "api-key": self.api_key,
        }

    def _search(self, query: str) -> List[dict]:
        """Run the search synchronously.

        Args:
            query: Search text.

        Returns:
            The list stored under ``"value"`` in the JSON response.

        Raises:
            Exception: If the response status code is not 200.
        """
        search_url = self._build_search_url(query)
        response = requests.get(search_url, headers=self._headers)
        if response.status_code != 200:
            raise Exception(f"Error in search request: {response}")

        return json.loads(response.text)["value"]

    async def _afetch_results(
        self, session: aiohttp.ClientSession, search_url: str
    ) -> List[dict]:
        """GET ``search_url`` on ``session`` and return the ``"value"`` list.

        Raises:
            Exception: If the response status is not 200, mirroring `_search`.
        """
        async with session.get(search_url, headers=self._headers) as response:
            # Fail with the search error itself rather than a KeyError on
            # "value" when the service returns an error payload.
            if response.status != 200:
                raise Exception(f"Error in search request: {response}")
            response_json = await response.json()
        return response_json["value"]

    async def _asearch(self, query: str) -> List[dict]:
        """Run the search asynchronously.

        Uses ``aiosession`` when one was supplied; otherwise a temporary
        ``aiohttp.ClientSession`` is opened for this single request.

        Args:
            query: Search text.

        Returns:
            The list stored under ``"value"`` in the JSON response.

        Raises:
            Exception: If the response status is not 200.
        """
        search_url = self._build_search_url(query)
        if not self.aiosession:
            async with aiohttp.ClientSession() as session:
                return await self._afetch_results(session, search_url)
        return await self._afetch_results(self.aiosession, search_url)

    def _results_to_documents(self, search_results: List[dict]) -> List[Document]:
        """Convert raw search hits into Documents.

        The ``content_key`` entry is removed from each hit and becomes the
        page content; the remaining entries become the metadata.
        """
        return [
            Document(page_content=result.pop(self.content_key), metadata=result)
            for result in search_results
        ]

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search synchronously and return the hits as Documents."""
        search_results = self._search(query)
        return self._results_to_documents(search_results)

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search asynchronously and return the hits as Documents."""
        search_results = await self._asearch(query)
        return self._results_to_documents(search_results)


# For backwards compatibility
class AzureCognitiveSearchRetriever(AzureAISearchRetriever):
    """`Azure Cognitive Search` service retriever.

    Legacy name kept for backwards compatibility; it adds nothing on top of
    `AzureAISearchRetriever`. This version of the retriever will soon be
    deprecated, so please switch to `AzureAISearchRetriever`.
    """