def _get_metadata(result: Any) -> Dict[str, Any]:
    """Get the metadata from a result object.

    Args:
        result: A search result exposing ``title``, ``url``, ``id``,
            ``score``, ``published_date`` and ``author`` attributes, and
            optionally ``highlights`` and ``highlight_scores``.

    Returns:
        A dictionary with the six core fields. ``highlights`` and
        ``highlight_scores`` are added only when the attribute exists on
        ``result`` and is truthy.
    """
    metadata = {
        "title": result.title,
        "url": result.url,
        "id": result.id,
        "score": result.score,
        "published_date": result.published_date,
        "author": result.author,
    }
    # Use a ``None`` default so results without these optional attributes
    # are skipped instead of raising AttributeError.
    for optional_key in ("highlights", "highlight_scores"):
        optional_value = getattr(result, optional_key, None)
        if optional_value:
            metadata[optional_key] = optional_value
    return metadata
class ExaSearchRetriever(BaseRetriever):
    """Exa Search retriever.

    Runs ``client.search_and_contents`` for each query and returns one
    ``Document`` per result, using the result text as the page content and
    the remaining result fields as metadata.
    """

    k: int = 10  # num_results
    """The number of search results to return."""
    include_domains: Optional[List[str]] = None
    """A list of domains to include in the search."""
    exclude_domains: Optional[List[str]] = None
    """A list of domains to exclude from the search."""
    start_crawl_date: Optional[str] = None
    """The start date for the crawl (in YYYY-MM-DD format)."""
    end_crawl_date: Optional[str] = None
    """The end date for the crawl (in YYYY-MM-DD format)."""
    start_published_date: Optional[str] = None
    """The start date for when the document was published (in YYYY-MM-DD format)."""
    end_published_date: Optional[str] = None
    """The end date for when the document was published (in YYYY-MM-DD format)."""
    use_autoprompt: Optional[bool] = None
    """Whether to use autoprompt for the search."""
    type: str = "neural"
    """The type of search, 'keyword' or 'neural'. Default: neural"""
    highlights: Optional[Union[HighlightsContentsOptions, bool]] = None
    """Highlights option forwarded to the search. Returned highlights are
    attached to each document's metadata; the page content is always the
    result text."""
    text_contents_options: Union[TextContentsOptions, Literal[True]] = True
    """How to set the page content of the results"""

    client: Exa = Field(default=None)
    exa_api_key: SecretStr = Field(default=None)
    exa_base_url: Optional[str] = None

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate the environment.

        Runs before field validation (``pre=True``) and delegates to
        ``initialize_client``, returning whatever values it produces.
        """
        values = initialize_client(values)
        return values

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search Exa for ``query`` and wrap each result in a ``Document``.

        Args:
            query: The search query string.
            run_manager: Callback manager for this retriever run (unused here).

        Returns:
            One ``Document`` per search result, with ``result.text`` as the
            page content and the output of ``_get_metadata`` as metadata.
        """
        response = self.client.search_and_contents(  # type: ignore[misc]
            query,
            num_results=self.k,
            text=self.text_contents_options,
            highlights=self.highlights,  # type: ignore
            include_domains=self.include_domains,
            exclude_domains=self.exclude_domains,
            start_crawl_date=self.start_crawl_date,
            end_crawl_date=self.end_crawl_date,
            start_published_date=self.start_published_date,
            end_published_date=self.end_published_date,
            use_autoprompt=self.use_autoprompt,
            # Forward the configured search type; previously the ``type``
            # field was declared but never sent, so it had no effect.
            type=self.type,
        )

        results = response.results

        return [
            Document(
                page_content=(result.text),
                metadata=_get_metadata(result),
            )
            for result in results
        ]