class PubMedAPIWrapper(BaseModel):
    """Wrapper around the PubMed API.

    This wrapper will use the PubMed API to conduct searches and fetch
    document summaries. By default, it will return the document summaries
    of the top-k results of an input search.

    Parameters:
        top_k_results: number of the top-scored document used for the PubMed tool
        MAX_QUERY_LENGTH: maximum length of the query.
            Default is 300 characters.
        doc_content_chars_max: maximum length of the document content.
            Content will be truncated if it exceeds this length.
            Default is 2000 characters.
        max_retry: maximum number of retries for a request. Default is 5.
        sleep_time: time to wait between retries.
            Default is 0.2 seconds.
        email: email address to be used for the PubMed API.
        api_key: API key to be used for the PubMed API.
    """

    parse: Any  #: :meta private:

    # NCBI E-utilities endpoints (trailing "?" so query params append directly).
    base_url_esearch: str = (
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?"
    )
    base_url_efetch: str = (
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?"
    )
    max_retry: int = 5
    sleep_time: float = 0.2

    # Default values for the parameters
    top_k_results: int = 3
    MAX_QUERY_LENGTH: int = 300
    doc_content_chars_max: int = 2000
    email: str = "your_email@example.com"
    api_key: str = ""

    @model_validator(mode="before")
    @classmethod
    def validate_environment(cls, values: Dict) -> Any:
        """Validate that the python package exists in environment."""
        try:
            import xmltodict

            values["parse"] = xmltodict.parse
        except ImportError as e:
            # Chain the original ImportError so the real cause is preserved
            # in the traceback (PEP 3134).
            raise ImportError(
                "Could not import xmltodict python package. "
                "Please install it with `pip install xmltodict`."
            ) from e
        return values
[docs]defrun(self,query:str)->str:""" Run PubMed search and get the article meta information. See https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch It uses only the most informative fields of article meta information. """try:# Retrieve the top-k results for the querydocs=[f"Published: {result['Published']}\n"f"Title: {result['Title']}\n"f"Copyright Information: {result['Copyright Information']}\n"f"Summary::\n{result['Summary']}"forresultinself.load(query[:self.MAX_QUERY_LENGTH])]# Join the results and limit the character countreturn("\n\n".join(docs)[:self.doc_content_chars_max]ifdocselse"No good PubMed Result was found")exceptExceptionasex:returnf"PubMed exception: {ex}"
[docs]deflazy_load(self,query:str)->Iterator[dict]:""" Search PubMed for documents matching the query. Return an iterator of dictionaries containing the document metadata. """url=(self.base_url_esearch+"db=pubmed&term="+str({urllib.parse.quote(query)})+f"&retmode=json&retmax={self.top_k_results}&usehistory=y")ifself.api_key!="":url+=f"&api_key={self.api_key}"result=urllib.request.urlopen(url)text=result.read().decode("utf-8")json_text=json.loads(text)webenv=json_text["esearchresult"]["webenv"]foruidinjson_text["esearchresult"]["idlist"]:yieldself.retrieve_article(uid,webenv)
[docs]defload(self,query:str)->List[dict]:""" Search PubMed for documents matching the query. Return a list of dictionaries containing the document metadata. """returnlist(self.lazy_load(query))
[docs]defretrieve_article(self,uid:str,webenv:str)->dict:url=(self.base_url_efetch+"db=pubmed&retmode=xml&id="+uid+"&webenv="+webenv)ifself.api_key!="":url+=f"&api_key={self.api_key}"retry=0whileTrue:try:result=urllib.request.urlopen(url)breakexcepturllib.error.HTTPErrorase:ife.code==429andretry<self.max_retry:# Too Many Requests errors# wait for an exponentially increasing amount of timeprint(# noqa: T201f"Too Many Requests, "f"waiting for {self.sleep_time:.2f} seconds...")time.sleep(self.sleep_time)self.sleep_time*=2retry+=1else:raiseexml_text=result.read().decode("utf-8")text_dict=self.parse(xml_text)returnself._parse_article(uid,text_dict)