Source code for langchain_community.utilities.google_scholar
"""Util that calls Google Scholar Search."""fromtypingimportAny,Dict,Optionalfromlangchain_core.utilsimportget_from_dict_or_envfrompydanticimportBaseModel,ConfigDict,model_validator
[docs]classGoogleScholarAPIWrapper(BaseModel):"""Wrapper for Google Scholar API You can create serpapi key by signing up at: https://serpapi.com/users/sign_up. The wrapper uses the serpapi python package: https://serpapi.com/integrations/python#search-google-scholar To use, you should have the environment variable ``SERP_API_KEY`` set with your API key, or pass `serp_api_key` as a named parameter to the constructor. Attributes: top_k_results: number of results to return from google-scholar query search. By default it returns top 10 results. hl: attribute defines the language to use for the Google Scholar search. It's a two-letter language code. (e.g., en for English, es for Spanish, or fr for French). Head to the Google languages page for a full list of supported Google languages: https://serpapi.com/google-languages lr: attribute defines one or multiple languages to limit the search to. It uses lang_{two-letter language code} to specify languages and | as a delimiter. (e.g., lang_fr|lang_de will only search French and German pages). Head to the Google lr languages for a full list of supported languages: https://serpapi.com/google-lr-languages Example: .. code-block:: python from langchain_community.utilities import GoogleScholarAPIWrapper google_scholar = GoogleScholarAPIWrapper() google_scholar.run('langchain') """top_k_results:int=10hl:str="en"lr:str="lang_en"serp_api_key:Optional[str]=Nonegoogle_scholar_engine:Any=Nonemodel_config=ConfigDict(extra="forbid",)@model_validator(mode="before")@classmethoddefvalidate_environment(cls,values:Dict)->Any:"""Validate that api key and python package exists in environment."""serp_api_key=get_from_dict_or_env(values,"serp_api_key","SERP_API_KEY")values["serp_api_key"]=serp_api_keytry:fromserpapiimportGoogleScholarSearchexceptImportError:raiseImportError("google-search-results is not installed. ""Please install it with `pip install google-search-results"">=2.4.2`")GoogleScholarSearch.SERP_API_KEY=serp_api_keyvalues["google_scholar_engine"]=GoogleScholarSearchreturnvalues
[docs]defrun(self,query:str)->str:"""Run query through GoogleSearchScholar and parse result"""total_results=[]page=0whilepage<max((self.top_k_results-20),1):# We are getting 20 results from every page# which is the max in order to reduce the number of API CALLS.# 0 is the first page of results, 20 is the 2nd page of results,# 40 is the 3rd page of results, etc.results=(self.google_scholar_engine(# type: ignore{"q":query,"start":page,"hl":self.hl,"num":min(self.top_k_results,20),# if top_k_result is less than 20."lr":self.lr,}).get_dict().get("organic_results",[]))total_results.extend(results)ifnotresults:# No need to search for more pages if current page# has returned no resultsbreakpage+=20if(self.top_k_results%20!=0andpage>20andtotal_results):# From the last page we would only need top_k_results%20 results# if k is not divisible by 20.results=(self.google_scholar_engine(# type: ignore{"q":query,"start":page,"num":self.top_k_results%20,"hl":self.hl,"lr":self.lr,}).get_dict().get("organic_results",[]))total_results.extend(results)ifnottotal_results:return"No good Google Scholar Result was found"docs=[f"Title: {result.get('title','')}\n"f"Authors: {','.join([author.get('name')forauthorinresult.get('publication_info',{}).get('authors',[])])}\n"# noqa: E501f"Summary: {result.get('publication_info',{}).get('summary','')}\n"f"Total-Citations: {result.get('inline_links',{}).get('cited_by',{}).get('total','')}"# noqa: E501forresultintotal_results]return"\n\n".join(docs)