Source code for langchain_community.utilities.wikidata
"""Util that calls Wikidata."""importloggingfromtypingimportAny,Dict,List,Optionalfromlangchain_core.documentsimportDocumentfromlangchain_core.pydantic_v1importBaseModel,root_validatorlogger=logging.getLogger(__name__)WIKIDATA_MAX_QUERY_LENGTH=300# Common properties you probably want to see filtered from https://www.wikidata.org/wiki/Wikidata:Database_reports/List_of_properties/allDEFAULT_PROPERTIES=["P31","P279","P27","P361","P527","P495","P17","P585","P131","P106","P21","P569","P570","P577","P50","P571","P641","P625","P19","P69","P108","P136","P39","P161","P20","P101","P179","P175","P7937","P57","P607","P509","P800","P449","P580","P582","P276","P69","P112","P740","P159","P452","P102","P1142","P1387","P1576","P140","P178","P287","P25","P22","P40","P185","P802","P1416",]DEFAULT_LANG_CODE="en"WIKIDATA_USER_AGENT="langchain-wikidata"WIKIDATA_API_URL="https://www.wikidata.org/w/api.php"WIKIDATA_REST_API_URL="https://www.wikidata.org/w/rest.php/wikibase/v0/"
class WikidataAPIWrapper(BaseModel):
    """Wrapper around the Wikidata API.

    To use, you should have the ``wikibase-rest-api-client`` and
    ``mediawikiapi`` python packages installed.
    This wrapper will use the Wikibase APIs to conduct searches and
    fetch item content. By default, it will return the item content
    of the top-k results.
    It limits the Document content by doc_content_chars_max.
    """

    # MediaWiki API client used for searching; populated by validate_environment.
    wikidata_mw: Any  #: :meta private:
    # Wikibase REST client used for fetching items; populated by validate_environment.
    wikidata_rest: Any  #: :meta private:
    # Maximum number of search hits that are turned into documents.
    top_k_results: int = 2
    # NOTE(review): not referenced by any method of this class.
    load_all_available_meta: bool = False
    # Maximum number of characters kept per document and per ``run`` result.
    doc_content_chars_max: int = 4000
    # Property ids passed to the fluent client as ``supported_props``.
    wikidata_props: List[str] = DEFAULT_PROPERTIES
    # Language code passed to the fluent client.
    lang: str = DEFAULT_LANG_CODE

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the python packages exist and create the API clients.

        Args:
            values: Raw field values supplied to the model.

        Returns:
            ``values`` with ``wikidata_mw`` and ``wikidata_rest`` set to
            freshly constructed clients.

        Raises:
            ImportError: If ``mediawikiapi`` or ``wikibase_rest_api_client``
                cannot be imported.
        """
        # Only the imports live inside ``try`` so that an ImportError raised
        # while constructing a client is not misreported as a missing package.
        try:
            from mediawikiapi import MediaWikiAPI
            from mediawikiapi.config import Config
        except ImportError as e:
            raise ImportError(
                "Could not import mediawikiapi python package. "
                "Please install it with `pip install mediawikiapi`."
            ) from e

        values["wikidata_mw"] = MediaWikiAPI(
            Config(user_agent=WIKIDATA_USER_AGENT, mediawiki_url=WIKIDATA_API_URL)
        )

        try:
            from wikibase_rest_api_client import Client
        except ImportError as e:
            raise ImportError(
                "Could not import wikibase_rest_api_client python package. "
                "Please install it with `pip install wikibase-rest-api-client`."
            ) from e

        values["wikidata_rest"] = Client(
            timeout=60,
            base_url=WIKIDATA_REST_API_URL,
            headers={"User-Agent": WIKIDATA_USER_AGENT},
            follow_redirects=True,
        )
        return values

    def _search_item_ids(self, query: str) -> List[str]:
        """Search Wikidata and return at most ``top_k_results`` hits."""
        clipped_query = query[:WIKIDATA_MAX_QUERY_LENGTH]
        items = self.wikidata_mw.search(clipped_query, results=self.top_k_results)
        # Slice defensively in case the search returns more than requested.
        return items[: self.top_k_results]

    def _item_to_document(self, qid: str) -> Optional[Document]:
        """Fetch one Wikidata item and render it as a Document.

        Args:
            qid: Identifier of the item to fetch.

        Returns:
            A Document whose content lists the label, description, aliases
            and non-empty statements of the item (truncated to
            ``doc_content_chars_max``) and whose metadata holds ``title``
            and ``source``; ``None`` if the item could not be fetched.
        """
        from wikibase_rest_api_client.utilities.fluent import FluentWikibaseClient

        fluent_client: FluentWikibaseClient = FluentWikibaseClient(
            self.wikidata_rest, supported_props=self.wikidata_props, lang=self.lang
        )
        resp = fluent_client.get_item(qid)

        if not resp:
            logger.warning("Could not find item %s in Wikidata", qid)
            return None

        doc_lines = []
        if resp.label:
            doc_lines.append(f"Label: {resp.label}")
        if resp.description:
            doc_lines.append(f"Description: {resp.description}")
        if resp.aliases:
            doc_lines.append(f"Aliases: {', '.join(resp.aliases)}")
        for prop, prop_values in resp.statements.items():
            if prop_values:
                doc_lines.append(f"{prop.label}: {', '.join(prop_values)}")

        return Document(
            page_content=("\n".join(doc_lines))[: self.doc_content_chars_max],
            # The Document field is ``metadata``; the previous ``meta`` keyword
            # did not populate it, so title and source were lost.
            metadata={
                "title": qid,
                "source": f"https://www.wikidata.org/wiki/{qid}",
            },
        )

    def load(self, query: str) -> List[Document]:
        """Run Wikidata search and get the item documents plus the meta information.

        Args:
            query: Search text; clipped to ``WIKIDATA_MAX_QUERY_LENGTH`` characters.

        Returns:
            One Document per search hit that could be fetched, in search order.
        """
        docs = []
        for item in self._search_item_ids(query):
            if doc := self._item_to_document(item):
                docs.append(doc)
        return docs

    def run(self, query: str) -> str:
        """Run Wikidata search and get item summaries.

        Args:
            query: Search text; clipped to ``WIKIDATA_MAX_QUERY_LENGTH`` characters.

        Returns:
            The summaries of all fetched items joined by blank lines and
            truncated to ``doc_content_chars_max`` characters, or a fixed
            message when no item could be fetched.
        """
        docs = []
        for item in self._search_item_ids(query):
            if doc := self._item_to_document(item):
                docs.append(f"Result {item}:\n{doc.page_content}")
        if not docs:
            return "No good Wikidata Search Result was found"
        return "\n\n".join(docs)[: self.doc_content_chars_max]