class IFixitLoader(BaseLoader):
    """Load `iFixit` repair guides, device wikis and answers.

    iFixit is the largest, open repair community on the web. The site contains
    nearly 100k repair manuals, 200k Questions & Answers on 42k devices, and
    all the data is licensed under CC-BY.

    This loader will allow you to download the text of a repair guide, text of
    Q&A's and wikis from devices on iFixit using their open APIs and web
    scraping.
    """

    def __init__(self, web_path: str):
        """Initialize with a web path.

        Args:
            web_path: Full iFixit URL. It must start with
                ``https://www.ifixit.com`` followed by one of ``/Device``,
                ``/Guide``, ``/Teardown`` or ``/Answers``.

        Raises:
            ValueError: If the URL does not point at iFixit, does not use one
                of the supported page types, or is too short to contain the
                page identifier.
        """
        site_root = "https://www.ifixit.com"
        if not web_path.startswith(site_root):
            raise ValueError("web path must start with 'https://www.ifixit.com'")

        # Strip only the leading host; str.replace would also remove any later
        # occurrence of the host string inside the path.
        path = web_path[len(site_root):]

        # TODO: Add /Wiki
        allowed_paths = ("/Device", "/Guide", "/Answers", "/Teardown")
        if not path.startswith(allowed_paths):
            raise ValueError(
                "web path must start with /Device, /Guide, /Teardown or /Answers"
            )

        pieces = [x for x in path.split("/") if x]

        # Teardowns are just guides by a different name
        self.page_type = pieces[0] if pieces[0] != "Teardown" else "Guide"

        # Guide and Answers URLs carry the id in the third path segment,
        # every other page type in the second.
        id_index = 2 if self.page_type in ("Guide", "Answers") else 1
        if len(pieces) <= id_index:
            # Report a short URL as a validation error instead of letting an
            # IndexError escape.
            raise ValueError(
                "web path is missing the page identifier: " + web_path
            )
        self.id = pieces[id_index]

        self.web_path = web_path

    @staticmethod
    def load_suggestions(query: str = "", doc_type: str = "all") -> List[Document]:
        """Load suggestions.

        Args:
            query: A query string
            doc_type: The type of document to search for. Can be one of "all",
                "device", "guide", "teardown", "answer", "wiki".

        Returns:
            The documents loaded for every suggestion whose URL this loader
            supports; suggestions that raise ``ValueError`` are skipped.

        Raises:
            ValueError: If the suggestion endpoint does not answer with
                HTTP 200.
        """
        from urllib.parse import quote

        # The query is a path segment: percent-encode it so characters such as
        # "/", "?" or "#" cannot alter the request URL.
        res = requests.get(
            IFIXIT_BASE_URL
            + "/suggest/"
            + quote(query, safe="")
            + "?doctypes="
            + doc_type
        )

        if res.status_code != 200:
            # Use the raw body: res.json() returns a dict/list, which cannot be
            # concatenated to a str and may itself fail on a non-JSON body.
            raise ValueError(
                'Could not load suggestions for "' + query + '"\n' + res.text
            )

        data = res.json()
        results = data["results"]
        output = []

        for result in results:
            try:
                loader = IFixitLoader(result["url"])
                if loader.page_type == "Device":
                    output += loader.load_device(include_guides=False)
                else:
                    # NOTE(review): load() is not defined in this section of
                    # the file; presumably it is provided elsewhere (e.g. by
                    # BaseLoader) and dispatches on page_type -- confirm.
                    output += loader.load()
            except ValueError:
                # Best effort: skip suggestions that cannot be loaded.
                continue

        return output

    def load_questions_and_answers(
        self, url_override: Optional[str] = None
    ) -> List[Document]:
        """Load a list of questions and answers.

        Args:
            url_override: A URL to override the default URL.

        Returns:
            List[Document]: A single document containing the question followed
            by its answers, with ``source`` and ``title`` metadata.

        Raises:
            ValueError: If the page lacks the expected question title or
                question text elements.
        """
        loader = WebBaseLoader(self.web_path if url_override is None else url_override)
        soup = loader.scrape()

        title_tag = soup.find("h1", "post-title")
        question_tag = soup.select_one(".post-content .post-text")
        if title_tag is None or question_tag is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise ValueError(
                "Could not find question title or text at: " + self.web_path
            )

        title = title_tag.text
        output = ["# " + title, question_tag.text.strip()]

        answersHeader = soup.find("div", "post-answers-header")
        if answersHeader:
            output.append("\n## " + answersHeader.text.strip())

        for answer in soup.select(".js-answers-list .post.post-answer"):
            if answer.has_attr("itemprop") and "acceptedAnswer" in answer["itemprop"]:
                output.append("\n### Accepted Answer")
            elif "post-helpful" in answer["class"]:
                output.append("\n### Most Helpful Answer")
            else:
                output.append("\n### Other Answer")

            output += [
                a.text.strip() for a in answer.select(".post-content .post-text")
            ]
            output.append("\n")

        text = "\n".join(output).strip()

        metadata = {"source": self.web_path, "title": title}

        return [Document(page_content=text, metadata=metadata)]

    def load_device(
        self, url_override: Optional[str] = None, include_guides: bool = True
    ) -> List[Document]:
        """Loads a device

        Args:
            url_override: A URL to override the default URL.
            include_guides: Whether to include guides linked to from the
                device. Defaults to True.

        Returns:
            The device document, followed by one document per linked guide
            when ``include_guides`` is true.

        Raises:
            ValueError: If the device endpoint does not answer with HTTP 200.
        """
        documents = []
        if url_override is None:
            url = IFIXIT_BASE_URL + "/wikis/CATEGORY/" + self.id
        else:
            url = url_override

        res = requests.get(url)
        if res.status_code != 200:
            # Same failure handling as load_guide.
            raise ValueError(
                "Could not load device: " + self.web_path + "\n" + res.text
            )

        data = res.json()
        # Skip fields that are absent or null so the join cannot fail.
        text = "\n".join(
            [
                data[key]
                for key in ["title", "description", "contents_raw"]
                if data.get(key) is not None
            ]
        ).strip()

        metadata = {"source": self.web_path, "title": data["title"]}
        documents.append(Document(page_content=text, metadata=metadata))

        if include_guides:
            # Load and return documents for each guide linked to from the device
            guide_urls = [guide["url"] for guide in data.get("guides", [])]
            for guide_url in guide_urls:
                documents.append(IFixitLoader(guide_url).load()[0])

        return documents

    def load_guide(self, url_override: Optional[str] = None) -> List[Document]:
        """Load a guide

        Args:
            url_override: A URL to override the default URL.

        Returns:
            List[Document]: A single document with the guide's title,
            introduction, required tools and parts, steps and conclusion.

        Raises:
            ValueError: If the guide endpoint does not answer with HTTP 200.
        """
        if url_override is None:
            url = IFIXIT_BASE_URL + "/guides/" + self.id
        else:
            url = url_override

        res = requests.get(url)

        if res.status_code != 200:
            # Use the raw body: concatenating the parsed JSON (a dict) to a
            # str would raise TypeError and hide the real error.
            raise ValueError(
                "Could not load guide: " + self.web_path + "\n" + res.text
            )

        data = res.json()

        doc_parts = ["# " + data["title"], data["introduction_raw"]]

        doc_parts.append("\n\n###Tools Required:")
        if not data["tools"]:
            doc_parts.append("\n - None")
        else:
            for tool in data["tools"]:
                doc_parts.append("\n - " + tool["text"])

        doc_parts.append("\n\n###Parts Required:")
        if not data["parts"]:
            doc_parts.append("\n - None")
        else:
            for part in data["parts"]:
                doc_parts.append("\n - " + part["text"])

        for row in data["steps"]:
            # Untitled steps fall back to their ordinal position.
            heading = row["title"] or "Step {}".format(row["orderby"])
            doc_parts.append("\n\n## " + heading)

            for line in row["lines"]:
                doc_parts.append(line["text_raw"])

        doc_parts.append(data["conclusion_raw"])

        text = "\n".join(doc_parts)

        metadata = {"source": self.web_path, "title": data["title"]}

        return [Document(page_content=text, metadata=metadata)]