class Html2TextTransformer(BaseDocumentTransformer):
    """Convert the HTML in each document's ``page_content`` to text.

    The conversion is delegated to the third-party ``html2text`` package.

    Arguments:
        ignore_links: Whether links should be ignored; defaults to True.
        ignore_images: Whether images should be ignored; defaults to True.

    Example:
        .. code-block:: python

            from langchain_community.document_transformers import Html2TextTransformer

            html2text = Html2TextTransformer()
            docs_transform = html2text.transform_documents(docs)
    """

    def __init__(self, ignore_links: bool = True, ignore_images: bool = True) -> None:
        """Store the conversion options read by ``transform_documents``.

        Args:
            ignore_links: Forwarded to ``HTML2Text.ignore_links``.
            ignore_images: Forwarded to ``HTML2Text.ignore_images``.
        """
        self.ignore_links = ignore_links
        self.ignore_images = ignore_images

    def transform_documents(
        self,
        documents: Sequence[Document],
        **kwargs: Any,
    ) -> Sequence[Document]:
        """Return new documents whose content has been run through html2text.

        Args:
            documents: Documents whose ``page_content`` holds the HTML to
                convert. The input documents are not modified.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A list with one new ``Document`` per input document, in the same
            order. Each carries the converted ``page_content`` and a shallow
            copy of the source document's ``metadata``.

        Raises:
            ImportError: If the ``html2text`` package is not installed.
        """
        # Imported lazily so the dependency is only required when the
        # transformer is actually used.
        try:
            import html2text
        except ImportError as exc:
            raise ImportError(
                "html2text package not found, please install it with "
                "`pip install html2text`"
            ) from exc

        # Create a html2text.HTML2Text object and override some properties
        converter = html2text.HTML2Text()
        converter.ignore_links = self.ignore_links
        converter.ignore_images = self.ignore_images

        # Copy the metadata dict so the new documents do not share the
        # top-level mapping with the inputs.
        return [
            Document(
                page_content=converter.handle(doc.page_content),
                metadata={**doc.metadata},
            )
            for doc in documents
        ]