class MsWordParser(BaseBlobParser):
    """Parse Microsoft Word documents from a blob.

    Blobs are dispatched on their MIME type to the matching ``unstructured``
    partition function (``partition_doc`` for ``application/msword``,
    ``partition_docx`` for the Office Open XML word-processing type).
    """

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Parse a Microsoft Word document into the Document iterator.

        Args:
            blob: The blob to parse. Its ``mimetype`` selects the partition
                function and its ``source`` is copied into the metadata.

        Returns:
            An iterator yielding a single Document whose ``page_content`` is
            the string form of every partitioned element, joined by blank
            lines, and whose metadata is ``{"source": blob.source}``.

        Raises:
            ImportError: If the ``unstructured`` package cannot be imported.
            ValueError: If ``blob.mimetype`` is not one of the supported
                Word MIME types.
        """
        # Imported lazily so the dependency is only required when the parser
        # is actually used.
        try:
            from unstructured.partition.doc import partition_doc
            from unstructured.partition.docx import partition_docx
        except ImportError as e:
            raise ImportError(
                "Could not import unstructured, please install with `pip install "
                "unstructured`."
            ) from e

        # Single source of truth for the supported MIME types: the keys of
        # this table are also what the blob is validated against, so the
        # dispatch table and the validation can never drift apart.
        mime_type_parser = {
            "application/msword": partition_doc,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": (
                partition_docx
            ),
        }

        partition = mime_type_parser.get(blob.mimetype)
        if partition is None:
            raise ValueError("This blob type is not supported for this parser.")

        # Build the Document while the stream is open, but yield only after
        # the `with` block has exited: a generator suspended inside `with`
        # would keep the underlying stream open until the consumer resumes
        # or discards it.
        with blob.as_bytes_io() as word_document:
            elements = partition(file=word_document)
            text = "\n\n".join([str(el) for el in elements])
            document = Document(
                page_content=text, metadata={"source": blob.source}
            )

        yield document