Source code for langchain_community.document_loaders.tomarkdown

from __future__ import annotations

from typing import Iterator

import requests
from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader


class ToMarkdownLoader(BaseLoader):
    """Load `HTML` using `2markdown API`.

    The loader sends the target URL to the 2markdown ``url2md`` endpoint and
    wraps the ``article`` field of the JSON reply in a single ``Document``.
    """

    def __init__(self, url: str, api_key: str, *, timeout: float | None = None):
        """Initialize with url and api key.

        Args:
            url: Address of the page to convert; also stored as the
                ``source`` entry of the resulting document's metadata.
            api_key: Value sent in the ``X-Api-Key`` request header.
            timeout: Optional number of seconds passed to ``requests.post``
                as its ``timeout``. ``None`` (the default) keeps the previous
                behaviour of waiting indefinitely.
        """
        self.url = url
        self.api_key = api_key
        self.timeout = timeout

    def lazy_load(
        self,
    ) -> Iterator[Document]:
        """Lazily load the file.

        Yields:
            Exactly one ``Document`` whose ``page_content`` is the ``article``
            value of the API response and whose metadata is
            ``{"source": self.url}``.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
            requests.Timeout: If ``timeout`` was set and is exceeded.
            KeyError: If the JSON response has no ``article`` field.
        """
        # getattr keeps instances created without __init__ (or by subclasses
        # that override it without calling super) working as before.
        timeout = getattr(self, "timeout", None)
        response = requests.post(
            "https://api.2markdown.com/v1/url2md",
            headers={"X-Api-Key": self.api_key},
            json={"url": self.url},
            timeout=timeout,
        )
        # Fail loudly on HTTP errors instead of letting an error payload
        # surface later as an unrelated KeyError / JSON decoding failure.
        response.raise_for_status()
        text = response.json()["article"]
        metadata = {"source": self.url}
        yield Document(page_content=text, metadata=metadata)