Source code for langchain_community.document_loaders.mastodon
from __future__ import annotations

import os
from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    Iterator,
    List,
    Optional,
    Sequence,
)

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader

if TYPE_CHECKING:
    import mastodon


def _dependable_mastodon_import() -> mastodon:
    """Import and return the optional ``mastodon`` module.

    The import is deferred to call time so that this module can be imported
    without the ``Mastodon.py`` package being installed.

    Returns:
        The imported ``mastodon`` module.

    Raises:
        ImportError: If the ``mastodon`` module cannot be imported. The
            original import failure is attached as the exception's cause.
    """
    try:
        import mastodon
    except ImportError as e:
        # Chain explicitly so the underlying import failure stays visible
        # as the cause of the friendlier installation hint.
        raise ImportError(
            "Mastodon.py package not found, "
            "please install it with `pip install Mastodon.py`"
        ) from e
    return mastodon
class MastodonTootsLoader(BaseLoader):
    """Load the `Mastodon` 'toots'."""

    def __init__(
        self,
        mastodon_accounts: Sequence[str],
        number_toots: Optional[int] = 100,
        exclude_replies: bool = False,
        access_token: Optional[str] = None,
        api_base_url: str = "https://mastodon.social",
    ):
        """Instantiate Mastodon toots loader.

        Args:
            mastodon_accounts: The list of Mastodon accounts to query.
            number_toots: How many toots to pull for each account.
                Defaults to 100. If None, a single request is made without
                an explicit limit and whatever it returns is used.
            exclude_replies: Whether to exclude reply toots from the load.
                Defaults to False.
            access_token: An access token if toots are loaded as a Mastodon
                app. Can also be specified via the environment variables
                "MASTODON_ACCESS_TOKEN".
            api_base_url: A Mastodon API base URL to talk to, if not using
                the default. Defaults to "https://mastodon.social".

        Raises:
            ImportError: If the ``Mastodon.py`` package is not installed.
        """
        mastodon = _dependable_mastodon_import()
        # An explicit argument wins; otherwise fall back to the environment.
        access_token = access_token or os.environ.get("MASTODON_ACCESS_TOKEN")
        self.api = mastodon.Mastodon(
            access_token=access_token, api_base_url=api_base_url
        )
        self.mastodon_accounts = mastodon_accounts
        self.number_toots = number_toots
        self.exclude_replies = exclude_replies

    def lazy_load(self) -> Iterator[Document]:
        """Load toots into documents.

        For every configured account the account is looked up, its statuses
        are fetched (reblogs are always excluded, replies optionally), and
        one document per toot is yielded.

        Yields:
            One ``Document`` per toot, in the order the API returns them.
        """
        for account in self.mastodon_accounts:
            user = self.api.account_lookup(account)
            first_page = self.api.account_statuses(
                user.id,
                only_media=False,
                pinned=False,
                exclude_replies=self.exclude_replies,
                exclude_reblogs=True,
                limit=self.number_toots,
            )
            toots = self._collect_toots(first_page)
            yield from self._format_toots(toots, user)

    def _collect_toots(
        self, first_page: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Gather up to ``number_toots`` toots, following pagination.

        The Mastodon API limits how many statuses one request may return
        (documented maximum: 40), so a single call cannot satisfy a larger
        ``number_toots``. Further pages are requested through
        ``self.api.fetch_next`` until enough toots are collected or no more
        pages are available.

        Args:
            first_page: The page returned by the initial
                ``account_statuses`` call.

        Returns:
            At most ``number_toots`` toots. If ``number_toots`` is None the
            first page is returned unchanged. A non-positive
            ``number_toots`` yields an empty list.
        """
        wanted = self.number_toots
        if wanted is None:
            # No explicit target: keep the single-request behaviour.
            return first_page

        collected: List[Dict[str, Any]] = []
        page = first_page
        while page:
            collected.extend(page)
            if len(collected) >= wanted:
                break
            # Pass the untouched page object: Mastodon.py keeps the
            # pagination information attached to it.
            page = self.api.fetch_next(page)
        return collected[: max(wanted, 0)]

    def _format_toots(
        self, toots: List[Dict[str, Any]], user_info: dict
    ) -> Iterable[Document]:
        """Format toots into documents.

        Adding user info, and selected toot fields into the metadata.

        Args:
            toots: The toots to convert; each must provide the keys
                "content", "created_at" and "in_reply_to_id".
            user_info: The account information stored under the
                "user_info" metadata key of every document.

        Yields:
            One ``Document`` per toot with the toot's "content" as
            ``page_content``.
        """
        for toot in toots:
            metadata = {
                "created_at": toot["created_at"],
                "user_info": user_info,
                # A toot is a reply exactly when it references a parent.
                "is_reply": toot["in_reply_to_id"] is not None,
            }
            yield Document(
                page_content=toot["content"],
                metadata=metadata,
            )