Source code for langchain_community.document_loaders.reddit
from __future__ import annotations

from typing import TYPE_CHECKING, Iterable, List, Optional, Sequence

from langchain_core.documents import Document

from langchain_community.document_loaders.base import BaseLoader

if TYPE_CHECKING:
    import praw


def _dependable_praw_import() -> praw:
    """Import and return the ``praw`` module.

    ``praw`` is only imported at module level under ``TYPE_CHECKING``, so the
    real import happens here, at call time.

    Returns:
        The imported ``praw`` module.

    Raises:
        ImportError: If ``praw`` cannot be imported. The message tells the
            user how to install it, and the original import failure is
            attached as the cause.
    """
    try:
        import praw
    except ImportError as e:
        # Chain explicitly so the underlying import failure stays visible
        # as the direct cause in the traceback.
        raise ImportError(
            "praw package not found, please install it with `pip install praw`"
        ) from e
    return praw
class RedditPostsLoader(BaseLoader):
    """Load `Reddit` posts.

    Read posts on a subreddit or posts submitted by a user.
    First, you need to go to
    https://www.reddit.com/prefs/apps/
    and create your application.
    """

    def __init__(
        self,
        client_id: str,
        client_secret: str,
        user_agent: str,
        search_queries: Sequence[str],
        mode: str,
        categories: Optional[Sequence[str]] = None,
        number_posts: Optional[int] = 10,
    ):
        """
        Initialize with client_id, client_secret, user_agent, search_queries, mode,
            categories, number_posts.
        Example: https://www.reddit.com/r/learnpython/

        Args:
            client_id: Reddit client id.
            client_secret: Reddit client secret.
            user_agent: Reddit user agent.
            search_queries: The search queries. Each one is used as a subreddit
                name or as a redditor name, depending on ``mode``.
            mode: The mode, either ``"subreddit"`` or ``"username"``.
            categories: The categories. Each one is looked up by name as a
                listing method on the subreddit / user submissions object.
                Default: ["new"]
            number_posts: The number of posts, passed as ``limit`` to each
                listing call. Default: 10
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.user_agent = user_agent
        self.search_queries = search_queries
        self.mode = mode
        # Build the default per instance: a list literal in the signature
        # would be one shared object, so mutating ``loader.categories`` on
        # one loader would silently change the default for all others.
        self.categories = ["new"] if categories is None else categories
        self.number_posts = number_posts

    def load(self) -> List[Document]:
        """Load reddits.

        Runs every combination of search query and category through the
        loader selected by ``self.mode`` and collects the results.

        Returns:
            All loaded documents, ordered by search query, then by category.

        Raises:
            ImportError: If the ``praw`` package is not installed.
            ValueError: If ``self.mode`` is neither ``"subreddit"`` nor
                ``"username"``.
        """
        praw = _dependable_praw_import()

        reddit = praw.Reddit(
            client_id=self.client_id,
            client_secret=self.client_secret,
            user_agent=self.user_agent,
        )

        # Pick the per-mode loader once; the iteration below is identical
        # for both modes.
        if self.mode == "subreddit":
            posts_loader = self._subreddit_posts_loader
        elif self.mode == "username":
            posts_loader = self._user_posts_loader
        else:
            raise ValueError(
                "mode not correct, please enter 'username' or 'subreddit' as mode"
            )

        results: List[Document] = []
        for search_query in self.search_queries:
            for category in self.categories:
                docs = posts_loader(
                    search_query=search_query, category=category, reddit=reddit
                )
                results.extend(docs)
        return results

    @staticmethod
    def _posts_to_documents(posts: Iterable, category: str) -> Iterable[Document]:
        """Format reddit posts into documents.

        Args:
            posts: Iterable of post objects exposing ``selftext``,
                ``subreddit_name_prefixed``, ``title``, ``score``, ``id``,
                ``url`` and ``author`` attributes.
            category: The category the posts were listed under; recorded in
                each document's metadata.

        Yields:
            One ``Document`` per post, with the post's ``selftext`` as the
            page content.
        """
        for post in posts:
            metadata = {
                "post_subreddit": post.subreddit_name_prefixed,
                "post_category": category,
                "post_title": post.title,
                "post_score": post.score,
                "post_id": post.id,
                "post_url": post.url,
                # Stored exactly as the post object exposes it.
                "post_author": post.author,
            }
            yield Document(
                page_content=post.selftext,
                metadata=metadata,
            )

    def _subreddit_posts_loader(
        self, search_query: str, category: str, reddit: praw.reddit.Reddit
    ) -> Iterable[Document]:
        """Yield documents for the posts of one subreddit in one category.

        Args:
            search_query: Name passed to ``reddit.subreddit``.
            category: Name of the listing method to call on the subreddit.
            reddit: The Reddit client to query.

        Yields:
            One ``Document`` per post returned by the listing.
        """
        subreddit = reddit.subreddit(search_query)
        method = getattr(subreddit, category)
        cat_posts = method(limit=self.number_posts)
        # ``yield from`` keeps this a generator, so nothing is requested
        # until the caller starts iterating.
        yield from self._posts_to_documents(cat_posts, category)

    def _user_posts_loader(
        self, search_query: str, category: str, reddit: praw.reddit.Reddit
    ) -> Iterable[Document]:
        """Yield documents for the submissions of one user in one category.

        Args:
            search_query: Name passed to ``reddit.redditor``.
            category: Name of the listing method to call on the user's
                ``submissions`` object.
            reddit: The Reddit client to query.

        Yields:
            One ``Document`` per post returned by the listing.
        """
        user = reddit.redditor(search_query)
        method = getattr(user.submissions, category)
        cat_posts = method(limit=self.number_posts)
        # ``yield from`` keeps this a generator, so nothing is requested
        # until the caller starts iterating.
        yield from self._posts_to_documents(cat_posts, category)