class TelegramChatLoader(BaseChatLoader):
    """Load `telegram` conversations to LangChain chat messages.

    To export, use the Telegram Desktop app from
    https://desktop.telegram.org/, select a conversation, click the three dots
    in the top right corner, and select "Export chat history". Then select
    "Machine-readable JSON" (preferred) to export.

    Note: the 'lite' versions of the desktop app (like "Telegram for MacOS")
    do not support exporting chat history.
    """

    def __init__(
        self,
        path: Union[str, Path],
    ):
        """Initialize the TelegramChatLoader.

        Args:
            path (Union[str, Path]): Path to the exported Telegram chat zip,
                directory, json, or HTML file.
        """
        # Always store the path as a plain string; the helpers below rely on
        # ``str.endswith`` and ``os.path`` functions.
        self.path = path if isinstance(path, str) else str(path)

    @staticmethod
    def _load_single_chat_session_html(file_path: str) -> ChatSession:
        """Load a single chat session from an HTML file.

        Every element matching ``.message.default`` becomes one
        ``HumanMessage``. A message without a ``.from_name`` element inherits
        the sender of the previous message; if no sender has been seen yet,
        the message is skipped. A missing ``.text`` element or a missing
        date/title yields an empty string for that field instead of raising,
        mirroring the defaults used by the JSON loader.

        Args:
            file_path (str): Path to the HTML file.

        Returns:
            ChatSession: The loaded chat session.

        Raises:
            ImportError: If the ``beautifulsoup4`` package is not installed.
        """
        try:
            from bs4 import BeautifulSoup
        except ImportError as err:
            raise ImportError(
                "Please install the 'beautifulsoup4' package to load"
                " Telegram HTML files. You can do this by running"
                " 'pip install beautifulsoup4' in your terminal."
            ) from err

        with open(file_path, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")

        results: List[BaseMessage] = []
        previous_sender = None
        for message in soup.select(".message.default"):
            from_name_element = message.select_one(".from_name")
            if from_name_element is None and previous_sender is None:
                logger.debug("from_name not found in message")
                continue
            elif from_name_element is None:
                # No explicit sender on this element: reuse the last one seen.
                from_name = previous_sender
            else:
                from_name = from_name_element.text.strip()

            # ``select_one`` returns None when nothing matches, so guard both
            # lookups rather than crashing on an incomplete message element.
            date_element = message.select_one(".pull_right.date.details")
            timestamp = (
                date_element.get("title", "") if date_element is not None else ""
            )
            text_element = message.select_one(".text")
            text = text_element.text.strip() if text_element is not None else ""

            results.append(
                HumanMessage(
                    content=text,
                    additional_kwargs={
                        "sender": from_name,
                        "events": [{"message_time": timestamp}],
                    },
                )
            )
            previous_sender = from_name

        return ChatSession(messages=results)

    @staticmethod
    def _load_single_chat_session_json(file_path: str) -> ChatSession:
        """Load a single chat session from a JSON file.

        Reads the top-level ``"messages"`` list and converts each entry into a
        ``HumanMessage``. The ``"text"``, ``"date"`` and ``"from"`` values are
        passed through unchanged and default to an empty string when absent.

        Args:
            file_path (str): Path to the JSON file.

        Returns:
            ChatSession: The loaded chat session.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)

        messages = data.get("messages", [])
        results: List[BaseMessage] = []
        for message in messages:
            text = message.get("text", "")
            timestamp = message.get("date", "")
            from_name = message.get("from", "")

            results.append(
                HumanMessage(
                    content=text,
                    additional_kwargs={
                        "sender": from_name,
                        "events": [{"message_time": timestamp}],
                    },
                )
            )

        return ChatSession(messages=results)

    @staticmethod
    def _iterate_files(path: str) -> Iterator[str]:
        """Iterate over files in a directory or zip file.

        Only paths ending in ``.html`` or ``.json`` are yielded. A path that
        is none of a matching file, a directory, or a zip archive yields
        nothing.

        Args:
            path (str): Path to a single file, a directory, or a zip file.

        Yields:
            str: Path to each file.
        """
        if os.path.isfile(path) and path.endswith((".html", ".json")):
            yield path
        elif os.path.isdir(path):
            for root, _, files in os.walk(path):
                for file in files:
                    if file.endswith((".html", ".json")):
                        yield os.path.join(root, file)
        elif zipfile.is_zipfile(path):
            with zipfile.ZipFile(path) as zip_file:
                for file in zip_file.namelist():
                    if file.endswith((".html", ".json")):
                        # The generator is suspended at ``yield`` while still
                        # inside the ``with`` block, so the extracted file
                        # exists while the consumer handles it and is removed
                        # once iteration resumes.
                        with tempfile.TemporaryDirectory() as temp_dir:
                            yield zip_file.extract(file, path=temp_dir)

    def lazy_load(self) -> Iterator[ChatSession]:
        """Lazily load the messages from the chat file as chat sessions.

        Each ``.html`` or ``.json`` file found under ``self.path`` produces
        one chat session.

        Yields:
            ChatSession: The loaded chat session.
        """
        for file_path in self._iterate_files(self.path):
            if file_path.endswith(".html"):
                yield self._load_single_chat_session_html(file_path)
            elif file_path.endswith(".json"):
                yield self._load_single_chat_session_json(file_path)