class WhatsAppChatLoader(BaseChatLoader):
    """Load `WhatsApp` conversations from a dump zip file or directory."""

    def __init__(self, path: str):
        """Initialize the WhatsAppChatLoader.

        Args:
            path (str): Path to an exported WhatsApp chat: a single text file,
                a directory containing ``.txt`` exports, or a zip archive.
                To generate the dump, open the chat, click the three dots in
                the top right corner, and select "More". Then select
                "Export chat" and choose "Without media".
        """
        self.path = path
        # Message bodies that start with one of these phrases are dropped.
        ignore_lines = [
            "This message was deleted",
            "<Media omitted>",
            "image omitted",
            "Messages and calls are end-to-end encrypted. No one outside of this chat,"
            " not even WhatsApp, can read or listen to them.",
        ]
        # Each phrase may be preceded by any number of U+200E marks.
        self._ignore_lines = re.compile(
            r"(" + "|".join([r"\u200E*" + line for line in ignore_lines]) + r")",
            flags=re.IGNORECASE,
        )
        # Groups: (1) timestamp, (2) sender, (3) message text.
        self._message_line_regex = re.compile(
            r"\u200E*\[?(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2}:\d{2} (?:AM|PM))\]?[ \u200E]*([^:]+): (.+)",  # noqa
            flags=re.IGNORECASE,
        )

    def _load_single_chat_session(self, file_path: str) -> ChatSession:
        """Load a single chat session from a file.

        Args:
            file_path (str): Path to the chat file (read as UTF-8 text).

        Returns:
            ChatSession: The loaded chat session.
        """
        with open(file_path, "r", encoding="utf-8") as file:
            txt = file.read()

        # Split messages by newlines, but keep multi-line messages grouped:
        # a line that does not start a new message is appended (stripped,
        # space-separated) to the message currently being accumulated.
        chat_lines: List[str] = []
        current_message = ""
        for line in txt.split("\n"):
            if self._message_line_regex.match(line):
                if current_message:
                    chat_lines.append(current_message)
                current_message = line
            else:
                current_message += " " + line.strip()
        if current_message:
            chat_lines.append(current_message)

        results: List[Union[HumanMessage, AIMessage]] = []
        for line in chat_lines:
            result = self._message_line_regex.match(line.strip())
            if not result:
                # Typically text that appeared before the first message line.
                logger.debug("Could not parse line: %s", line)
                continue
            timestamp, sender, text = result.groups()
            # `match` anchors at the start, so only messages that begin with
            # an ignore phrase are skipped.
            if self._ignore_lines.match(text.strip()):
                continue
            results.append(
                HumanMessage(
                    role=sender,
                    content=text,
                    additional_kwargs={
                        "sender": sender,
                        "events": [{"message_time": timestamp}],
                    },
                )
            )
        return ChatSession(messages=results)

    @staticmethod
    def _iterate_files(path: str) -> Iterator[str]:
        """Iterate over the chat files in a directory, zip archive, or file.

        Args:
            path (str): Path to a directory, a zip archive, or a single file.

        Yields:
            str: The path to each chat file. Paths produced from a zip
                archive point into a temporary directory that is removed
                once the generator is advanced past them, exhausted, or
                closed, so each file must be read before requesting the next.

        Raises:
            FileNotFoundError: If ``path`` is neither a directory nor a file.
        """
        # Local import: the module-level import block is not guaranteed to
        # provide ``tempfile``.
        import tempfile

        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                # Sort in place so traversal order is deterministic.
                dirs.sort()
                for file in sorted(files):
                    if file.endswith(".txt"):
                        yield os.path.join(root, file)
        elif zipfile.is_zipfile(path):
            # This check must come before the plain-file check: a zip archive
            # is also a regular file and would otherwise be yielded as-is.
            with zipfile.ZipFile(path) as zip_file, tempfile.TemporaryDirectory() as temp_dir:
                for file in zip_file.namelist():
                    if file.endswith(".txt"):
                        # Extract into the temporary directory rather than
                        # the current working directory.
                        yield zip_file.extract(file, path=temp_dir)
        elif os.path.isfile(path):
            yield path
        else:
            raise FileNotFoundError(
                f"No chat file, directory, or zip archive found at: {path}"
            )

    def lazy_load(self) -> Iterator[ChatSession]:
        """Lazy load the messages from the chat file(s) and yield them as
        chat sessions.

        ``self.path`` is resolved through ``_iterate_files``, so a single
        file yields one session, while a directory or zip archive yields one
        session per ``.txt`` file found.

        Yields:
            Iterator[ChatSession]: The loaded chat sessions.
        """
        for file_path in self._iterate_files(self.path):
            yield self._load_single_chat_session(file_path)