class AcreomLoader(BaseLoader):
    """Load `acreom` vault from a directory.

    Besides storing its configuration, the class provides helpers that
    split the leading ``---``-delimited front matter block from a note and
    strip acreom-specific markup (tasks, hashtags, doclinks) from its text.
    """

    FRONT_MATTER_REGEX: Pattern = re.compile(
        r"^---\n(.*?)\n---\n", re.MULTILINE | re.DOTALL
    )
    """Regex to match front matter metadata in markdown files."""

    def __init__(
        self,
        path: Union[str, Path],
        encoding: str = "UTF-8",
        collect_metadata: bool = True,
    ):
        """Initialize the loader.

        Args:
            path: Directory containing the markdown files.
            encoding: Encoding to use when reading the files.
            collect_metadata: Whether to collect metadata from the front
                matter. When False, front matter is neither parsed nor
                removed from the content.
        """
        self.file_path = path
        """Path to the directory containing the markdown files."""
        self.encoding = encoding
        """Encoding to use when reading the files."""
        self.collect_metadata = collect_metadata
        """Whether to collect metadata from the front matter."""

    def _parse_front_matter(self, content: str) -> dict:
        """Parse front matter metadata from the content and return it as a dict.

        Only the first block matched by ``FRONT_MATTER_REGEX`` is read. Each
        line of that block is split on its first colon into a stripped key
        and a stripped value; lines without a colon are ignored.

        Args:
            content: Raw text of a markdown file.

        Returns:
            Mapping of front matter keys to their (string) values. Empty when
            metadata collection is disabled or no front matter is found.
        """
        if not self.collect_metadata:
            return {}

        match = self.FRONT_MATTER_REGEX.search(content)
        front_matter = {}
        if match:
            for line in match.group(1).split("\n"):
                # Lines without a colon carry no key/value pair; skip them.
                if ":" in line:
                    key, value = line.split(":", 1)
                    front_matter[key.strip()] = value.strip()
        return front_matter

    def _remove_front_matter(self, content: str) -> str:
        """Remove front matter metadata from the given content.

        Args:
            content: Raw text of a markdown file.

        Returns:
            The content without its front matter block. When metadata
            collection is disabled the content is returned unchanged.
        """
        if not self.collect_metadata:
            return content
        # The pattern is compiled with re.MULTILINE, so any later pair of
        # `---` horizontal rules would match as well. Remove only the first
        # match (the same span _parse_front_matter reads) so that body text
        # between such rules is preserved.
        return self.FRONT_MATTER_REGEX.sub("", content, count=1)

    def _process_acreom_content(self, content: str) -> str:
        """Strip acreom-specific elements from the content.

        Removes elements that do not contribute to the context of the
        current document: unchecked task items (``- [ ] ...`` / ``[ ] ...``)
        through the end of their line, every ``#`` character, and
        ``[[...]]`` doclinks.

        Args:
            content: Text of a markdown file.

        Returns:
            The content with those elements removed.
        """
        # rm tasks
        content = re.sub(r"\s*-\s\[\s\]\s.*|\s*\[\s\]\s.*", "", content)
        # rm hashtags (drops every `#`, markdown heading markers included)
        content = content.replace("#", "")
        # rm doclinks
        content = re.sub(r"\[\[.*?\]\]", "", content)
        return content