class NLTKTextSplitter(TextSplitter):
    """Split text into sentences with NLTK, then merge them into chunks."""

    def __init__(
        self, separator: str = "\n\n", language: str = "english", **kwargs: Any
    ) -> None:
        """Initialize the NLTK splitter.

        Args:
            separator: String handed to ``_merge_splits`` together with the
                sentence list when building chunks.
            language: Value passed as the ``language`` keyword to NLTK's
                ``sent_tokenize``.
            **kwargs: Forwarded unchanged to ``TextSplitter.__init__``.

        Raises:
            ImportError: If the ``nltk`` package cannot be imported.
        """
        super().__init__(**kwargs)
        # Import lazily so that nltk is only required when this splitter is
        # actually instantiated.
        try:
            from nltk.tokenize import sent_tokenize
        except ImportError as err:
            # Chain the original error so the real import failure stays
            # visible as the direct cause in the traceback.
            raise ImportError(
                "NLTK is not installed, please install it with `pip install nltk`."
            ) from err
        self._tokenizer = sent_tokenize
        self._separator = separator
        self._language = language

    def split_text(self, text: str) -> List[str]:
        """Split incoming text and return chunks.

        Args:
            text: The text to split.

        Returns:
            The result of ``self._merge_splits`` applied to the sentence list
            and the configured separator.
        """
        # First naively split the large input into sentences; combining them
        # into chunks is delegated to ``_merge_splits`` (not defined in this
        # class -- presumably provided by ``TextSplitter``).
        sentences = self._tokenizer(text, language=self._language)
        return self._merge_splits(sentences, self._separator)