Source code for langchain_text_splitters.jsx

import re
from typing import Any, List, Optional

from langchain_text_splitters import RecursiveCharacterTextSplitter


[docs] class JSFrameworkTextSplitter(RecursiveCharacterTextSplitter): """Text splitter that handles React (JSX), Vue, and Svelte code. This splitter extends RecursiveCharacterTextSplitter to handle React (JSX), Vue, and Svelte code by: 1. Detecting and extracting custom component tags from the text 2. Using those tags as additional separators along with standard JS syntax The splitter combines: - Custom component tags as separators (e.g. <Component, <div) - JavaScript syntax elements (function, const, if, etc) - Standard text splitting on newlines This allows chunks to break at natural boundaries in React, Vue, and Svelte component code. """
[docs] def __init__( self, separators: Optional[List[str]] = None, chunk_size: int = 2000, chunk_overlap: int = 0, **kwargs: Any, ) -> None: """Initialize the JS Framework text splitter. Args: separators: Optional list of custom separator strings to use chunk_size: Maximum size of chunks to return chunk_overlap: Overlap in characters between chunks **kwargs: Additional arguments to pass to parent class """ super().__init__(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs) self._separators = separators or []
[docs] def split_text(self, text: str) -> List[str]: """Split text into chunks. This method splits the text into chunks by: - Extracting unique opening component tags using regex - Creating separators list with extracted tags and JS separators - Splitting the text using the separators by calling the parent class method Args: text: String containing code to split Returns: List of text chunks split on component and JS boundaries """ # Extract unique opening component tags using regex # Regex to match opening tags, excluding self-closing tags opening_tags = re.findall(r"<\s*([a-zA-Z0-9]+)[^>]*>", text) component_tags = [] for tag in opening_tags: if tag not in component_tags: component_tags.append(tag) component_separators = [f"<{tag}" for tag in component_tags] js_separators = [ "\nexport ", " export ", "\nfunction ", "\nasync function ", " async function ", "\nconst ", "\nlet ", "\nvar ", "\nclass ", " class ", "\nif ", " if ", "\nfor ", " for ", "\nwhile ", " while ", "\nswitch ", " switch ", "\ncase ", " case ", "\ndefault ", " default ", ] separators = ( self._separators + js_separators + component_separators + ["<>", "\n\n", "&&\n", "||\n"] ) self._separators = separators chunks = super().split_text(text) return chunks