Source code for langchain_community.document_loaders.parsers.language.sql
from typing import TYPE_CHECKING
from langchain_community.document_loaders.parsers.language.tree_sitter_segmenter import ( # noqa: E501
TreeSitterSegmenter,
)
if TYPE_CHECKING:
from tree_sitter import Language
# Tree-sitter query text returned by SQLSegmenter.get_chunk_query().
# The bracketed list is a Tree-sitter alternation: a syntax node matching any
# one of the five statement patterns is captured under the name after "@".
# NOTE(review): the node type names below must exist in the SQL grammar that
# get_language("sql") loads -- confirm against that grammar's node types.
CHUNK_QUERY = """
[
(create_table_statement) @create
(select_statement) @select
(insert_statement) @insert
(update_statement) @update
(delete_statement) @delete
]
"""
[docs]
class SQLSegmenter(TreeSitterSegmenter):
    """Tree-sitter based code segmenter for SQL sources.

    Supplies the SQL grammar and the statement-level chunk query
    (CREATE TABLE, SELECT, INSERT, UPDATE, DELETE) to the Tree-sitter
    segmenter base class, and renders the extracted statements as SQL
    line comments.
    """

    def get_language(self) -> "Language":
        """Load and return the Tree-sitter grammar registered as ``"sql"``.

        The ``tree_sitter_languages`` import is deferred to call time so
        that importing this module does not require the optional package.
        """
        from tree_sitter_languages import get_language as load_grammar

        return load_grammar("sql")

    def get_chunk_query(self) -> str:
        """Return the Tree-sitter query text that selects SQL statements."""
        return CHUNK_QUERY

    def simplify_code(self) -> str:
        """Render every extracted SQL statement as a ``--`` comment.

        Each item returned by ``extract_functions_classes()`` is stripped
        of leading/trailing whitespace and prefixed with ``-- Code for: ``;
        the resulting entries are joined with newlines.

        Returns:
            The newline-joined comment text (an empty string when no
            statement was extracted).
        """
        # NOTE(review): only surrounding whitespace is stripped, so a
        # statement spanning several lines keeps its inner newlines and only
        # its first line carries the "--" prefix -- confirm this is intended.
        summary_lines = []
        for statement in self.extract_functions_classes():
            summary_lines.append(f"-- Code for: {statement.strip()}")
        return "\n".join(summary_lines)