class KuzuGraph:
    """Kùzu wrapper for graph operations.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the calling
        code may attempt commands that would result in deletion, mutation
        of data if appropriately prompted or reading sensitive data if such
        data is present in the database.
        The best way to guard against such negative outcomes is to (as appropriate)
        limit the permissions granted to the credentials used with this tool.

        See https://python.langchain.com/docs/security for more information.

    Node and relationship type names are interpolated directly into the query
    text built by this class (only ids and text are passed as query
    parameters), so callers must make sure those names are trusted, valid
    identifiers.
    """

    def __init__(
        self, db: Any, database: str = "kuzu", allow_dangerous_requests: bool = False
    ) -> None:
        """Initializes the Kùzu graph database connection.

        Args:
            db: Database object handed to ``kuzu.Connection``.
            database: Name stored on the instance as ``self.database``.
            allow_dangerous_requests: Explicit opt-in flag; anything other than
                the literal ``True`` is rejected.

        Raises:
            ValueError: If ``allow_dangerous_requests`` is not ``True``.
            ImportError: If the ``kuzu`` package cannot be imported.
        """
        # Identity check on purpose: truthy values such as 1 or "yes" do not
        # count as an opt-in.
        if allow_dangerous_requests is not True:
            raise ValueError(
                "The KuzuGraph class is a powerful tool that can be used to execute "
                "arbitrary queries on the database. To enable this functionality, "
                "set the `allow_dangerous_requests` parameter to `True` when "
                "constructing the KuzuGraph object."
            )

        try:
            import kuzu
        except ImportError as exc:
            # Chain the original error so the underlying import failure stays
            # visible in the traceback.
            raise ImportError(
                "Could not import Kùzu python package. "
                "Please install Kùzu with `pip install kuzu`."
            ) from exc

        self.db = db
        self.conn = kuzu.Connection(self.db)
        self.database = database
        # NOTE(review): refresh_schema is not defined in this part of the class;
        # presumably it populates self.schema -- confirm.
        self.refresh_schema()

    @property
    def get_schema(self) -> str:
        """Returns the schema of the Kùzu database"""
        return self.schema

    def _create_chunk_node_table(self) -> None:
        """Create the ``Chunk`` node table if it does not exist yet."""
        self.conn.execute(
            " CREATE NODE TABLE IF NOT EXISTS Chunk ( id STRING, text STRING, "
            "type STRING, PRIMARY KEY(id) ); "
        )

    def _create_entity_node_table(self, node_label: str) -> None:
        """Create the node table named ``node_label`` if it does not exist yet."""
        self.conn.execute(
            f" CREATE NODE TABLE IF NOT EXISTS {node_label} ( id STRING, "
            "type STRING, PRIMARY KEY(id) ); "
        )

    def _create_entity_relationship_table(self, rel: Relationship) -> None:
        """Create the relationship table for ``rel`` if it does not exist yet.

        The table is named after ``rel.type`` and connects ``rel.source.type``
        to ``rel.target.type``.
        """
        self.conn.execute(
            f" CREATE REL TABLE IF NOT EXISTS {rel.type} ( "
            f"FROM {rel.source.type} TO {rel.target.type} ); "
        )

    def _create_mentions_rel_table_group(self, node_labels: List[str]) -> None:
        """Create the ``MENTIONS`` table group linking ``Chunk`` to each label."""
        endpoints = ", ".join(f"FROM Chunk TO {label}" for label in node_labels)
        self.conn.execute(
            "CREATE REL TABLE GROUP IF NOT EXISTS MENTIONS ("
            + endpoints
            # Common properties shared by every table in the group.
            + ", label STRING, triplet_source_id STRING)"
        )

    def _merge_chunk_node(self, document: GraphDocument) -> None:
        """Upsert the ``Chunk`` node for the source of ``document``.

        When the source metadata carries no truthy ``id``, the MD5 hex digest
        of the UTF-8 encoded ``page_content`` is written into the metadata and
        used as the chunk id.
        """
        source = document.source
        if not source.metadata.get("id"):
            source.metadata["id"] = md5(
                source.page_content.encode("utf-8")
            ).hexdigest()

        self.conn.execute(
            " MERGE (c:Chunk {id: $id}) SET c.text = $text, "
            'c.type = "text_chunk" ',
            parameters={
                "id": source.metadata["id"],
                "text": source.page_content,
            },
        )

    def add_graph_documents(
        self,
        graph_documents: List[GraphDocument],
        allowed_relationships: List[Tuple[str, str, str]],
        include_source: bool = False,
    ) -> None:
        """
        Adds a list of `GraphDocument` objects that represent nodes and
        relationships in a graph to a Kùzu backend.

        Table-creation statements all use ``IF NOT EXISTS``; they are issued
        once per call (once per distinct relationship triple for relationship
        tables) rather than once per document or node.

        Parameters:
          - graph_documents (List[GraphDocument]): A list of `GraphDocument`
            objects that contain the nodes and relationships to be added to
            the graph. Each `GraphDocument` should encapsulate the structure
            of part of the graph, including nodes, relationships, and the
            source document information. An empty list is a no-op.
          - allowed_relationships (List[Tuple[str, str, str]]): A list of
            (source node type, relationship type, target node type) tuples.
            Accepted as part of the method's interface; this implementation
            does not read it. Relationship tables are created on demand from
            each relationship's own type and endpoint types.
          - include_source (bool): If True, stores the source document as a
            `Chunk` node and links it to every node of that document using
            the `MENTIONS` relationship. This is useful for tracing back the
            origin of data. Merges source documents based on the `id`
            property from the source document metadata if available;
            otherwise the MD5 hash of `page_content` is computed and written
            back into the source metadata. Defaults to False.
        """
        if not graph_documents:
            return

        # De-duplicate labels while keeping first-seen order so the generated
        # DDL is the same on every run (a plain set has no stable order).
        node_labels = list(
            dict.fromkeys(
                node.type for document in graph_documents for node in document.nodes
            )
        )

        # Schema setup is independent of the individual documents, so do it
        # once up front instead of inside the per-document/per-node loops.
        if include_source:
            self._create_chunk_node_table()
        for node_label in node_labels:
            self._create_entity_node_table(node_label)
        if include_source and node_labels:
            # With no labels there is nothing for MENTIONS to point at.
            self._create_mentions_rel_table_group(node_labels)

        # (type, source type, target type) triples whose table DDL has already
        # been issued during this call.
        created_rel_tables = set()

        for document in graph_documents:
            if include_source:
                self._merge_chunk_node(document)

            # Add entity nodes from data
            for node in document.nodes:
                self.conn.execute(
                    f' MERGE (e:{node.type}{{id: $id}}) SET e.type = "entity" ',
                    parameters={"id": node.id},
                )
                if include_source:
                    # Link the source chunk to the entity it mentions.
                    self.conn.execute(
                        f" MATCH (c:Chunk {{id: $id}}), "
                        f"(e:{node.type}{{id: $node_id}}) "
                        "MERGE (c)-[m:MENTIONS]->(e) "
                        "SET m.triplet_source_id = $id ",
                        parameters={
                            "id": document.source.metadata["id"],
                            "node_id": node.id,
                        },
                    )

            # Add entity relationships
            for rel in document.relationships:
                source_label = rel.source.type
                target_label = rel.target.type
                table_key = (rel.type, source_label, target_label)
                if table_key not in created_rel_tables:
                    self._create_entity_relationship_table(rel)
                    created_rel_tables.add(table_key)

                # The line break inside the second pattern is kept from the
                # original query text.
                self.conn.execute(
                    f" MATCH (e1:{source_label}{{id: $source_id}}), "
                    f"(e2:{target_label}{{id: \n$target_id}}) "
                    f"MERGE (e1)-[:{rel.type}]->(e2) ",
                    parameters={
                        "source_id": rel.source.id,
                        "target_id": rel.target.id,
                    },
                )