Source code for langchain_community.graphs.memgraph_graph
import logging
from hashlib import md5
from typing import Any, Dict, List, Optional

from langchain_core.utils import get_from_dict_or_env

from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from langchain_community.graphs.graph_store import GraphStore

logger = logging.getLogger(__name__)

# Secondary label referenced by the constraint/index statements issued in
# `add_graph_documents` when `baseEntityLabel` is enabled.
BASE_ENTITY_LABEL = "__Entity__"

# NOTE: every Cypher query below keeps one clause per line. The clauses must be
# separated by whitespace; fusing them (e.g. "AS rowCALL ...") is invalid Cypher.

# Preferred schema query; `refresh_schema` falls back to the two queries below
# when this one fails.
SCHEMA_QUERY = """SHOW SCHEMA INFO"""

# Fallback: one row per node type with its property names and types.
NODE_PROPERTIES_QUERY = """
CALL schema.node_type_properties()
YIELD nodeType AS label, propertyName AS property, propertyTypes AS type
WITH label AS nodeLabels, collect({key: property, types: type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output
"""

# Fallback: distinct (start labels, relationship type, end labels) triples
# together with the relationship property names and value types.
REL_QUERY = """
MATCH (n)-[e]->(m)
WITH DISTINCT
    labels(n) AS start_node_labels,
    type(e) AS rel_type,
    labels(m) AS end_node_labels,
    e,
    keys(e) AS properties
UNWIND CASE WHEN size(properties) > 0 THEN properties ELSE [null] END AS prop
WITH
    start_node_labels,
    rel_type,
    end_node_labels,
    CASE WHEN prop IS NULL THEN [] ELSE [prop, valueType(e[prop])] END AS property_info
RETURN
    start_node_labels,
    rel_type,
    end_node_labels,
    COLLECT(DISTINCT CASE WHEN property_info <> [] THEN property_info ELSE null END) AS properties_info
"""

# Merge one node per row of $data (row.label, row.properties).
NODE_IMPORT_QUERY = """
UNWIND $data AS row
CALL merge.node(row.label, row.properties, {}, {})
YIELD node
RETURN distinct 'done' AS result
"""

# Make sure both endpoints of every relationship row exist (matched by id).
REL_NODES_IMPORT_QUERY = """
UNWIND $data AS row
MERGE (source {id: row.source_id})
MERGE (target {id: row.target_id})
RETURN distinct 'done' AS result
"""

# Merge a relationship of type row.type between the two endpoints of each row.
REL_IMPORT_QUERY = """
UNWIND $data AS row
MATCH (source {id: row.source_id})
MATCH (target {id: row.target_id})
WITH source, target, row
CALL merge.relationship(source, row.type, {}, {}, target, {})
YIELD rel
RETURN distinct 'done' AS result
"""

# Merge the source document as a :Document node keyed by its metadata id.
INCLUDE_DOCS_QUERY = """
MERGE (d:Document {id:$document.metadata.id})
SET d.content = $document.page_content
SET d += $document.metadata
RETURN distinct 'done' AS result
"""

# Link the :Document node to each row's source node with a MENTIONS relationship.
INCLUDE_DOCS_SOURCE_QUERY = """
UNWIND $data AS row
MATCH (source {id: row.source_id}), (d:Document {id: $document.metadata.id})
MERGE (d)-[:MENTIONS]->(source)
RETURN distinct 'done' AS result
"""

# Section headings for the textual schema description.
NODE_PROPS_TEXT = """Node labels and properties (name and type) are:"""

REL_PROPS_TEXT = """Relationship labels and properties are:"""

REL_TEXT = """Nodes are connected with the following relationships:"""
class MemgraphGraph(GraphStore):
    """Memgraph wrapper for graph operations.

    Parameters:
        url (Optional[str]): The URL of the Memgraph database server.
        username (Optional[str]): The username for database authentication.
        password (Optional[str]): The password for database authentication.
        database (str): The name of the database to connect to. Default is
            'memgraph'.
        refresh_schema (bool): A flag whether to refresh schema information
            at initialization. Default is True.
        driver_config (Dict): Configuration passed to Neo4j Driver.

    *Security note*: Make sure that the database connection uses credentials
        that are narrowly-scoped to only include necessary permissions.
        Failure to do so may result in data corruption or loss, since the
        calling code may attempt commands that would result in deletion,
        mutation of data if appropriately prompted or reading sensitive data
        if such data is present in the database.
        The best way to guard against such negative outcomes is to (as
        appropriate) limit the permissions granted to the credentials used
        with this tool.

        See https://python.langchain.com/docs/security for more information.
    """

    def __init__(
        self,
        url: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        database: Optional[str] = None,
        refresh_schema: bool = True,
        *,
        driver_config: Optional[Dict] = None,
    ) -> None:
        """Create a new Memgraph graph wrapper instance.

        Values that are not passed explicitly are resolved through
        ``get_from_dict_or_env`` with the keys ``MEMGRAPH_URI``,
        ``MEMGRAPH_USERNAME``, ``MEMGRAPH_PASSWORD`` and ``MEMGRAPH_DATABASE``
        (the database falls back to ``"memgraph"``).

        Raises:
            ImportError: If the ``neo4j`` package is not installed.
            ValueError: If the server is unavailable or authentication fails.
        """
        try:
            import neo4j
        except ImportError as e:
            raise ImportError(
                "Could not import neo4j python package. "
                "Please install it with `pip install neo4j`."
            ) from e

        url = get_from_dict_or_env({"url": url}, "url", "MEMGRAPH_URI")

        # if username and password are "", assume auth is disabled
        if username == "" and password == "":
            auth = None
        else:
            username = get_from_dict_or_env(
                {"username": username},
                "username",
                "MEMGRAPH_USERNAME",
            )
            password = get_from_dict_or_env(
                {"password": password},
                "password",
                "MEMGRAPH_PASSWORD",
            )
            auth = (username, password)
        database = get_from_dict_or_env(
            {"database": database}, "database", "MEMGRAPH_DATABASE", "memgraph"
        )

        self._driver = neo4j.GraphDatabase.driver(
            url, auth=auth, **(driver_config or {})
        )
        self._database = database
        self.schema: str = ""
        self.structured_schema: Dict[str, Any] = {}

        # Verify connection. On failure the driver is closed before raising so
        # that a failed construction does not leave a driver open.
        try:
            self._driver.verify_connectivity()
        except neo4j.exceptions.ServiceUnavailable as e:
            self._driver.close()
            raise ValueError(
                "Could not connect to Memgraph database. "
                "Please ensure that the url is correct"
            ) from e
        except neo4j.exceptions.AuthError as e:
            self._driver.close()
            raise ValueError(
                "Could not connect to Memgraph database. "
                "Please ensure that the username and password are correct"
            ) from e

        # Set schema; errors from the refresh propagate to the caller unchanged.
        if refresh_schema:
            self.refresh_schema()

    def close(self) -> None:
        """Close the driver connection, if there is one, and drop the reference."""
        if self._driver:
            logger.info("Closing the driver connection.")
            self._driver.close()
            self._driver = None

    @property
    def get_schema(self) -> str:
        """Returns the schema of the Graph database"""
        return self.schema

    @property
    def get_structured_schema(self) -> Dict[str, Any]:
        """Returns the structured schema of the Graph database"""
        return self.structured_schema

    @staticmethod
    def _should_retry_in_session(code: Optional[str], message: str) -> bool:
        """Return True for the known errors after which `query` retries via a session."""
        # isCallInTransactionError
        if (
            code == "Neo.DatabaseError.Statement.ExecutionFailed"
            or code == "Neo.DatabaseError.Transaction.TransactionStartFailed"
        ) and "in an implicit transaction" in message:
            return True
        # isPeriodicCommitError
        if code == "Neo.ClientError.Statement.SemanticError" and (
            "in an open transaction is not possible" in message
            or "tried to execute in an explicit transaction" in message
        ):
            return True
        # Memgraph-specific errors
        return code == "Memgraph.ClientError.MemgraphError.MemgraphError" and (
            "in multicommand transactions" in message
            or "SchemaInfo disabled" in message
        )

    def query(
        self, query: str, params: Optional[dict] = None
    ) -> List[Dict[str, Any]]:
        """Query the graph.

        Args:
            query (str): The Cypher query to execute.
            params (dict): The parameters to pass to the query. Defaults to
                no parameters.

        Returns:
            List[Dict[str, Any]]: The list of dictionaries containing the
                query results.

        Raises:
            neo4j.exceptions.Neo4jError: If the query fails with an error that
                is not one of the known cases retried through a session.
        """
        from neo4j.exceptions import Neo4jError

        # Use a fresh dict per call instead of a shared mutable default.
        params = {} if params is None else params

        try:
            data, _, _ = self._driver.execute_query(
                query,
                database_=self._database,
                parameters_=params,
            )
            return [r.data() for r in data]
        except Neo4jError as e:
            # `message` may be None; treat that as "no known marker present"
            # so the original error is re-raised rather than a TypeError.
            if not self._should_retry_in_session(e.code, e.message or ""):
                raise
            # fallback to allow implicit transactions
            with self._driver.session(database=self._database) as session:
                result = session.run(query, params)
                return [r.data() for r in result]

    def refresh_schema(self) -> None:
        """Refreshes the Memgraph graph schema information.

        Leaves the schema untouched when the database has no nodes. Otherwise
        tries ``SHOW SCHEMA INFO`` first and, if that raises a ``Neo4jError``,
        falls back to the node-properties and relationship queries. Both
        ``self.structured_schema`` and ``self.schema`` are updated.
        """
        import ast

        from neo4j.exceptions import Neo4jError

        # leave schema empty if db is empty
        if not self.query("MATCH (n) RETURN n LIMIT 1"):
            return

        # first try with SHOW SCHEMA INFO
        try:
            result = self.query(SCHEMA_QUERY)[0].get("schema")
            # A textual result is parsed into Python literals; anything else
            # is used as returned.
            if isinstance(result, (str, ast.AST)):
                schema_result = ast.literal_eval(result)
            else:
                schema_result = result
            assert schema_result is not None
            structured_schema = get_schema_subset(schema_result)
            self.structured_schema = structured_schema
            self.schema = transform_schema_to_text(structured_schema)
            return
        except Neo4jError as e:
            if (
                e.code == "Memgraph.ClientError.MemgraphError.MemgraphError"
                and "SchemaInfo disabled" in (e.message or "")
            ):
                logger.info(
                    "Schema generation with SHOW SCHEMA INFO query failed. "
                    "Set --schema-info-enabled=true to use SHOW SCHEMA INFO query. "
                    "Falling back to alternative queries."
                )
            else:
                # Still best-effort: fall back, but leave a trace of the cause.
                logger.info(
                    "SHOW SCHEMA INFO query failed with code %s. "
                    "Falling back to alternative queries.",
                    e.code,
                )

        # fallback on Cypher without SHOW SCHEMA INFO
        nodes = [row["output"] for row in self.query(NODE_PROPERTIES_QUERY)]
        rels = self.query(REL_QUERY)

        structured_schema = get_reformated_schema(nodes, rels)
        self.structured_schema = structured_schema
        self.schema = transform_schema_to_text(structured_schema)

    def add_graph_documents(
        self,
        graph_documents: List[GraphDocument],
        include_source: bool = False,
        baseEntityLabel: bool = False,
    ) -> None:
        """
        Take GraphDocument as input and use it to construct a graph in Memgraph.

        Parameters:
        - graph_documents (List[GraphDocument]): A list of GraphDocument objects
        that contain the nodes and relationships to be added to the graph. Each
        GraphDocument should encapsulate the structure of part of the graph,
        including nodes, relationships, and the source document information.
        - include_source (bool, optional): If True, stores the source document
        and links it to nodes in the graph using the MENTIONS relationship.
        This is useful for tracing back the origin of data. Merges source
        documents based on the `id` property from the source document metadata
        if available; otherwise it calculates the MD5 hash of `page_content`
        for merging process. Defaults to False.
        - baseEntityLabel (bool, optional): If True, each newly created node
        gets a secondary __Entity__ label, which is indexed and improves import
        speed and performance. Defaults to False.
        """
        if baseEntityLabel:
            self.query(
                f"CREATE CONSTRAINT ON (b:{BASE_ENTITY_LABEL}) ASSERT b.id IS UNIQUE;"
            )
            self.query(f"CREATE INDEX ON :{BASE_ENTITY_LABEL}(id);")
            self.query(f"CREATE INDEX ON :{BASE_ENTITY_LABEL};")

        for document in graph_documents:
            if include_source:
                # Documents without an id are keyed by the MD5 of their content.
                if not document.source.metadata.get("id"):
                    document.source.metadata["id"] = md5(
                        document.source.page_content.encode("utf-8")
                    ).hexdigest()

                self.query(INCLUDE_DOCS_QUERY, {"document": document.source.__dict__})

            self.query(
                NODE_IMPORT_QUERY,
                {"data": _transform_nodes(document.nodes, baseEntityLabel)},
            )

            rel_data = _transform_relationships(document.relationships, baseEntityLabel)
            # Endpoints are merged first so the relationship import can MATCH them.
            self.query(
                REL_NODES_IMPORT_QUERY,
                {"data": rel_data},
            )
            self.query(
                REL_IMPORT_QUERY,
                {"data": rel_data},
            )

            if include_source:
                self.query(
                    INCLUDE_DOCS_SOURCE_QUERY,
                    {"data": rel_data, "document": document.source.__dict__},
                )

        # Refresh once, after every document has been imported.
        self.refresh_schema()