Source code for langchain_community.graph_vectorstores.networkx

"""Utilities for using Graph Vector Stores with networkx."""

import typing

from langchain_core.documents import Document

from langchain_community.graph_vectorstores.links import get_links

if typing.TYPE_CHECKING:
    import networkx as nx


def documents_to_networkx(
    documents: typing.Iterable[Document],
    *,
    tag_nodes: bool = True,
) -> "nx.DiGraph":
    """Return the networkx directed graph corresponding to the documents.

    Args:
        documents: The documents to convert to networkx. Any iterable is
            accepted, including one-shot iterators such as generators.
        tag_nodes: If `True`, each tag will be rendered as a node, with edges
            to/from the corresponding documents. If `False`, edges will be
            between documents, with a label corresponding to the tag(s)
            connecting them.

    Returns:
        A directed graph with one node per document (keyed by the document ID,
        with the page content stored in the ``text`` attribute). When
        ``tag_nodes`` is `True` the graph additionally has one node per unique
        ``(kind, tag)`` pair; otherwise documents are connected directly and
        each edge carries a ``label`` attribute holding the string form of the
        list of connecting tag labels.

    Raises:
        ValueError: If any document has no ID.
    """
    import networkx as nx

    # The documents are traversed twice below. Materialize them so that
    # single-pass iterables (e.g. generators) are not silently exhausted by
    # the first pass, which would leave the graph without document nodes.
    documents = list(documents)

    graph = nx.DiGraph()

    tag_ids: typing.Dict[typing.Tuple[str, str], str] = {}
    documents_by_incoming: typing.Dict[str, typing.Set[str]] = {}

    # First pass:
    # - Register tag IDs for each unique (kind, tag).
    # - If rendering tag nodes, add them to the graph.
    # - If not rendering tag nodes, create a dictionary of documents by
    #   incoming tags.
    for document in documents:
        if document.id is None:
            raise ValueError(f"Illegal graph document without ID: {document}")

        for link in get_links(document):
            tag_key = (link.kind, link.tag)
            tag_id = tag_ids.get(tag_key)
            if tag_id is None:
                tag_id = f"tag_{len(tag_ids)}"
                tag_ids[tag_key] = tag_id

                if tag_nodes:
                    graph.add_node(tag_id, label=f"{link.kind}:{link.tag}")

            if not tag_nodes and link.direction in ("in", "bidir"):
                documents_by_incoming.setdefault(tag_id, set()).add(document.id)

    # Second pass:
    # - Render document nodes.
    # - If rendering tag nodes, render edges to/from documents and tag nodes.
    # - If not rendering tag nodes, render edges to/from documents based on
    #   tags.
    for document in documents:
        graph.add_node(document.id, text=document.page_content)

        targets: typing.Dict[str, typing.List[str]] = {}
        for link in get_links(document):
            tag_id = tag_ids[(link.kind, link.tag)]
            incoming = link.direction in ("in", "bidir")
            outgoing = link.direction in ("out", "bidir")

            if tag_nodes:
                if incoming:
                    graph.add_edge(tag_id, document.id)
                if outgoing:
                    graph.add_edge(document.id, tag_id)
            elif outgoing:
                label = f"{link.kind}:{link.tag}"
                # A tag may only ever appear as an outgoing link, in which
                # case no document receives it and there is nothing to
                # connect; fall back to an empty collection rather than
                # failing on the missing key.
                for target in documents_by_incoming.get(tag_id, ()):
                    # Skip self-loops for bidirectional links.
                    if target != document.id:
                        targets.setdefault(target, []).append(label)

        # Avoid a multigraph by collecting the list of labels for each edge.
        if not tag_nodes:
            for target, labels in targets.items():
                graph.add_edge(document.id, target, label=str(labels))

    return graph