def generate_random_string(length: int) -> str:
    """Return a random string made only of ASCII letters.

    Args:
        length: Number of characters to produce.

    Returns:
        A string of ``length`` characters, each drawn independently from
        ``string.ascii_letters`` (upper- and lowercase letters; no digits and
        no punctuation).
    """
    alphabet = string.ascii_letters
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return "".join(picked)
class SearchType(str, enum.Enum):
    """Search strategies available in the FalkorDB vector store.

    Members:
        VECTOR: Query only the vector index; results are ranked by the
            similarity between vector embeddings.
        HYBRID: Query both the full-text index and the vector index and
            combine the two result sets.
    """

    VECTOR = "vector"
    HYBRID = "hybrid"


# Strategy used when the caller does not pick one explicitly.
DEFAULT_SEARCH_TYPE = SearchType.VECTOR
class IndexType(str, enum.Enum):
    """Kinds of graph entity a vector index can be attached to.

    Members:
        NODE: The index is defined on nodes.
        RELATIONSHIP: The index is defined on relationships (edges).
    """

    NODE = "NODE"
    RELATIONSHIP = "RELATIONSHIP"


# Index type used when the caller does not pick one explicitly.
DEFAULT_INDEX_TYPE = IndexType.NODE
def dict_to_yaml_str(input_dict: Dict, indent: int = 0) -> str:
    """Render a dictionary as a YAML-like string using only the stdlib.

    Args:
        input_dict: Mapping to render. Nested dictionaries are rendered
            recursively one indentation level deeper.
        indent: Current indentation level; the padding in front of every
            emitted line is ``" " * indent``.

    Returns:
        The rendered text. Every emitted line ends with a newline. List
        values are written as ``- item`` lines at the same padding as their
        key; all other values are written as ``key: value``.
    """
    pad = " " * indent
    chunks = []
    for name, entry in input_dict.items():
        if isinstance(entry, dict):
            chunks.append(f"{pad}{name}:\n")
            chunks.append(dict_to_yaml_str(entry, indent + 1))
        elif isinstance(entry, list):
            chunks.append(f"{pad}{name}:\n")
            chunks.extend(f"{pad}- {element}\n" for element in entry)
        else:
            chunks.append(f"{pad}{name}: {entry}\n")
    return "".join(chunks)
def construct_metadata_filter(
    filter: Optional[Dict[str, Any]] = None,
) -> Tuple[str, Dict[str, Any]]:
    """Build a Cypher ``WHERE`` fragment from a metadata filter.

    Every ``key: value`` pair becomes an equality test ``n.<key> = <value>``
    on the node variable ``n``; the tests are joined with ``AND``. Values are
    written inline into the fragment rather than passed as parameters.

    Args:
        filter: Mapping of property name to required value. ``None`` or an
            empty mapping means "no filter".

    Returns:
        A ``(snippet, params)`` tuple. ``snippet`` is the filter fragment
        (empty when there is no filter); ``params`` is always an empty
        dictionary because values are inlined.

    Note:
        String values are quoted and have backslashes and single quotes
        escaped so that they cannot terminate the literal early. Property
        names (the keys) are inserted verbatim and must come from a trusted
        source.
    """
    if not filter:
        return "", {}

    conditions = []
    for key, value in filter.items():
        if isinstance(value, str):
            # Escape the backslash first so the escapes added for quotes are
            # not themselves doubled.
            escaped = value.replace("\\", "\\\\").replace("'", "\\'")
            conditions.append(f"n.{key} = '{escaped}'")
        else:
            # Non-string values are rendered with their str() form.
            conditions.append(f"n.{key} = {value}")

    return " AND ".join(conditions), {}
def _get_search_index_query(
    search_type: SearchType, index_type: IndexType = DEFAULT_INDEX_TYPE
) -> str:
    """Return the Cypher prefix that queries the index for a search.

    Args:
        search_type: ``SearchType.VECTOR`` or ``SearchType.HYBRID``. Only
            consulted for node indexes.
        index_type: Whether the vector index lives on nodes or relationships.

    Returns:
        The query text for the requested combination. The hybrid variant
        runs the vector and the full-text lookups, divides each result set's
        scores by that set's maximum, unions them, keeps the best score per
        node and limits the output to ``$k`` rows. Combinations not covered
        below fall through and yield ``None``.
    """
    vector_nodes = (
        "CALL db.idx.vector.queryNodes($entity_label, "
        "$entity_property, $k, vecf32($embedding)) "
        "YIELD node, score "
    )
    # Rescales the scores of one sub-query by that sub-query's maximum.
    rescale = (
        "WITH collect({node: node, score: score})"
        " AS nodes, max(score) AS max_score "
        "UNWIND nodes AS n "
        "RETURN n.node AS node, (n.score / max_score) AS score "
    )

    if index_type == IndexType.RELATIONSHIP:
        return (
            "CALL db.idx.vector.queryRelationships"
            "($entity_label, $entity_property, $k, vecf32($embedding)) "
            "YIELD relationship, score "
        )

    if index_type == IndexType.NODE:
        if search_type == SearchType.HYBRID:
            return (
                "CALL { "
                + vector_nodes
                + rescale
                + "UNION "
                + "CALL db.idx.fulltext.queryNodes($entity_label, $query) "
                + "YIELD node, score "
                + rescale
                + "} "
                + "WITH node, max(score) AS score "
                + "ORDER BY score DESC LIMIT $k "
            )
        if search_type == SearchType.VECTOR:
            return vector_nodes
    # Any other combination intentionally falls off the end (returns None).
def process_index_data(data: List[List[Any]]) -> List[Dict[str, Any]]:
    """Flatten raw index rows into one record per indexed property.

    Each input row is read positionally:

    - position 0: the entity label (e.g. ``'Person'``),
    - position 2: a mapping of property name to its list of index types,
    - position 3: per-property index options (or a plain string when no
      options are available),
    - position 6: the entity type,
    - position 7: the index status.

    Args:
        data: Rows describing indexes, laid out as above.

    Returns:
        A list with one dictionary per ``(row, property)`` pair containing:

        - ``entity_label``: label of the indexed entity.
        - ``entity_property``: the indexed property.
        - ``entity_type``: value taken from position 6 of the row.
        - ``index_type``: the *first* index type listed for the property.
        - ``index_status``: value taken from position 7 of the row.
        - ``index_dimension``: the ``dimension`` option when the property
          has a ``VECTOR`` index and options are a mapping, else ``None``.
        - ``index_similarityFunction``: the ``similarityFunction`` option
          under the same conditions, else ``None``.
    """
    processed = []
    for row in data:
        label = row[0]
        types_by_property = row[2]
        status = row[7]
        kind = row[6]

        for prop, index_types in types_by_property.items():
            primary_type = index_types[0]
            dimension = None
            similarity = None

            # Vector details exist only when the options column is a mapping.
            if "VECTOR" in index_types and not isinstance(row[3], str):
                vector_options = row[3].get(prop, {})
                dimension = vector_options.get("dimension")
                similarity = vector_options.get("similarityFunction")

            processed.append(
                {
                    "entity_label": label,
                    "entity_property": prop,
                    "entity_type": kind,
                    "index_type": primary_type,
                    "index_status": status,
                    "index_dimension": dimension,
                    "index_similarityFunction": similarity,
                }
            )
    return processed
class FalkorDBVector(VectorStore):
    """`FalkorDB` vector index.

    To use, you should have the ``falkordb`` python package installed.

    Args:
        host: FalkorDB host.
        port: FalkorDB port.
        username: Optionally provide your username details if you are
            connecting to a FalkorDB Cloud database instance.
        password: Optionally provide your password details if you are
            connecting to a FalkorDB Cloud database instance.
        embedding: Any embedding function implementing the
            `langchain.embeddings.base.Embeddings` interface.
        distance_strategy: The distance strategy to use.
            (default: "EUCLIDEAN")
        pre_delete_collection: If True, will delete existing data if it
            exists. (default: False). Useful for testing.
        search_type: Similarity search type to use. Could be either
            SearchType.VECTOR or SearchType.HYBRID.
            (default: SearchType.VECTOR)
        database: Optionally provide the name of the database to use, else
            FalkorDBVector will generate a random database for you.
        node_label: Provide the label of the node you want the embeddings
            of your data to be stored in. (default: "Chunk")
        relation_type: Provide the relationship type of the relationship
            you want the embeddings of your data to be stored in.
            (default: "")
        embedding_node_property: Provide the name of the property in which
            you want your embeddings to be stored. (default: "embedding")
        text_node_property: Provide the name of the property in which you
            want your texts to be stored. (default: "text")
        embedding_dimension: Provide the dimension of your embeddings or it
            will be calculated for you.
        retrieval_query: Optionally provide a retrieval_query, else the
            default retrieval query will be used.
        index_type: Provide the index type for the VectorStore, else the
            default index type will be used.
        graph: Optionally provide the graph you would like to use.
        relevance_score_fn: Optionally provide a function that computes a
            relevance score based on the similarity score returned by the
            search.
        ssl: Specify whether the connection to the database should be
            secured using SSL/TLS encryption. (default: False)

    Example:
        .. code-block:: python

            from langchain_community.vectorstores.falkordb_vector import FalkorDBVector
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from langchain_text_splitters import CharacterTextSplitter

            host="localhost"
            port=6379
            raw_documents = TextLoader('../../../state_of_the_union.txt').load()
            text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
            documents = text_splitter.split_documents(raw_documents)

            embeddings=OpenAIEmbeddings()
            vectorstore = FalkorDBVector.from_documents(
                embedding=embeddings,
                documents=documents,
                host=host,
                port=port,
            )
    """
def __init__(
    self,
    embedding: Embeddings,
    *,
    search_type: SearchType = SearchType.VECTOR,
    username: Optional[str] = None,
    password: Optional[str] = None,
    host: str = "localhost",
    port: int = 6379,
    distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
    database: Optional[str] = generate_random_string(4),
    node_label: str = "Chunk",
    relation_type: str = "",
    embedding_node_property: str = "embedding",
    text_node_property: str = "text",
    embedding_dimension: Optional[int] = None,
    retrieval_query: Optional[str] = "",
    index_type: IndexType = DEFAULT_INDEX_TYPE,
    graph: Optional[FalkorDBGraph] = None,
    relevance_score_fn: Optional[Callable[[float], float]] = None,
    ssl: bool = False,
    pre_delete_collection: bool = False,
    metadata: List[Any] = [],
) -> None:
    """Connect to FalkorDB and configure the vector store.

    Args:
        embedding: Embedding model used for documents and queries.
        search_type: Search strategy to use.
        username: Login name; falls back to ``FALKORDB_USERNAME``.
        password: Login password; falls back to ``FALKORDB_PASSWORD``.
        host: Database host (ignored when ``graph`` is given).
        port: Database port (ignored when ``graph`` is given).
        distance_strategy: Must be ``EUCLIDEAN_DISTANCE`` or ``COSINE``.
        database: Name of the graph to select. The default is computed
            once, when the function is defined, so every store created in
            the same process without an explicit ``database`` shares the
            same randomly named graph.
        node_label: Label of the nodes holding the embeddings.
        relation_type: Relationship type holding the embeddings.
        embedding_node_property: Property storing the embedding vector.
        text_node_property: Property storing the text.
        embedding_dimension: Embedding size; when falsy it is measured by
            embedding the probe string ``"foo"``.
        retrieval_query: Custom retrieval query.
        index_type: Whether the index is on nodes or relationships.
        graph: Existing graph wrapper whose driver is reused instead of
            opening a new connection.
        relevance_score_fn: Optional override for relevance scoring.
        ssl: Whether to connect over SSL/TLS.
        pre_delete_collection: When true, delete all nodes labelled
            ``node_label`` after connecting.
        metadata: Initial list of known metadata keys.

    Raises:
        ImportError: If ``falkordb`` or ``redis`` is not installed.
        ValueError: If the distance strategy is unsupported, the connection
            or authentication fails, or ``embedding_node_property`` /
            ``node_label`` is empty.
    """
    try:
        import falkordb
    except ImportError as exc:
        raise ImportError(
            "Could not import falkordb python package."
            "Please install it with `pip install falkordb`"
        ) from exc

    try:
        import redis.exceptions
    except ImportError as exc:
        raise ImportError(
            "Could not import redis.exceptions."
            "Please install it with `pip install redis`"
        ) from exc

    # Allow only cosine and euclidean distance strategies
    if distance_strategy not in [
        DistanceStrategy.EUCLIDEAN_DISTANCE,
        DistanceStrategy.COSINE,
    ]:
        raise ValueError(
            "`distance_strategy` must be either 'EULIDEAN_DISTANCE` or `COSINE`"
        )

    # Graph object takes precedent over env or input params
    if graph:
        self._database = graph._graph
        self._driver = graph._driver
    else:
        # Handle credentials via environment variables or input params
        self._host = host
        self._port = port
        self._username = username or os.environ.get("FALKORDB_USERNAME")
        self._password = password or os.environ.get("FALKORDB_PASSWORD")
        self._ssl = ssl

        # Initialize the FalkorDB connection
        try:
            self._driver = falkordb.FalkorDB(
                host=self._host,
                port=self._port,
                username=self._username,
                password=self._password,
                ssl=self._ssl,
            )
        # AuthenticationError is a subclass of ConnectionError in redis-py,
        # so it has to be handled first or its branch is never reached.
        except redis.exceptions.AuthenticationError as exc:
            raise ValueError(
                "Could not connect to FalkorDB database. "
                "Please ensure that the username and password are correct"
            ) from exc
        except redis.exceptions.ConnectionError as exc:
            raise ValueError(
                "Could not connect to FalkorDB database."
                "Please ensure that the host and port is correct"
            ) from exc

    # Verify that required values are not null
    if not embedding_node_property:
        raise ValueError(
            "The `embedding_node_property` must not be None or empty string"
        )
    if not node_label:
        raise ValueError("The `node_label` must not be None or empty string")

    # NOTE(review): this also runs when `graph` was supplied, replacing the
    # graph taken from it with the one named by `database` — confirm intended.
    self._database = self._driver.select_graph(database)
    self.database_name = database
    self.embedding = embedding
    self.node_label = node_label
    self.relation_type = relation_type
    self.embedding_node_property = embedding_node_property
    self.text_node_property = text_node_property
    self._distance_strategy = distance_strategy
    self.override_relevance_score_fn = relevance_score_fn
    self.pre_delete_collection = pre_delete_collection
    self.retrieval_query = retrieval_query
    self.search_type = search_type
    self._index_type = index_type
    # Copy so the shared default list (or the caller's list) is never aliased.
    self.metadata = list(metadata)

    # Always set the attribute: use the given dimension, or measure it.
    if embedding_dimension:
        self.embedding_dimension = embedding_dimension
    else:
        self.embedding_dimension = len(self.embedding.embed_query("foo"))

    # Delete existing data if flagged
    if pre_delete_collection:
        self._database.query(f"""MATCH (n:`{self.node_label}`) DELETE n""")
@property
def embeddings(self) -> Embeddings:
    """Return the `Embeddings` model being used by the vector store."""
    return self.embedding


def _query(
    self,
    query: str,
    *,
    params: Optional[dict] = None,
    retry_on_timeout: bool = True,
) -> List[List]:
    """Run a Cypher query against the selected graph.

    Args:
        query: The Cypher query to execute.
        params: Query parameters. ``None`` is treated as ``{}``.
        retry_on_timeout: When true, a failed query is attempted one more
            time. The retry is triggered by *any* exception whose message
            does not contain ``"Invalid input"``, not only by timeouts.

    Returns:
        The ``result_set`` of the query result (a list of rows).

    Raises:
        ValueError: If the error message contains ``"Invalid input"``; the
            original exception is attached as the cause.
        Exception: The original exception when the final attempt fails.
    """
    params = params or {}
    try:
        data = self._database.query(query, params)
        return data.result_set
    except Exception as e:
        if "Invalid input" in str(e):
            raise ValueError(f"Cypher Statement is not valid\n{e}") from e
        if retry_on_timeout:
            return self._query(query, params=params, retry_on_timeout=False)
        # Bare raise keeps the original traceback intact.
        raise
def retrieve_existing_node_index(
    self, node_label: Optional[str] = ""
) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
    """Look up the vector index defined on a node label.

    The database is asked for all indexes (``CALL db.indexes()``); the first
    record whose label matches, whose entity type is ``"NODE"`` and whose
    index type is ``"VECTOR"`` is used.

    Args:
        node_label: Label to look for. Falls back to ``self.node_label``
            when empty.

    Returns:
        ``(embedding_dimension, entity_type, entity_label, entity_property)``
        of the matching index. All four are ``None`` when no index
        information is returned, no record matches, or any of the four
        values is falsy. On success ``self._index_type`` is updated to the
        found entity type.

    Raises:
        ValueError: If neither the argument nor ``self.node_label`` is set.
    """
    target_label = node_label or self.node_label
    if not target_label:
        raise ValueError("`node_label` property must be set to use this function")

    nothing = (None, None, None, None)
    index_information = self._database.query("CALL db.indexes()")
    if not index_information:
        return nothing

    dimension = None
    found_type = None
    found_label = None
    found_property = None
    for entry in process_index_data(index_information.result_set):
        on_target_node = (
            entry.get("entity_label", False) == target_label
            and entry.get("entity_type", False) == "NODE"
        )
        if on_target_node and entry["index_type"] == "VECTOR":
            dimension = int(entry["index_dimension"])
            found_type = str(entry["entity_type"])
            found_label = str(entry["entity_label"])
            found_property = str(entry["entity_property"])
            break

    if not (dimension and found_type and found_label and found_property):
        return nothing

    self._index_type = IndexType(found_type)
    return dimension, found_type, found_label, found_property
def retrieve_existing_relationship_index(
    self, relation_type: Optional[str] = ""
) -> Tuple[Optional[int], Optional[str], Optional[str], Optional[str]]:
    """Look up the vector index defined on a relationship type.

    The database is asked for all indexes (``CALL db.indexes()``); the first
    record whose label matches, whose entity type is ``"RELATIONSHIP"`` and
    whose index type is ``"VECTOR"`` is used.

    Args:
        relation_type: Relationship type to look for. Falls back to
            ``self.relation_type`` when empty.

    Returns:
        ``(embedding_dimension, entity_type, entity_label, entity_property)``
        of the matching index. All four are ``None`` when no index
        information is returned, no record matches, or any of the four
        values is falsy. On success ``self._index_type`` is updated to the
        found entity type.

    Raises:
        ValueError: If neither the argument nor ``self.relation_type`` is
            set.
    """
    target_type = relation_type or self.relation_type
    if not target_type:
        raise ValueError(
            "Couldn't find any specified `relation_type`."
            " Check if you spelled it correctly"
        )

    nothing = (None, None, None, None)
    index_information = self._database.query("CALL db.indexes()")
    if not index_information:
        return nothing

    dimension = None
    found_type = None
    found_label = None
    found_property = None
    for entry in process_index_data(index_information.result_set):
        on_target_relationship = (
            entry.get("entity_label", False) == target_type
            and entry.get("entity_type", False) == "RELATIONSHIP"
        )
        if on_target_relationship and entry["index_type"] == "VECTOR":
            dimension = int(entry["index_dimension"])
            found_type = str(entry["entity_type"])
            found_label = str(entry["entity_label"])
            found_property = str(entry["entity_property"])
            break

    if not (dimension and found_type and found_label and found_property):
        return nothing

    self._index_type = IndexType(found_type)
    return dimension, found_type, found_label, found_property
def retrieve_existing_fts_index(self) -> Optional[str]:
    """Find a full-text index defined on ``self.node_label``.

    The database is asked for all indexes (``CALL db.indexes()``) and the
    first record whose label equals ``self.node_label`` and whose index type
    is ``"FULLTEXT"`` is used.

    Returns:
        The label of the entity carrying the full-text index, or ``None``
        when no index information is returned, nothing matches, or the
        matching label is an empty string.
    """
    index_information = self._database.query("CALL db.indexes()")
    if not index_information:
        return None

    for entry in process_index_data(index_information.result_set):
        if (
            entry.get("entity_label", False) == self.node_label
            and entry["index_type"] == "FULLTEXT"
        ):
            # The first match decides the outcome.
            return str(entry["entity_label"]) or None
    return None
def create_new_node_index(
    self,
    node_label: Optional[str] = "",
    embedding_node_property: Optional[str] = "",
    embedding_dimension: Optional[int] = None,
) -> None:
    """Create a vector index on a node label in FalkorDB.

    Each argument falls back to the attribute of the same name on the
    store when it is empty. The similarity function is taken from
    ``DISTANCE_MAPPING`` using the store's distance strategy.

    Args:
        node_label: Label of the nodes to index.
        embedding_node_property: Property holding the embedding vector.
        embedding_dimension: Dimension of the embedding vectors.

    Raises:
        ValueError: If a required value is missing from both the arguments
            and the store, if the index already exists, or if index
            creation fails for any other reason.
    """
    node_label = node_label or self.node_label
    if not node_label:
        raise ValueError("`node_label` property must be set to use this function")

    embedding_node_property = (
        embedding_node_property or self.embedding_node_property
    )
    if not embedding_node_property:
        raise ValueError(
            "`embedding_node_property` property must be set to use this function"
        )

    embedding_dimension = embedding_dimension or self.embedding_dimension
    if not embedding_dimension:
        raise ValueError(
            "`embedding_dimension` property must be set to use this function"
        )

    try:
        self._database.create_node_vector_index(
            node_label,
            embedding_node_property,
            dim=embedding_dimension,
            similarity_function=DISTANCE_MAPPING[self._distance_strategy],
        )
    except Exception as e:
        if "already indexed" not in str(e):
            raise ValueError(f"Error occurred: {e}")
        raise ValueError(
            f"A vector index on (:{node_label}{{{embedding_node_property}}})"
            " has already been created"
        )
def create_new_index_on_relationship(
    self,
    relation_type: str = "",
    embedding_node_property: str = "",
    embedding_dimension: int = 0,
) -> None:
    """Create a vector index on a relationship (edge) type in FalkorDB.

    Each argument falls back to the attribute of the same name on the
    store when it is empty or zero. The similarity function follows the
    store's configured distance strategy, matching how node indexes are
    created.

    Args:
        relation_type: Relationship type to index.
        embedding_node_property: Property holding the embedding vector.
        embedding_dimension: Dimension of the embedding vectors.

    Raises:
        ValueError: If a required value is missing from both the arguments
            and the store, if the index already exists, or if index
            creation fails for any other reason. The underlying exception
            is attached as the cause.
    """
    relation_type = relation_type or self.relation_type
    if not relation_type:
        raise ValueError("`relation_type` must be set to use this function")

    embedding_node_property = (
        embedding_node_property or self.embedding_node_property
    )
    if not embedding_node_property:
        raise ValueError(
            "`embedding_node_property` must be set to use this function"
        )

    embedding_dimension = embedding_dimension or self.embedding_dimension
    if not embedding_dimension:
        raise ValueError("`embedding_dimension` must be set to use this function")

    try:
        self._database.create_edge_vector_index(
            relation_type,
            embedding_node_property,
            dim=embedding_dimension,
            # Honour the strategy the store was configured with instead of
            # always using the module default.
            similarity_function=DISTANCE_MAPPING[self._distance_strategy],
        )
    except Exception as e:
        if "already indexed" in str(e):
            raise ValueError(
                f"A vector index on [:{relation_type}"
                "{"
                f"{embedding_node_property}"
                "}] has already been created"
            ) from e
        raise ValueError(f"Error occurred: {e}") from e
def create_new_keyword_index(
    self, text_node_properties: Optional[List[str]] = None
) -> None:
    """Create a full-text index on ``self.node_label`` in FalkorDB.

    Args:
        text_node_properties: Node properties to index. When omitted or
            empty, ``self.text_node_property`` is indexed.
    """
    # `None` replaces the former mutable `[]` default; an empty list still
    # selects the store's text property, exactly as before.
    node_props = text_node_properties or [self.text_node_property]

    # The label and every property are passed positionally to the driver.
    self._database.create_node_fulltext_index(self.node_label, *node_props)
def add_embeddings(
    self,
    texts: Iterable[str],
    embeddings: List[List[float]],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Add precomputed embeddings to the vector store.

    One node labelled ``self.node_label`` is merged per text (matched on its
    ``id``); its embedding and text properties are set and the metadata
    dictionary is merged into the node's properties.

    Args:
        texts: Texts to store. Any iterable is accepted, including one-shot
            iterators.
        embeddings: Embedding vectors, aligned with ``texts``.
        metadatas: Metadata dictionaries aligned with ``texts``. When
            omitted or empty, each text gets an empty dictionary.
        ids: Node ids aligned with ``texts``. When ``None``, the MD5 hex
            digest of each UTF-8 encoded text is used.
        kwargs: Vectorstore specific parameters (unused here).

    Returns:
        The ids of the stored texts.

    Side effects:
        ``self.metadata`` is replaced by the list of metadata keys that have
        a truthy value in at least one dictionary.
    """
    # Materialise once: `texts` is walked several times below and a generator
    # would otherwise be exhausted after the first pass.
    texts = list(texts)

    if ids is None:
        ids = [md5(text.encode("utf-8")).hexdigest() for text in texts]

    if not metadatas:
        metadatas = [{} for _ in texts]

    # Keys that carry a non-empty value in at least one metadata dictionary.
    non_empty_keys = {
        key for metadata in metadatas for key, value in metadata.items() if value
    }
    self.metadata = list(non_empty_keys)

    parameters = {
        "data": [
            {
                "text": text,
                "metadata": metadata,
                "embedding": embedding,
                "id": node_id,
            }
            for text, metadata, embedding, node_id in zip(
                texts, metadatas, embeddings, ids
            )
        ]
    }

    self._database.query(
        "UNWIND $data AS row "
        f"MERGE (c:`{self.node_label}` {{id: row.id}}) "
        f"SET c.`{self.embedding_node_property}`"
        f" = vecf32(row.embedding), c.`{self.text_node_property}`"
        " = row.text, c += row.metadata",
        params=parameters,
    )

    return ids
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Embed texts and add them to the vector store.

    Args:
        texts: Texts to embed and store. Any iterable is accepted,
            including one-shot iterators.
        metadatas: Optional metadata dictionaries aligned with ``texts``.
        ids: Optional ids aligned with ``texts``.
        kwargs: Vectorstore specific parameters, forwarded to
            ``add_embeddings``.

    Returns:
        The ids returned by ``add_embeddings`` for the stored texts.
    """
    # Materialise once so the same items are both embedded and stored; a
    # generator would be empty by the time it reached `add_embeddings`.
    texts = list(texts)
    embeddings = self.embedding.embed_documents(texts)
    return self.add_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )
def add_documents(
    self,
    documents: List[Document],
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> List[str]:
    """Store documents and return the id recorded for each of them.

    The documents are ingested through ``from_documents`` (called with only
    the store's embedding model and the documents). Afterwards, for each
    document, nodes whose ``text`` property equals the page content are
    either given the caller's id or queried for their existing id.

    Args:
        documents: Documents to store.
        ids: Optional custom ids, one per document.

    Returns:
        One id per document, in input order.

    Raises:
        ValueError: If ``ids`` is given with a different length than
            ``documents``, or if no id can be read back for a document.
    """
    if ids and len(ids) != len(documents):
        raise ValueError("The number of ids must match the number of documents.")

    # NOTE(review): ingestion goes through `from_documents` with no other
    # settings of this store forwarded, and the follow-up queries match on the
    # literal `text` property across all labels — verify this is intended.
    self.from_documents(
        embedding=self.embedding,
        documents=documents,
    )

    collected = []
    for position, document in enumerate(documents):
        content = document.page_content

        if not ids:
            # No custom ids: read back whatever id the node received.
            rows = self._query(
                """ MATCH (n) WHERE n.text = $page_content RETURN n.id """,
                params={"page_content": content},
            )
            try:
                collected.append(rows[0][0])
            except Exception:
                raise ValueError(
                    "Your document wasn't added to the store"
                    " successfully. Check your spellings."
                )
            continue

        # Custom ids: overwrite the id on the matching node(s).
        custom_id = ids[position]
        self._query(
            """ MATCH (n) WHERE n.text = $page_content SET n.id = $assigned_id """,
            params={"page_content": content, "assigned_id": custom_id},
        )
        collected.append(custom_id)

    return collected
@classmethod
def from_texts(
    cls: type[FalkorDBVector],
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[Dict]] = None,
    distance_strategy: Optional[DistanceStrategy] = None,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> FalkorDBVector:
    """Build a ``FalkorDBVector`` store from raw texts.

    The texts are embedded with ``embedding`` and the store is then created
    and populated by the private ``__from`` constructor.

    Args:
        texts: Texts to embed and store.
        embedding: Embedding model used to embed the texts.
        metadatas: Optional metadata per text; defaults to one empty
            dictionary per text.
        distance_strategy: Distance strategy; defaults to
            ``DEFAULT_DISTANCE_STRATEGY``.
        ids: Optional ids per text.
        kwargs: Extra arguments forwarded to ``__from``.

    Returns:
        The populated store.
    """
    vectors = embedding.embed_documents(list(texts))

    resolved_metadatas = [{} for _ in texts] if metadatas is None else metadatas
    resolved_strategy = (
        DEFAULT_DISTANCE_STRATEGY if distance_strategy is None else distance_strategy
    )

    return cls.__from(
        texts,
        vectors,
        embedding,
        metadatas=resolved_metadatas,
        ids=ids,
        distance_strategy=resolved_strategy,
        **kwargs,
    )
@classmethod
def __from(
    cls,
    texts: List[str],
    embeddings: List[List[float]],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    search_type: SearchType = SearchType.VECTOR,
    **kwargs: Any,
) -> FalkorDBVector:
    """
    Create a store, make sure its indexes exist, and ingest the texts.

    Args:
        texts: The texts to store.
        embeddings: Pre-computed vectors, one per text.
        embedding: Embedding function handed to the store constructor.
        metadatas: Optional per-text metadata; defaults to one empty dict
            per text when missing or empty.
        ids: Optional ids; defaults to the MD5 hex digest of each text.
        search_type: Search strategy; ``HYBRID`` additionally requires a
            full-text index.
        **kwargs: Forwarded both to the store constructor and to
            ``add_embeddings``.

    Returns:
        The populated FalkorDBVector instance.

    Raises:
        ValueError: If the existing vector index is a relationship index,
            if its dimension differs from the embedding function's, or if
            the full-text index covers a different node label.
    """
    if ids is None:
        ids = [md5(text.encode("utf-8")).hexdigest() for text in texts]

    if not metadatas:
        metadatas = [{} for _ in texts]

    store = cls(
        embedding=embedding,
        search_type=search_type,
        **kwargs,
    )

    # Check if the vector index already exists
    embedding_dimension, index_type, entity_label, entity_property = (
        store.retrieve_existing_node_index()
    )

    # Ingestion only targets node indexes.
    if index_type == "RELATIONSHIP":
        raise ValueError(
            "Data ingestion is not supported with relationship vector index"
        )

    if not index_type:
        # No vector index yet: create it and re-read its description.
        store.create_new_node_index()
        embedding_dimension, index_type, entity_label, entity_property = (
            store.retrieve_existing_node_index()
        )
    elif embedding_dimension and store.embedding_dimension != embedding_dimension:
        # Existing index whose dimension disagrees with the embedder.
        raise ValueError(
            f"A Vector index for {entity_label} on {entity_property} exists. "
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {embedding_dimension}"
        )

    if search_type == SearchType.HYBRID:
        fts_node_label = store.retrieve_existing_fts_index()
        if not fts_node_label:
            # The FTS index doesn't exist yet
            store.create_new_keyword_index()
        elif fts_node_label != store.node_label:
            # FTS and vector index must cover the same node label.
            raise ValueError(
                "Vector and keyword index don't index the same node label"
            )

    store.add_embeddings(
        texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
    )

    return store
@classmethod
def from_existing_index(
    cls: Type[FalkorDBVector],
    embedding: Embeddings,
    node_label: str,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    **kwargs: Any,
) -> FalkorDBVector:
    """
    Connect to an already existing FalkorDB node vector index.

    No embeddings are inserted; the store is only validated and returned.

    Args:
        embedding: Embedding function used by the store.
        node_label: Label of the nodes covered by the vector index.
        search_type: Search strategy; ``HYBRID`` also requires an existing
            full-text index on the same node label.
        **kwargs: Forwarded to the store constructor.

    Returns:
        The FalkorDBVector instance bound to the existing index.

    Raises:
        ValueError: If the index is a relationship index, does not exist,
            has a different dimension than the embedding function, or (for
            hybrid search) the full-text index is missing or covers another
            node label.
    """
    store = cls(
        embedding=embedding,
        node_label=node_label,
        search_type=search_type,
        **kwargs,
    )

    index_info = store.retrieve_existing_node_index()
    existing_dimension, index_type, _entity_label, _entity_property = index_info

    if index_type == "RELATIONSHIP":
        raise ValueError(
            "Relationship vector index is not supported with "
            "`from_existing_index` method. Please use the "
            "`from_existing_relationship_index` method."
        )

    if not index_type:
        raise ValueError(
            f"The specified vector index node label `{node_label}` does not exist. "
            "Make sure to check if you spelled the node label correctly"
        )

    # The embedder and the stored index must agree on vector size.
    if existing_dimension and store.embedding_dimension != existing_dimension:
        raise ValueError(
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {existing_dimension}"
        )

    if search_type != SearchType.HYBRID:
        return store

    # Hybrid search: the keyword index must exist and cover the same label.
    keyword_label = store.retrieve_existing_fts_index()
    if not keyword_label:
        raise ValueError(
            "The specified keyword index name does not exist. "
            "Make sure to check if you spelled it correctly"
        )
    if keyword_label != store.node_label:
        raise ValueError("Vector and keyword index don't index the same node label")

    return store
@classmethod
def from_existing_relationship_index(
    cls: Type[FalkorDBVector],
    embedding: Embeddings,
    relation_type: str,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    **kwargs: Any,
) -> FalkorDBVector:
    """
    Connect to an already existing FalkorDB relationship vector index.

    No embeddings are inserted; the store is only validated and returned.

    Args:
        embedding: Embedding function used by the store.
        relation_type: Relationship type covered by the vector index.
        search_type: Must not be ``HYBRID``; hybrid search is rejected for
            relationship indexes.
        **kwargs: Forwarded to the store constructor.

    Returns:
        The FalkorDBVector instance bound to the existing index.

    Raises:
        ValueError: If hybrid search is requested, the index does not
            exist, it is a node index, or its dimension differs from the
            embedding function's.
    """
    hybrid_requested = search_type == SearchType.HYBRID
    if hybrid_requested:
        raise ValueError(
            "Hybrid search is not supported in combination "
            "with relationship vector index"
        )

    store = cls(
        embedding=embedding,
        relation_type=relation_type,
        **kwargs,
    )

    index_info = store.retrieve_existing_relationship_index()
    existing_dimension, index_type, _entity_label, _entity_property = index_info

    if not index_type:
        raise ValueError(
            "The specified vector index on the relationship"
            f" {relation_type} does not exist. "
            "Make sure to check if you spelled it correctly"
        )

    # Only relationship indexes are handled by this constructor.
    if index_type == "NODE":
        raise ValueError(
            "Node vector index is not supported with "
            "`from_existing_relationship_index` method. Please use the "
            "`from_existing_index` method."
        )

    # The embedder and the stored index must agree on vector size.
    if existing_dimension and store.embedding_dimension != existing_dimension:
        raise ValueError(
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {existing_dimension}"
        )

    return store
@classmethod
def from_existing_graph(
    cls: Type[FalkorDBVector],
    embedding: Embeddings,
    database: str,
    node_label: str,
    embedding_node_property: str,
    text_node_properties: List[str],
    *,
    search_type: SearchType = DEFAULT_SEARCH_TYPE,
    retrieval_query: str = "",
    **kwargs: Any,
) -> FalkorDBVector:
    """
    Initialize and return a FalkorDBVector instance from an existing graph
    using the database name.

    The method validates the existing indices, creates the missing ones,
    and then computes embeddings in batches for nodes of ``node_label``
    whose ``embedding_node_property`` is still null.

    Args:
        embedding: The `Embeddings` model you would like to use.
        database: The name of the existing graph/database you would like
            to initialize.
        node_label: The label of the node you want to initialize.
        embedding_node_property: The name of the property you want your
            embeddings to be stored in.
        text_node_properties: Non-empty list of node properties holding
            text; used in the default retrieval query, for the keyword
            index, and to select nodes to embed.
        search_type: Search strategy; ``HYBRID`` also ensures a full-text
            index exists.
        retrieval_query: Optional custom retrieval query. When empty, one
            returning ``text``, ``metadata`` and ``score`` is built.
        **kwargs: Forwarded to the store constructor.

    Returns:
        FalkorDBVector: An instance of FalkorDBVector initialized with the
        provided parameters and existing graph.

    Raises:
        ValueError: If ``database`` or ``text_node_properties`` is empty,
            if the existing index is a relationship index, if index and
            embedder dimensions differ, or if the keyword index covers a
            different node label.

    Example:
        >>> falkordb_vector = FalkorDBVector.from_existing_graph(
        ...     embedding=my_embedding,
        ...     database="my_graph",
        ...     node_label="Document",
        ...     embedding_node_property="embedding",
        ...     text_node_properties=["title", "content"]
        ... )
    """
    # Validate that database and text_node_properties is not empty
    if not database:
        raise ValueError("Parameter `database` must be given")
    if not text_node_properties:
        raise ValueError(
            "Parameter `text_node_properties` must not be an empty list"
        )

    # Prefer retrieval query from params, otherwise construct it
    if not retrieval_query:
        retrieval_query = (
            f"RETURN reduce(str='', k IN {text_node_properties} |"
            " str + '\\n' + k + ': ' + coalesce(node[k], '')) AS text, "
            "node {.*, `"
            + embedding_node_property
            + "`: Null, id: Null, "
            + ", ".join([f"`{prop}`: Null" for prop in text_node_properties])
            + "} AS metadata, score"
        )

    store = cls(
        database=database,
        embedding=embedding,
        search_type=search_type,
        retrieval_query=retrieval_query,
        node_label=node_label,
        embedding_node_property=embedding_node_property,
        **kwargs,
    )

    embedding_dimension, index_type, entity_label, entity_property = (
        store.retrieve_existing_node_index()
    )

    # Raise error if relationship index type
    if index_type == "RELATIONSHIP":
        raise ValueError(
            "`from_existing_graph` method does not support "
            "existing relationship vector index. "
            "Please use `from_existing_relationship_index` method"
        )

    if not index_type:
        # The vector index doesn't exist yet
        store.create_new_node_index(node_label=node_label)
    elif embedding_dimension and store.embedding_dimension != embedding_dimension:
        # The index already exists but disagrees with the embedder.
        raise ValueError(
            f"Index on Node {store.node_label} already exists. "
            "The provided embedding function and vector index "
            "dimensions do not match.\n"
            f"Embedding function dimension: {store.embedding_dimension}\n"
            f"Vector index dimension: {embedding_dimension}"
        )

    # FTS index for Hybrid search
    if search_type == SearchType.HYBRID:
        fts_node_label = store.retrieve_existing_fts_index()
        if not fts_node_label:
            # The FTS index doesn't exist yet
            store.create_new_keyword_index(text_node_properties)
        elif fts_node_label != store.node_label:
            # FTS and vector index must cover the same node label.
            raise ValueError(
                "Vector and keyword index don't index the same node label"
            )

    # Populate embeddings in batches. Both queries are loop-invariant, so
    # they are built once up front.
    batch_size = 1000
    # NOTE(review): the text that gets embedded is `n.text`, not the
    # concatenation of `text_node_properties` -- confirm this is intended.
    fetch_query = (
        f"MATCH (n:`{node_label}`) "
        f"WHERE n.`{embedding_node_property}` IS null "
        "AND any(k IN $props WHERE n[k] IS NOT null) "
        "RETURN id(n) AS id, "
        "coalesce(n.text, '') AS text "
        f"LIMIT {batch_size}"
    )
    update_query = (
        "UNWIND $data AS row "
        f"MATCH (n:`{node_label}`) "
        "WHERE id(n) = row.id "
        f"SET n.`{embedding_node_property}` = vecf32(row.embedding) "
        "RETURN count(*)"
    )

    while True:
        data = store._query(fetch_query, params={"props": text_node_properties})
        if not data:
            break

        text_embeddings = embedding.embed_documents([el[1] for el in data])

        params = {
            "data": [
                {"id": el[0], "embedding": vector}
                for el, vector in zip(data, text_embeddings)
            ]
        }
        store._query(update_query, params=params)

        # A short batch means there is nothing left to embed.
        if len(data) < batch_size:
            break

    return store
@classmethod
def from_documents(
    cls: Type[FalkorDBVector],
    documents: List[Document],
    embedding: Embeddings,
    distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
    ids: Optional[List[str]] = None,
    **kwargs: Any,
) -> FalkorDBVector:
    """
    Build a FalkorDBVector store from Document objects.

    Each document is split into its ``page_content`` and ``metadata`` and
    the result is delegated to ``from_texts``.

    Args:
        documents: Documents to store.
        embedding: Embedding function used to vectorise the contents.
        distance_strategy: Distance strategy forwarded to ``from_texts``.
        ids: Optional ids, forwarded unchanged.
        **kwargs: Forwarded unchanged to ``from_texts``.

    Returns:
        The populated FalkorDBVector instance.
    """
    contents = []
    for document in documents:
        contents.append(document.page_content)

    metadata_list = []
    for document in documents:
        metadata_list.append(document.metadata)

    return cls.from_texts(
        texts=contents,
        embedding=embedding,
        distance_strategy=distance_strategy,
        metadatas=metadata_list,
        ids=ids,
        **kwargs,
    )
@classmethod
def from_embeddings(
    cls,
    text_embeddings: List[Tuple[str, List[float]]],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY,
    ids: Optional[List[str]] = None,
    pre_delete_collection: bool = False,
    **kwargs: Any,
) -> FalkorDBVector:
    """Build a FalkorDBVector store from texts with pre-computed embeddings.

    The ``(text, vector)`` pairs are split into two parallel lists and
    passed to the private ``__from`` constructor, so no embedding call is
    made for the texts here.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores.falkordb_vector import (
                FalkorDBVector
            )
            from langchain_community.embeddings import OpenAIEmbeddings

            embeddings = OpenAIEmbeddings()
            text_embeddings = embeddings.embed_documents(texts)
            text_embedding_pairs = list(zip(texts, text_embeddings))
            vectorstore = FalkorDBVector.from_embeddings(
                text_embedding_pairs, embeddings
            )
    """
    raw_texts = []
    for pair in text_embeddings:
        raw_texts.append(pair[0])

    vectors = []
    for pair in text_embeddings:
        vectors.append(pair[1])

    return cls.__from(
        raw_texts,
        vectors,
        embedding,
        metadatas=metadatas,
        ids=ids,
        distance_strategy=distance_strategy,
        pre_delete_collection=pre_delete_collection,
        **kwargs,
    )
def similarity_search(
    self,
    query: str,
    k: int = 4,
    params: Optional[Dict[str, Any]] = None,
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Run similarity search with FalkorDBVector.

    The query text is embedded and the search is delegated to
    ``similarity_search_by_vector``; the raw text is forwarded as ``query``.

    Args:
        query (str): Query text to search for.
        k (int): Number of results to return. Defaults to 4.
        params (Optional[Dict[str, Any]]): The search params for the index
            type. Defaults to None, which is treated as an empty dict.
        filter (Optional[Dict[str, Any]]): Dictionary of arguments(s) to
            filter on metadata. Defaults to None.
        **kwargs: Forwarded unchanged to ``similarity_search_by_vector``.

    Returns:
        List of Documents most similar to the query.
    """
    # A None default avoids sharing one mutable dict across calls; it is
    # normalised here so downstream code always receives a dict.
    search_params = {} if params is None else params

    embedding = self.embedding.embed_query(text=query)
    return self.similarity_search_by_vector(
        embedding=embedding,
        k=k,
        query=query,
        params=search_params,
        filter=filter,
        **kwargs,
    )
def similarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, Any]] = None,
    params: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to embedding vector.

    Delegates to ``similarity_search_with_score_by_vector`` and drops the
    scores from its result.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter (Optional[Dict[str, Any]]): Dictionary of argument(s) to
            filter on metadata. Defaults to None.
        params (Optional[Dict[str, Any]]): The search params for the index
            type. Defaults to None, which is treated as an empty dict.
        **kwargs: Forwarded unchanged to
            ``similarity_search_with_score_by_vector``.

    Returns:
        List of Documents most similar to the query vector.
    """
    # A None default avoids sharing one mutable dict across calls; it is
    # normalised here so downstream code always receives a dict.
    search_params = {} if params is None else params

    docs_and_scores = self.similarity_search_with_score_by_vector(
        embedding=embedding, k=k, filter=filter, params=search_params, **kwargs
    )
    return [doc for doc, _ in docs_and_scores]
def similarity_search_with_score_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    params: Optional[Dict[str, Any]] = None,
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """
    Perform a similarity search in the FalkorDB database using a given
    vector and return the matching documents with their scores.

    The Cypher query is the concatenation of an index query and a
    retrieval query. With a metadata ``filter`` the index query is an
    explicit ``MATCH`` scored with ``vec.cosineDistance``; otherwise it
    comes from ``_get_search_index_query``. The retrieval query is
    ``self.retrieval_query`` when set, else a default built from the
    store's text, embedding and metadata properties.

    Args:
        embedding (List[float]): The embedding vector to compare against.
        k (int, optional): The number of top similar documents to retrieve;
            passed to the query as ``$k``.
        params (Optional[Dict[str, Any]]): The search params for the index
            type. Defaults to None, which is treated as an empty dict.
        filter (Optional[Dict[str, Any]]): Dictionary of argument(s) to
            filter on metadata. Defaults to None.
        **kwargs: Recognised keys are ``query`` (the raw query text, passed
            as ``$query``; required for hybrid search, None otherwise) and
            ``return_embeddings`` (include the stored embedding in the
            metadata of each result).

    Returns:
        List[Tuple[Document, float]]: A list of tuples, each containing a
        Document object and its similarity score.

    Raises:
        ValueError: If a filter is combined with hybrid search, if hybrid
            search is requested without a ``query``, if the query returns
            no rows or a null text, if embeddings were requested but are
            null, or if the rows cannot be converted into documents.
    """
    # A None default avoids sharing one mutable dict across calls.
    search_params = {} if params is None else params
    # `query` is optional: direct vector searches have no query text.
    query_text = kwargs.get("query")

    if filter:
        if self.search_type == SearchType.HYBRID:
            raise ValueError(
                "Metadata filtering can't be use in combination with "
                "a hybrid search approach"
            )

        base_index_query = (
            f"MATCH (n:{self.node_label}) WHERE "
            f"n.{self.embedding_node_property} IS NOT NULL AND "
        )
        base_cosine_query = (
            " WITH n as node, "
            f" vec.cosineDistance(n.{self.embedding_node_property}"
            ", vecf32($embedding)) as score "
        )
        filter_snippets, filter_params = construct_metadata_filter(filter)
        # NOTE(review): this path applies no ORDER BY / LIMIT using $k
        # unless the retrieval query adds one -- confirm.
        index_query = base_index_query + filter_snippets + base_cosine_query
    else:
        if self.search_type == SearchType.HYBRID and query_text is None:
            raise ValueError(
                "Hybrid search requires the query text; pass it as `query`."
            )
        index_query = _get_search_index_query(self.search_type, self._index_type)
        filter_params = {}

    retrieval_query = self.retrieval_query or self._default_retrieval_query(
        bool(kwargs.get("return_embeddings"))
    )
    read_query = index_query + retrieval_query

    parameters = {
        "entity_property": self.embedding_node_property,
        "k": k,
        "embedding": embedding,
        "query": query_text,
        **search_params,
        **filter_params,
    }
    if self._index_type == "NODE":
        parameters["entity_label"] = self.node_label
    elif self._index_type == "RELATIONSHIP":
        parameters["entity_label"] = self.relation_type

    results = self._query(read_query, params=parameters)

    # An empty result set and a null text column are reported identically.
    if not results or any(row[0] is None for row in results):
        if not self.retrieval_query:
            raise ValueError(
                f"Make sure that none of the `{self.text_node_property}` "
                f"properties on nodes with label `{self.node_label}` "
                "are missing or empty"
            )
        raise ValueError(
            "Inspect the `retrieval_query` and ensure it doesn't "
            "return None for the `text` column"
        )

    # Check if embeddings are missing when they are expected
    if kwargs.get("return_embeddings") and any(
        row[2]["embedding"] is None for row in results
    ):
        if not self.retrieval_query:
            raise ValueError(
                f"Make sure that none of the `{self.embedding_node_property}` "
                f"properties on nodes with label `{self.node_label}` "
                "are missing or empty"
            )
        raise ValueError(
            "Inspect the `retrieval_query` and ensure it doesn't "
            "return None for the `embedding` metadata column"
        )

    try:
        # Default column order: (text, score, metadata).
        docs = [
            (
                Document(
                    page_content=row[0],
                    metadata={
                        key: value
                        for key, value in row[2].items()
                        if value is not None
                    },
                ),
                row[1],
            )
            for row in results
        ]
    except AttributeError:
        # The third column is not a mapping: treat rows as
        # (text, metadata, score) and order them by score, highest first.
        try:
            sorted_results = sorted(results, key=lambda r: r[2], reverse=True)
            docs = [
                (
                    Document(
                        page_content=row[0],
                        metadata={
                            key: value
                            for key, value in row[1].items()
                            if value is not None
                        },
                    ),
                    row[2],
                )
                for row in sorted_results
            ]
        except Exception as e:
            raise ValueError(f"An error occurred: {e}") from e

    return docs

def _default_retrieval_query(self, return_embeddings: bool) -> str:
    """Build the default RETURN clause for the store's index type.

    The clause returns the text property as ``text``, the ``score``, and a
    ``metadata`` map with ``text``, optionally ``embedding``, ``id``,
    ``source`` and every key listed in ``self.metadata``.
    """
    if self._index_type == IndexType.RELATIONSHIP:
        entity = "relationship"
        # Relationship metadata accessors are emitted without backticks.
        metadata_fields = (
            [f"`{key}`: relationship.{key}" for key in self.metadata]
            if self.metadata
            else []
        )
    else:
        entity = "node"
        metadata_fields = (
            [f"`{key}`: node.`{key}`" for key in self.metadata]
            if self.metadata
            else []
        )

    fields = [f"text: {entity}.{self.text_node_property}"]
    if return_embeddings:
        fields.append(f"embedding: {entity}.{self.embedding_node_property}")
    fields.append(f"id: {entity}.id")
    fields.append(f"source: {entity}.source")
    fields.extend(metadata_fields)

    field_map = ", ".join(fields)
    return (
        f"RETURN {entity}.{self.text_node_property} AS text, score, "
        f"{{{field_map}}} AS metadata"
    )
def similarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    params: Optional[Dict[str, Any]] = None,
    filter: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query, together with their scores.

    The query text is embedded and the search is delegated to
    ``similarity_search_with_score_by_vector``; the raw text is forwarded
    as ``query``.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        params (Optional[Dict[str, Any]]): The search params for the index
            type. Defaults to None, which is treated as an empty dict.
        filter (Optional[Dict[str, Any]]): Dictionary of argument(s) to
            filter on metadata. Defaults to None.
        **kwargs: Forwarded unchanged to
            ``similarity_search_with_score_by_vector``.

    Returns:
        List of Documents most similar to the query and score for each
    """
    # A None default avoids sharing one mutable dict across calls; it is
    # normalised here so downstream code always receives a dict.
    search_params = {} if params is None else params

    embedding = self.embedding.embed_query(query)
    return self.similarity_search_with_score_by_vector(
        embedding=embedding,
        k=k,
        query=query,
        params=search_params,
        filter=filter,
        **kwargs,
    )
def max_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[dict] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        query: search query text.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        filter: Filter on metadata properties, e.g.
            {
                "str_property": "foo",
                "int_property": 123
            }

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    query_vector = self.embedding.embed_query(query)

    # Fetch candidates together with their stored embeddings.
    candidates = self.similarity_search_with_score_by_vector(
        embedding=query_vector,
        query=query,
        k=fetch_k,
        return_embeddings=True,
        filter=filter,
        **kwargs,
    )

    candidate_vectors = []
    for candidate, _score in candidates:
        candidate_vectors.append(candidate.metadata["embedding"])

    # Let MMR pick which candidates to keep.
    chosen_positions = maximal_marginal_relevance(
        np.array(query_vector), candidate_vectors, lambda_mult=lambda_mult, k=k
    )

    chosen_docs = []
    for position in chosen_positions:
        chosen_docs.append(candidates[position][0])

    # The embedding was only needed for selection; strip it from the output.
    for chosen in chosen_docs:
        chosen.metadata.pop("embedding")

    return chosen_docs
def_select_relevance_score_fn(self)->Callable[[float],float]:""" The 'correct' relevance function may differ depending on a few things, including: - the distance / similarity metric used by the VectorStore - the scale of your embeddings (OpenAI's are unit normed. Many others are not!) - embedding dimensionality - etc. """ifself.override_relevance_score_fnisnotNone:returnself.override_relevance_score_fn# Default strategy is to rely on distance strategy provided# in vectorstore constructorifself._distance_strategy==DistanceStrategy.COSINE:returnlambdax:xelifself._distance_strategy==DistanceStrategy.EUCLIDEAN_DISTANCE:returnlambdax:xelse:raiseValueError("No supported normalization function"f" for distance_strategy of {self._distance_strategy}.""Consider providing relevance_score_fn to PGVector constructor.")
def update_documents(
    self,
    document_id: str,
    document: Document,
) -> None:
    """
    Update an existing document in the store based on the document_id.

    The node's ``text`` is replaced, its embedding is recomputed from the
    new text so vector search stays in sync with the content, and every
    metadata entry is written as a node property.

    Args:
        document_id: The id of the document to be updated.
        document: The new Document instance with the updated content.

    Returns:
        None

    Raises:
        ValueError: If no node with ``document_id`` exists, or if a
            metadata key contains a backtick and therefore cannot be
            safely quoted in the query.
    """
    # Ensure the document_id exists in the store
    existing_document = self._query(
        """ MATCH (n) WHERE n.id = $document_id RETURN n """,
        params={"document_id": document_id},
    )
    if not existing_document:
        raise ValueError(f"Document with id {document_id} not found in the store.")

    # Validate every key before writing anything, so a bad key cannot
    # leave the node half-updated.
    metadata = document.metadata or {}
    property_names = [str(key) for key in metadata]
    for name in property_names:
        if "`" in name:
            raise ValueError(
                f"Metadata key {name!r} contains a backtick and cannot be "
                "used as a property name."
            )

    # Re-embed the new text; otherwise the stored vector would still
    # describe the old content.
    new_embedding = self.embedding.embed_documents([document.page_content])[0]
    self._query(
        "MATCH (n) WHERE n.id = $document_id "
        "SET n.text = $new_content, "
        f"n.`{self.embedding_node_property}` = vecf32($new_embedding)",
        params={
            "document_id": document_id,
            "new_content": document.page_content,
            "new_embedding": new_embedding,
        },
    )

    # Property names cannot be query parameters, so they are interpolated;
    # backtick-quoting keeps them from being parsed as query text.
    for name, value in zip(property_names, metadata.values()):
        self._query(
            f"MATCH (n) WHERE n.id = $document_id SET n.`{name}` = $value",
            params={"document_id": document_id, "value": value},
        )
def delete(
    self,
    ids: Optional[List[str]] = None,  # Make `ids` optional
    **kwargs: Any,
) -> Optional[bool]:  # Return type matches the superclass signature
    """
    Delete documents from the store by id.

    All ids are checked first; nodes are only deleted once every id has
    been found, so a missing id does not leave a partial deletion behind.

    Args:
        ids: A list of IDs of the documents to be deleted. Required
            despite the optional signature: passing None raises.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Optional[bool]: True once every listed document has been deleted.

    Raises:
        ValueError: If ``ids`` is None, or if any id has no matching node.
    """
    if ids is None:
        raise ValueError("You must provide at least one ID to delete.")

    # First pass: ensure every document exists in the store.
    for item_id in ids:
        existing_document = self._query(
            """ MATCH (n) WHERE n.id = $item_id RETURN n """,
            params={"item_id": item_id},
        )
        if not existing_document:
            raise ValueError(f"Document with id {item_id} not found in the store.")

    # Second pass: delete the document nodes from the store.
    for item_id in ids:
        self._query(
            """ MATCH (n) WHERE n.id = $item_id DELETE n """,
            params={"item_id": item_id},
        )

    return True