Source code for langchain_community.vectorstores.oraclevs
from __future__ import annotations

import array
import functools
import hashlib
import json
import logging
import os
import uuid
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Iterable,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
    cast,
)

from numpy.typing import NDArray

if TYPE_CHECKING:
    from oracledb import Connection

import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore

from langchain_community.vectorstores.utils import (
    DistanceStrategy,
    maximal_marginal_relevance,
)

logger = logging.getLogger(__name__)
log_level = os.getenv("LOG_LEVEL", "ERROR").upper()
logging.basicConfig(
    level=getattr(logging, log_level),
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Define a type variable that can be any kind of function
T = TypeVar("T", bound=Callable[..., Any])


def _get_connection(client: Any) -> Connection | None:
    # Dynamically import oracledb and the required classes
    try:
        import oracledb
    except ImportError as e:
        raise ImportError(
            "Unable to import oracledb, please install with `pip install -U oracledb`."
        ) from e

    # check if ConnectionPool exists
    connection_pool_class = getattr(oracledb, "ConnectionPool", None)

    if isinstance(client, oracledb.Connection):
        return client
    elif connection_pool_class and isinstance(client, connection_pool_class):
        return client.acquire()
    else:
        valid_types = "oracledb.Connection"
        if connection_pool_class:
            valid_types += " or oracledb.ConnectionPool"
        raise TypeError(
            f"Expected client of type {valid_types}, got {type(client).__name__}"
        )


def _handle_exceptions(func: T) -> T:
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        try:
            return func(*args, **kwargs)
        except RuntimeError as db_err:
            # Handle a known type of error (e.g., DB-related) specifically
            logger.exception("DB-related error occurred.")
            raise RuntimeError(
                "Failed due to a DB issue: {}".format(db_err)
            ) from db_err
        except ValueError as val_err:
            # Handle another known type of error specifically
            logger.exception("Validation error.")
            raise ValueError("Validation failed: {}".format(val_err)) from val_err
        except Exception as e:
            # Generic handler for all other exceptions
            logger.exception("An unexpected error occurred: {}".format(e))
            raise RuntimeError("Unexpected error: {}".format(e)) from e

    return cast(T, wrapper)


def _table_exists(connection: Connection, table_name: str) -> bool:
    try:
        import oracledb
    except ImportError as e:
        raise ImportError(
            "Unable to import oracledb, please install with `pip install -U oracledb`."
        ) from e

    try:
        with connection.cursor() as cursor:
            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            return True
    except oracledb.DatabaseError as ex:
        err_obj = ex.args
        if err_obj[0].code == 942:
            return False
        raise


def _compare_version(version: str, target_version: str) -> bool:
    # Split both version strings into parts
    version_parts = [int(part) for part in version.split(".")]
    target_parts = [int(part) for part in target_version.split(".")]

    # Compare each part
    for v, t in zip(version_parts, target_parts):
        if v < t:
            return True  # Current version is less
        elif v > t:
            return False  # Current version is greater

    # If all parts are equal so far, check if version has fewer parts than
    # target_version
    return len(version_parts) < len(target_parts)


@_handle_exceptions
def _index_exists(connection: Connection, index_name: str) -> bool:
    # Check if the index exists
    query = """
        SELECT index_name
        FROM all_indexes
        WHERE upper(index_name) = upper(:idx_name)
        """

    with connection.cursor() as cursor:
        # Execute the query
        cursor.execute(query, idx_name=index_name.upper())
        result = cursor.fetchone()

    # Check if the index exists
    return result is not None


def _get_distance_function(distance_strategy: DistanceStrategy) -> str:
    # Dictionary to map distance strategies to their corresponding function
    # names
    distance_strategy2function = {
        DistanceStrategy.EUCLIDEAN_DISTANCE: "EUCLIDEAN",
        DistanceStrategy.DOT_PRODUCT: "DOT",
        DistanceStrategy.COSINE: "COSINE",
    }

    # Attempt to return the corresponding distance function
    if distance_strategy in distance_strategy2function:
        return distance_strategy2function[distance_strategy]

    # If it's an unsupported distance strategy, raise an error
    raise ValueError(f"Unsupported distance strategy: {distance_strategy}")


def _get_index_name(base_name: str) -> str:
    unique_id = str(uuid.uuid4()).replace("-", "")
    return f"{base_name}_{unique_id}"


@_handle_exceptions
def _create_table(connection: Connection, table_name: str, embedding_dim: int) -> None:
    cols_dict = {
        "id": "RAW(16) DEFAULT SYS_GUID() PRIMARY KEY",
        "text": "CLOB",
        "metadata": "JSON",
        "embedding": f"vector({embedding_dim}, FLOAT32)",
    }

    if not _table_exists(connection, table_name):
        with connection.cursor() as cursor:
            ddl_body = ", ".join(
                f"{col_name} {col_type}" for col_name, col_type in cols_dict.items()
            )
            ddl = f"CREATE TABLE {table_name} ({ddl_body})"
            cursor.execute(ddl)
        logger.info("Table created successfully...")
    else:
        logger.info("Table already exists...")
[docs]
@_handle_exceptions
def create_index(
    client: Any,
    vector_store: OracleVS,
    params: Optional[dict[str, Any]] = None,
) -> None:
    connection = _get_connection(client)
    if connection is None:
        raise ValueError("Failed to acquire a connection.")
    if params:
        if params["idx_type"] == "HNSW":
            _create_hnsw_index(
                connection,
                vector_store.table_name,
                vector_store.distance_strategy,
                params,
            )
        elif params["idx_type"] == "IVF":
            _create_ivf_index(
                connection,
                vector_store.table_name,
                vector_store.distance_strategy,
                params,
            )
        else:
            _create_hnsw_index(
                connection,
                vector_store.table_name,
                vector_store.distance_strategy,
                params,
            )
    else:
        _create_hnsw_index(
            connection, vector_store.table_name, vector_store.distance_strategy, params
        )
    return
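# Usage sketch (illustrative, not part of the module): creating an HNSW index on an
# existing store. Assumes `conn` is an open oracledb connection and `vs` is an
# OracleVS instance; the parameter names mirror the `defaults` used below.
#
#     create_index(
#         conn,
#         vs,
#         params={
#             "idx_name": "hnsw_idx1",
#             "idx_type": "HNSW",
#             "neighbors": 32,
#             "accuracy": 97,
#             "parallel": 4,
#         },
#     )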
@_handle_exceptions
def _create_hnsw_index(
    connection: Connection,
    table_name: str,
    distance_strategy: DistanceStrategy,
    params: Optional[dict[str, Any]] = None,
) -> None:
    defaults = {
        "idx_name": "HNSW",
        "idx_type": "HNSW",
        "neighbors": 32,
        "efConstruction": 200,
        "accuracy": 90,
        "parallel": 8,
    }

    if params:
        config = params.copy()

        # Ensure compulsory parts are included
        for compulsory_key in ["idx_name", "parallel"]:
            if compulsory_key not in config:
                if compulsory_key == "idx_name":
                    config[compulsory_key] = _get_index_name(
                        str(defaults[compulsory_key])
                    )
                else:
                    config[compulsory_key] = defaults[compulsory_key]

        # Validate keys in config against defaults
        for key in config:
            if key not in defaults:
                raise ValueError(f"Invalid parameter: {key}")
    else:
        config = defaults

    # Base SQL statement
    idx_name = config["idx_name"]
    base_sql = (
        f"create vector index {idx_name} on {table_name}(embedding) "
        f"ORGANIZATION INMEMORY NEIGHBOR GRAPH"
    )

    # Optional parts depending on parameters
    accuracy_part = " WITH TARGET ACCURACY {accuracy}" if ("accuracy" in config) else ""
    distance_part = f" DISTANCE {_get_distance_function(distance_strategy)}"

    parameters_part = ""
    if "neighbors" in config and "efConstruction" in config:
        parameters_part = (
            " parameters (type {idx_type}, neighbors {"
            "neighbors}, efConstruction {efConstruction})"
        )
    elif "neighbors" in config and "efConstruction" not in config:
        config["efConstruction"] = defaults["efConstruction"]
        parameters_part = (
            " parameters (type {idx_type}, neighbors {"
            "neighbors}, efConstruction {efConstruction})"
        )
    elif "neighbors" not in config and "efConstruction" in config:
        config["neighbors"] = defaults["neighbors"]
        parameters_part = (
            " parameters (type {idx_type}, neighbors {"
            "neighbors}, efConstruction {efConstruction})"
        )

    # Always included part for parallel
    parallel_part = " parallel {parallel}"

    # Combine all parts
    ddl_assembly = (
        base_sql + accuracy_part + distance_part + parameters_part + parallel_part
    )
    # Format the SQL with values from the params dictionary
    ddl = ddl_assembly.format(**config)

    # Check if the index exists
    if not _index_exists(connection, config["idx_name"]):
        with connection.cursor() as cursor:
            cursor.execute(ddl)
            logger.info("Index created successfully...")
    else:
        logger.info("Index already exists...")


@_handle_exceptions
def _create_ivf_index(
    connection: Connection,
    table_name: str,
    distance_strategy: DistanceStrategy,
    params: Optional[dict[str, Any]] = None,
) -> None:
    # Default configuration
    defaults = {
        "idx_name": "IVF",
        "idx_type": "IVF",
        "neighbor_part": 32,
        "accuracy": 90,
        "parallel": 8,
    }

    if params:
        config = params.copy()

        # Ensure compulsory parts are included
        for compulsory_key in ["idx_name", "parallel"]:
            if compulsory_key not in config:
                if compulsory_key == "idx_name":
                    config[compulsory_key] = _get_index_name(
                        str(defaults[compulsory_key])
                    )
                else:
                    config[compulsory_key] = defaults[compulsory_key]

        # Validate keys in config against defaults
        for key in config:
            if key not in defaults:
                raise ValueError(f"Invalid parameter: {key}")
    else:
        config = defaults

    # Base SQL statement
    idx_name = config["idx_name"]
    base_sql = (
        f"CREATE VECTOR INDEX {idx_name} ON {table_name}(embedding) "
        f"ORGANIZATION NEIGHBOR PARTITIONS"
    )

    # Optional parts depending on parameters
    accuracy_part = " WITH TARGET ACCURACY {accuracy}" if ("accuracy" in config) else ""
    distance_part = f" DISTANCE {_get_distance_function(distance_strategy)}"

    parameters_part = ""
    if "idx_type" in config and "neighbor_part" in config:
        parameters_part = (
            f" PARAMETERS (type {config['idx_type']}, neighbor"
            f" partitions {config['neighbor_part']})"
        )

    # Always included part for parallel
    parallel_part = f" PARALLEL {config['parallel']}"

    # Combine all parts
    ddl_assembly = (
        base_sql + accuracy_part + distance_part + parameters_part + parallel_part
    )
    # Format the SQL with values from the params dictionary
    ddl = ddl_assembly.format(**config)

    # Check if the index exists
    if not _index_exists(connection, config["idx_name"]):
        with connection.cursor() as cursor:
            cursor.execute(ddl)
            logger.info("Index created successfully...")
    else:
        logger.info("Index already exists...")
[docs]
@_handle_exceptions
def drop_table_purge(client: Any, table_name: str) -> None:
    """Drop a table and purge it from the database.

    Args:
        client: The OracleDB connection object.
        table_name: The name of the table to drop.

    Raises:
        RuntimeError: If an error occurs while dropping the table.
    """
    connection = _get_connection(client)
    if connection is None:
        raise ValueError("Failed to acquire a connection.")
    if _table_exists(connection, table_name):
        with connection.cursor() as cursor:
            ddl = f"DROP TABLE {table_name} PURGE"
            cursor.execute(ddl)
        logger.info("Table dropped successfully...")
    else:
        logger.info("Table not found...")
    return
[docs]
@_handle_exceptions
def drop_index_if_exists(client: Any, index_name: str) -> None:
    """Drop an index if it exists.

    Args:
        client: The OracleDB connection object.
        index_name: The name of the index to drop.

    Raises:
        RuntimeError: If an error occurs while dropping the index.
    """
    connection = _get_connection(client)
    if connection is None:
        raise ValueError("Failed to acquire a connection.")
    if _index_exists(connection, index_name):
        drop_query = f"DROP INDEX {index_name}"
        with connection.cursor() as cursor:
            cursor.execute(drop_query)
            logger.info(f"Index {index_name} has been dropped.")
    else:
        logger.exception(f"Index {index_name} does not exist.")
    return
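# Usage sketch (illustrative): tearing down an index and its table, e.g. between test
# runs. Assumes `conn` is an open oracledb connection; the object names are
# hypothetical.
#
#     drop_index_if_exists(conn, "hnsw_idx1")
#     drop_table_purge(conn, "my_langchain_table")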
[docs]
class OracleVS(VectorStore):
    """`OracleVS` vector store.

    To use, you should have both:
    - the ``oracledb`` python package installed
    - a connection string associated with an OracleDBCluster having a deployed
      Search index

    Example:
        .. code-block:: python

            from langchain.vectorstores import OracleVS
            from langchain.embeddings.openai import OpenAIEmbeddings
            import oracledb

            with oracledb.connect(user=user, password=pwd, dsn=dsn) as connection:
                print("Database version:", connection.version)
                embeddings = OpenAIEmbeddings()
                query = ""
                vectors = OracleVS(connection, embeddings, table_name, query=query)
    """
[docs]
    def __init__(
        self,
        client: Any,
        embedding_function: Union[
            Callable[[str], List[float]],
            Embeddings,
        ],
        table_name: str,
        distance_strategy: DistanceStrategy = DistanceStrategy.EUCLIDEAN_DISTANCE,
        query: Optional[str] = "What is a Oracle database",
        params: Optional[Dict[str, Any]] = None,
    ):
        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        self.insert_mode = "array"

        connection = _get_connection(client)
        if connection is None:
            raise ValueError("Failed to acquire a connection.")

        if hasattr(connection, "thin") and connection.thin:
            if oracledb.__version__ == "2.1.0":
                raise Exception(
                    "Oracle DB python thin client driver version 2.1.0 not supported"
                )
            elif _compare_version(oracledb.__version__, "2.2.0"):
                self.insert_mode = "clob"
            else:
                self.insert_mode = "array"
        else:
            if (_compare_version(oracledb.__version__, "2.1.0")) and (
                not (
                    _compare_version(
                        ".".join(map(str, oracledb.clientversion())), "23.4"
                    )
                )
            ):
                raise Exception(
                    "Oracle DB python thick client driver version earlier than "
                    "2.1.0 not supported with client libraries greater than or "
                    "equal to 23.4"
                )

            if _compare_version(".".join(map(str, oracledb.clientversion())), "23.4"):
                self.insert_mode = "clob"
            else:
                self.insert_mode = "array"

            if _compare_version(oracledb.__version__, "2.1.0"):
                self.insert_mode = "clob"

        try:
            """Initialize with oracledb client."""
            self.client = client
            """Initialize with necessary components."""
            if not isinstance(embedding_function, Embeddings):
                logger.warning(
                    "`embedding_function` is expected to be an Embeddings "
                    "object, support "
                    "for passing in a function will soon be removed."
                )
            self.embedding_function = embedding_function
            self.query = query
            embedding_dim = self.get_embedding_dimension()

            self.table_name = table_name
            self.distance_strategy = distance_strategy
            self.params = params

            _create_table(connection, table_name, embedding_dim)
        except oracledb.DatabaseError as db_err:
            logger.exception(f"Database error occurred while creating table: {db_err}")
            raise RuntimeError(
                "Failed to create table due to a database error."
            ) from db_err
        except ValueError as val_err:
            logger.exception(f"Validation error: {val_err}")
            raise RuntimeError(
                "Failed to create table due to a validation error."
            ) from val_err
        except Exception as ex:
            logger.exception("An unexpected error occurred while creating the index.")
            raise RuntimeError(
                "Failed to create table due to an unexpected error."
            ) from ex
    @property
    def embeddings(self) -> Optional[Embeddings]:
        """
        A property that returns an Embeddings instance if embedding_function
        is an instance of Embeddings, otherwise returns None.

        Returns:
            Optional[Embeddings]: The embedding function if it's an instance of
            Embeddings, otherwise None.
        """
        return (
            self.embedding_function
            if isinstance(self.embedding_function, Embeddings)
            else None
        )
[docs]
    def get_embedding_dimension(self) -> int:
        # Embed the single document by wrapping it in a list
        embedded_document = self._embed_documents(
            [self.query if self.query is not None else ""]
        )

        # Get the first (and only) embedding's dimension
        return len(embedded_document[0])
    def _embed_documents(self, texts: List[str]) -> List[List[float]]:
        if isinstance(self.embedding_function, Embeddings):
            return self.embedding_function.embed_documents(texts)
        elif callable(self.embedding_function):
            return [self.embedding_function(text) for text in texts]
        else:
            raise TypeError(
                "The embedding_function is neither Embeddings nor callable."
            )

    def _embed_query(self, text: str) -> List[float]:
        if isinstance(self.embedding_function, Embeddings):
            return self.embedding_function.embed_query(text)
        else:
            return self.embedding_function(text)
[docs]
    @_handle_exceptions
    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[Dict[Any, Any]]] = None,
        ids: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Add more texts to the vectorstore index.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
            ids: Optional list of ids for the texts that are being added to
                the vector store.
            kwargs: vectorstore specific parameters
        """
        texts = list(texts)
        if ids:
            # If ids are provided, hash them to maintain consistency
            processed_ids = [
                hashlib.sha256(_id.encode()).hexdigest()[:16].upper() for _id in ids
            ]
        elif metadatas and all("id" in metadata for metadata in metadatas):
            # If no ids are provided but metadatas with ids are, generate
            # ids from metadatas
            processed_ids = [
                hashlib.sha256(metadata["id"].encode()).hexdigest()[:16].upper()
                for metadata in metadatas
            ]
        else:
            # Generate new ids if none are provided
            generated_ids = [
                str(uuid.uuid4()) for _ in texts
            ]  # uuid4 is more standard for random UUIDs
            processed_ids = [
                hashlib.sha256(_id.encode()).hexdigest()[:16].upper()
                for _id in generated_ids
            ]

        embeddings = self._embed_documents(texts)
        if not metadatas:
            metadatas = [{} for _ in texts]

        docs: List[Tuple[Any, Any, Any, Any]]
        if self.insert_mode == "clob":
            docs = [
                (id_, json.dumps(embedding), json.dumps(metadata), text)
                for id_, embedding, metadata, text in zip(
                    processed_ids, embeddings, metadatas, texts
                )
            ]
        else:
            docs = [
                (id_, array.array("f", embedding), json.dumps(metadata), text)
                for id_, embedding, metadata, text in zip(
                    processed_ids, embeddings, metadatas, texts
                )
            ]

        connection = _get_connection(self.client)
        if connection is None:
            raise ValueError("Failed to acquire a connection.")
        with connection.cursor() as cursor:
            cursor.executemany(
                f"INSERT INTO {self.table_name} (id, embedding, metadata, "
                f"text) VALUES (:1, :2, :3, :4)",
                docs,
            )
            connection.commit()
        return processed_ids
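    # Usage sketch (illustrative): inserting a few documents with metadata. Assumes
    # `vs` is an OracleVS instance; ids passed here are hashed with SHA-256 before
    # being stored, as implemented above.
    #
    #     returned_ids = vs.add_texts(
    #         texts=["Oracle 23ai supports vectors", "LangChain integrates stores"],
    #         metadatas=[{"source": "notes"}, {"source": "docs"}],
    #         ids=["doc-1", "doc-2"],
    #     )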
[docs]
    def similarity_search(
        self,
        query: str,
        k: int = 4,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs most similar to query."""
        embedding: List[float] = []
        if isinstance(self.embedding_function, Embeddings):
            embedding = self.embedding_function.embed_query(query)
        documents = self.similarity_search_by_vector(
            embedding=embedding, k=k, filter=filter, **kwargs
        )
        return documents
[docs]
    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        filter: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        """Return docs most similar to query."""
        embedding: List[float] = []
        if isinstance(self.embedding_function, Embeddings):
            embedding = self.embedding_function.embed_query(query)
        docs_and_scores = self.similarity_search_by_vector_with_relevance_scores(
            embedding=embedding, k=k, filter=filter, **kwargs
        )
        return docs_and_scores
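    # Usage sketch (illustrative): a filtered search returning distances. The filter
    # semantics implemented below require each metadata value to be contained in the
    # supplied list. `vs` is assumed to be an OracleVS instance.
    #
    #     hits = vs.similarity_search_with_score(
    #         "What is a relational database?", k=3, filter={"source": ["docs"]}
    #     )
    #     for doc, distance in hits:
    #         print(distance, doc.page_content[:80])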
    @_handle_exceptions
    def _get_clob_value(self, result: Any) -> str:
        try:
            import oracledb
        except ImportError as e:
            raise ImportError(
                "Unable to import oracledb, please install with "
                "`pip install -U oracledb`."
            ) from e

        clob_value = ""
        if result:
            if isinstance(result, oracledb.LOB):
                raw_data = result.read()
                if isinstance(raw_data, bytes):
                    clob_value = raw_data.decode(
                        "utf-8"
                    )  # Specify the correct encoding
                else:
                    clob_value = raw_data
            elif isinstance(result, str):
                clob_value = result
            else:
                raise Exception("Unexpected type:", type(result))
        return clob_value
[docs]
    @_handle_exceptions
    def similarity_search_by_vector_with_relevance_scores(
        self,
        embedding: List[float],
        k: int = 4,
        filter: Optional[dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float]]:
        docs_and_scores = []

        embedding_arr: Any
        if self.insert_mode == "clob":
            embedding_arr = json.dumps(embedding)
        else:
            embedding_arr = array.array("f", embedding)

        query = f"""
        SELECT id,
          text,
          metadata,
          vector_distance(embedding, :embedding,
          {_get_distance_function(self.distance_strategy)}) as distance
        FROM {self.table_name}
        ORDER BY distance
        FETCH APPROX FIRST {k} ROWS ONLY
        """

        # Execute the query
        connection = _get_connection(self.client)
        if connection is None:
            raise ValueError("Failed to acquire a connection.")
        with connection.cursor() as cursor:
            cursor.execute(query, embedding=embedding_arr)
            results = cursor.fetchall()

            # Filter results if filter is provided
            for result in results:
                metadata = dict(result[2]) if isinstance(result[2], dict) else {}

                # Apply filtering based on the 'filter' dictionary
                if filter:
                    if all(
                        metadata.get(key) in value for key, value in filter.items()
                    ):
                        doc = Document(
                            page_content=(
                                self._get_clob_value(result[1])
                                if result[1] is not None
                                else ""
                            ),
                            metadata=metadata,
                        )
                        distance = result[3]
                        docs_and_scores.append((doc, distance))
                else:
                    doc = Document(
                        page_content=(
                            self._get_clob_value(result[1])
                            if result[1] is not None
                            else ""
                        ),
                        metadata=metadata,
                    )
                    distance = result[3]
                    docs_and_scores.append((doc, distance))
        return docs_and_scores
[docs]
    @_handle_exceptions
    def similarity_search_by_vector_returning_embeddings(
        self,
        embedding: List[float],
        k: int,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Tuple[Document, float, NDArray[np.float32]]]:
        embedding_arr: Any
        if self.insert_mode == "clob":
            embedding_arr = json.dumps(embedding)
        else:
            embedding_arr = array.array("f", embedding)

        documents = []

        query = f"""
        SELECT id,
          text,
          metadata,
          vector_distance(embedding, :embedding,
          {_get_distance_function(self.distance_strategy)}) as distance,
          embedding
        FROM {self.table_name}
        ORDER BY distance
        FETCH APPROX FIRST {k} ROWS ONLY
        """

        # Execute the query
        connection = _get_connection(self.client)
        if connection is None:
            raise ValueError("Failed to acquire a connection.")
        with connection.cursor() as cursor:
            cursor.execute(query, embedding=embedding_arr)
            results = cursor.fetchall()

            for result in results:
                page_content_str = self._get_clob_value(result[1])
                metadata = result[2] if isinstance(result[2], dict) else {}

                # Apply filter if provided and matches; otherwise, add all
                # documents
                if not filter or all(
                    metadata.get(key) in value for key, value in filter.items()
                ):
                    document = Document(
                        page_content=page_content_str, metadata=metadata
                    )
                    distance = result[3]

                    # Assuming result[4] is already in the correct format;
                    # adjust if necessary
                    current_embedding = (
                        np.array(result[4], dtype=np.float32)
                        if result[4]
                        else np.empty(0, dtype=np.float32)
                    )

                    documents.append((document, distance, current_embedding))
        return documents  # type: ignore
[docs]
    @_handle_exceptions
    def max_marginal_relevance_search_with_score_by_vector(
        self,
        embedding: List[float],
        *,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
    ) -> List[Tuple[Document, float]]:
        """Return docs and their similarity scores selected using the maximal
        marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND
        diversity among selected documents.

        Args:
            self: An instance of the class
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch before filtering to
                pass to MMR algorithm.
            filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults
                to None.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.

        Returns:
            List of Documents and similarity scores selected by maximal
            marginal relevance and score for each.
        """
        # Fetch documents and their scores
        docs_scores_embeddings = self.similarity_search_by_vector_returning_embeddings(
            embedding, fetch_k, filter=filter
        )
        # Assuming documents_with_scores is a list of tuples (Document, score)

        # If you need to split documents and scores for processing (e.g.,
        # for MMR calculation)
        documents, scores, embeddings = (
            zip(*docs_scores_embeddings) if docs_scores_embeddings else ([], [], [])
        )

        # Assume maximal_marginal_relevance method accepts embeddings and
        # scores, and returns indices of selected docs
        mmr_selected_indices = maximal_marginal_relevance(
            np.array(embedding, dtype=np.float32),
            list(embeddings),
            k=k,
            lambda_mult=lambda_mult,
        )

        # Filter documents based on MMR-selected indices and map scores
        mmr_selected_documents_with_scores = [
            (documents[i], scores[i]) for i in mmr_selected_indices
        ]

        return mmr_selected_documents_with_scores
[docs]
    @_handle_exceptions
    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND
        diversity among selected documents.

        Args:
            self: An instance of the class
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            filter: Optional[Dict[str, Any]]
            **kwargs: Any

        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        docs_and_scores = self.max_marginal_relevance_search_with_score_by_vector(
            embedding, k=k, fetch_k=fetch_k, lambda_mult=lambda_mult, filter=filter
        )
        return [doc for doc, _ in docs_and_scores]
[docs]
    @_handle_exceptions
    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND
        diversity among selected documents.

        Args:
            self: An instance of the class
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                of diversity among the results with 0 corresponding
                to maximum diversity and 1 to minimum diversity.
                Defaults to 0.5.
            filter: Optional[Dict[str, Any]]
            **kwargs

        Returns:
            List of Documents selected by maximal marginal relevance.

        `max_marginal_relevance_search` requires that `query` returns matched
        embeddings alongside the matched documents.
        """
        embedding = self._embed_query(query)
        documents = self.max_marginal_relevance_search_by_vector(
            embedding,
            k=k,
            fetch_k=fetch_k,
            lambda_mult=lambda_mult,
            filter=filter,
            **kwargs,
        )
        return documents
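    # Usage sketch (illustrative): diversity-aware retrieval via MMR. `vs` is assumed
    # to be an OracleVS instance backed by an Embeddings object, since the query text
    # must be embedded before the MMR re-ranking above can run.
    #
    #     docs = vs.max_marginal_relevance_search(
    #         "vector indexes in Oracle", k=4, fetch_k=20, lambda_mult=0.5
    #     )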
[docs]
    @_handle_exceptions
    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
        """Delete by vector IDs.

        Args:
            self: An instance of the class
            ids: List of ids to delete.
            **kwargs
        """
        if ids is None:
            raise ValueError("No ids provided to delete.")

        # Compute SHA-256 hashes of the ids and truncate them
        hashed_ids = [
            hashlib.sha256(_id.encode()).hexdigest()[:16].upper() for _id in ids
        ]

        # Constructing the SQL statement with individual placeholders
        placeholders = ", ".join([":id" + str(i + 1) for i in range(len(hashed_ids))])

        ddl = f"DELETE FROM {self.table_name} WHERE id IN ({placeholders})"

        # Preparing bind variables
        bind_vars = {
            f"id{i}": hashed_id for i, hashed_id in enumerate(hashed_ids, start=1)
        }

        connection = _get_connection(self.client)
        if connection is None:
            raise ValueError("Failed to acquire a connection.")
        with connection.cursor() as cursor:
            cursor.execute(ddl, bind_vars)
            connection.commit()
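    # Usage sketch (illustrative): deleting previously added documents. Pass the same
    # raw ids that were given to `add_texts`; they are re-hashed above before the
    # DELETE statement runs.
    #
    #     vs.delete(ids=["doc-1", "doc-2"])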
[docs]
    @classmethod
    @_handle_exceptions
    def from_texts(
        cls: Type[OracleVS],
        texts: Iterable[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> OracleVS:
        client: Any = kwargs.get("client", None)
        if client is None:
            raise ValueError("client parameter is required...")
        params = kwargs.get("params", {})

        table_name = str(kwargs.get("table_name", "langchain"))

        distance_strategy = cast(
            DistanceStrategy, kwargs.get("distance_strategy", None)
        )
        if not isinstance(distance_strategy, DistanceStrategy):
            raise TypeError(
                f"Expected DistanceStrategy, got {type(distance_strategy).__name__}"
            )

        query = kwargs.get("query", "What is a Oracle database")

        drop_table_purge(client, table_name)

        vss = cls(
            client=client,
            embedding_function=embedding,
            table_name=table_name,
            distance_strategy=distance_strategy,
            query=query,
            params=params,
        )
        vss.add_texts(texts=list(texts), metadatas=metadatas)
        return vss
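# Usage sketch (illustrative): building a store end to end with `from_texts`. Note
# that this classmethod drops and recreates the target table. `emb` is assumed to be
# any Embeddings implementation and `conn` an open oracledb connection.
#
#     vs = OracleVS.from_texts(
#         ["some text", "more text"],
#         emb,
#         client=conn,
#         table_name="my_langchain_table",
#         distance_strategy=DistanceStrategy.COSINE,
#     )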