# Source code for langchain_community.vectorstores.bigquery_vector_search
"""Vector Store in Google Cloud BigQuery."""from__future__importannotationsimportasyncioimportjsonimportloggingimportsysimportuuidfromdatetimeimportdatetimefromfunctoolsimportpartialfromthreadingimportLock,ThreadfromtypingimportAny,Callable,Dict,List,Optional,Tuple,Typeimportnumpyasnpfromlangchain_core._api.deprecationimportdeprecatedfromlangchain_core.documentsimportDocumentfromlangchain_core.embeddingsimportEmbeddingsfromlangchain_core.vectorstoresimportVectorStorefromlangchain_community.utils.googleimportget_client_infofromlangchain_community.vectorstores.utilsimport(DistanceStrategy,maximal_marginal_relevance,)DEFAULT_DISTANCE_STRATEGY=DistanceStrategy.EUCLIDEAN_DISTANCEDEFAULT_DOC_ID_COLUMN_NAME="doc_id"# document idDEFAULT_TEXT_EMBEDDING_COLUMN_NAME="text_embedding"# embeddings vectorsDEFAULT_METADATA_COLUMN_NAME="metadata"# document metadataDEFAULT_CONTENT_COLUMN_NAME="content"# text content, do not renameDEFAULT_TOP_K=4# default number of documents returned from similarity search_MIN_INDEX_ROWS=5000# minimal number of rows for creating an index_INDEX_CHECK_PERIOD_SECONDS=60# Do not check for index more often that this._vector_table_lock=Lock()# process-wide BigQueryVectorSearch table lock
@deprecated(
    since="0.0.33",
    removal="1.0",
    alternative_import="langchain_google_community.BigQueryVectorSearch",
)
class BigQueryVectorSearch(VectorStore):
    """Google Cloud BigQuery vector store.

    To use, you need the following packages installed:
        google-cloud-bigquery
    """
[docs]def__init__(self,embedding:Embeddings,project_id:str,dataset_name:str,table_name:str,location:str="US",content_field:str=DEFAULT_CONTENT_COLUMN_NAME,metadata_field:str=DEFAULT_METADATA_COLUMN_NAME,text_embedding_field:str=DEFAULT_TEXT_EMBEDDING_COLUMN_NAME,doc_id_field:str=DEFAULT_DOC_ID_COLUMN_NAME,distance_strategy:DistanceStrategy=DEFAULT_DISTANCE_STRATEGY,credentials:Optional[Any]=None,):"""Constructor for BigQueryVectorSearch. Args: embedding (Embeddings): Text Embedding model to use. project_id (str): GCP project. dataset_name (str): BigQuery dataset to store documents and embeddings. table_name (str): BigQuery table name. location (str, optional): BigQuery region. Defaults to `US`(multi-region). content_field (str): Specifies the column to store the content. Defaults to `content`. metadata_field (str): Specifies the column to store the metadata. Defaults to `metadata`. text_embedding_field (str): Specifies the column to store the embeddings vector. Defaults to `text_embedding`. doc_id_field (str): Specifies the column to store the document id. Defaults to `doc_id`. distance_strategy (DistanceStrategy, optional): Determines the strategy employed for calculating the distance between vectors in the embedding space. Defaults to EUCLIDEAN_DISTANCE. Available options are: - COSINE: Measures the similarity between two vectors of an inner product space. - EUCLIDEAN_DISTANCE: Computes the Euclidean distance between two vectors. This metric considers the geometric distance in the vector space, and might be more suitable for embeddings that rely on spatial relationships. This is the default behavior credentials (Credentials, optional): Custom Google Cloud credentials to use. Defaults to None. 
"""try:fromgoogle.cloudimportbigqueryclient_info=get_client_info(module="bigquery-vector-search")self.bq_client=bigquery.Client(project=project_id,location=location,credentials=credentials,client_info=client_info,)exceptModuleNotFoundError:raiseImportError("Please, install or upgrade the google-cloud-bigquery library: ""pip install google-cloud-bigquery")self._logger=logging.getLogger(__name__)self._creating_index=Falseself._have_index=Falseself.embedding_model=embeddingself.project_id=project_idself.dataset_name=dataset_nameself.table_name=table_nameself.location=locationself.content_field=content_fieldself.metadata_field=metadata_fieldself.text_embedding_field=text_embedding_fieldself.doc_id_field=doc_id_fieldself.distance_strategy=distance_strategyself._full_table_id=f"{self.project_id}.{self.dataset_name}.{self.table_name}"self._logger.debug("Using table `%s`",self.full_table_id)with_vector_table_lock:self.vectors_table=self._initialize_table()self._last_index_check=datetime.minself._initialize_vector_index()
def_initialize_table(self)->Any:"""Validates or creates the BigQuery table."""fromgoogle.cloudimportbigquerytable_ref=bigquery.TableReference.from_string(self._full_table_id)table=self.bq_client.create_table(table_ref,exists_ok=True)changed_schema=Falseschema=table.schema.copy()columns={c.name:cforcinschema}ifself.doc_id_fieldnotincolumns:changed_schema=Trueschema.append(bigquery.SchemaField(name=self.doc_id_field,field_type="STRING"))elif(columns[self.doc_id_field].field_type!="STRING"orcolumns[self.doc_id_field].mode=="REPEATED"):raiseValueError(f"Column {self.doc_id_field} must be of STRING type")ifself.metadata_fieldnotincolumns:changed_schema=Trueschema.append(bigquery.SchemaField(name=self.metadata_field,field_type="JSON"))elif(columns[self.metadata_field].field_typenotin["JSON","STRING"]orcolumns[self.metadata_field].mode=="REPEATED"):raiseValueError(f"Column {self.metadata_field} must be of STRING or JSON type")ifself.content_fieldnotincolumns:changed_schema=Trueschema.append(bigquery.SchemaField(name=self.content_field,field_type="STRING"))elif(columns[self.content_field].field_type!="STRING"orcolumns[self.content_field].mode=="REPEATED"):raiseValueError(f"Column {self.content_field} must be of STRING type")ifself.text_embedding_fieldnotincolumns:changed_schema=Trueschema.append(bigquery.SchemaField(name=self.text_embedding_field,field_type="FLOAT64",mode="REPEATED",))elif(columns[self.text_embedding_field].field_typenotin("FLOAT","FLOAT64")orcolumns[self.text_embedding_field].mode!="REPEATED"):raiseValueError(f"Column {self.text_embedding_field} must be of ARRAY<FLOAT64> type")ifchanged_schema:self._logger.debug("Updated table `%s` schema.",self.full_table_id)table.schema=schematable=self.bq_client.update_table(table,fields=["schema"])returntabledef_initialize_vector_index(self)->Any:""" A vector index in BigQuery table enables efficient approximate vector search. 
"""fromgoogle.cloudimportbigqueryifself._have_indexorself._creating_index:# Already have an index or in the process of creating one.returntable=self.bq_client.get_table(self.vectors_table)if(table.num_rowsor0)<_MIN_INDEX_ROWS:# Not enough rows to create index.self._logger.debug("Not enough rows to create a vector index.")returnif(datetime.utcnow()-self._last_index_check).total_seconds()<_INDEX_CHECK_PERIOD_SECONDS:returnwith_vector_table_lock:ifself._creating_indexorself._have_index:returnself._last_index_check=datetime.utcnow()# Check if index exists, create if necessarycheck_query=(f"SELECT 1 FROM `{self.project_id}.{self.dataset_name}"".INFORMATION_SCHEMA.VECTOR_INDEXES` WHERE"f" table_name = '{self.table_name}'")job=self.bq_client.query(check_query,api_method=bigquery.enums.QueryApiMethod.QUERY)ifjob.result().total_rows==0:# Need to create an index. Make it in a separate thread.self._create_index_in_background()else:self._logger.debug("Vector index already exists.")self._have_index=Truedef_create_index_in_background(self):# type: ignore[no-untyped-def]ifself._have_indexorself._creating_index:# Already have an index or in the process of creating one.returnself._creating_index=Trueself._logger.debug("Trying to create a vector index.")thread=Thread(target=self._create_index,daemon=True)thread.start()def_create_index(self):# type: ignore[no-untyped-def]fromgoogle.api_core.exceptionsimportClientErrortable=self.bq_client.get_table(self.vectors_table)if(table.num_rowsor0)<_MIN_INDEX_ROWS:# Not enough rows to create index.returnifself.distance_strategy==DistanceStrategy.EUCLIDEAN_DISTANCE:distance_type="EUCLIDEAN"elifself.distance_strategy==DistanceStrategy.COSINE:distance_type="COSINE"# Default to EUCLIDEAN_DISTANCEelse:distance_type="EUCLIDEAN"index_name=f"{self.table_name}_langchain_index"try:sql=f""" CREATE VECTOR INDEX IF NOT EXISTS `{index_name}` ON `{self.full_table_id}`({self.text_embedding_field}) OPTIONS(distance_type="{distance_type}", index_type="IVF") 
"""self.bq_client.query(sql).result()self._have_index=TrueexceptClientErrorasex:self._logger.debug("Vector index creation failed (%s).",ex.args[0])finally:self._creating_index=Falsedef_persist(self,data:Dict[str,Any])->None:"""Saves documents and embeddings to BigQuery."""fromgoogle.cloudimportbigquerydata_len=len(data[list(data.keys())[0]])ifdata_len==0:returnlist_of_dicts=[dict(zip(data,t))fortinzip(*data.values())]job_config=bigquery.LoadJobConfig()job_config.schema=self.vectors_table.schemajob_config.schema_update_options=(bigquery.SchemaUpdateOption.ALLOW_FIELD_ADDITION)job_config.write_disposition=bigquery.WriteDisposition.WRITE_APPENDjob=self.bq_client.load_table_from_json(list_of_dicts,self.vectors_table,job_config=job_config)job.result()@propertydefembeddings(self)->Optional[Embeddings]:returnself.embedding_model@propertydeffull_table_id(self)->str:returnself._full_table_id
[docs]defadd_texts(# type: ignore[override]self,texts:List[str],metadatas:Optional[List[dict]]=None,**kwargs:Any,)->List[str]:"""Run more texts through the embeddings and add to the vectorstore. Args: texts: List of strings to add to the vectorstore. metadatas: Optional list of metadata associated with the texts. Returns: List of ids from adding the texts into the vectorstore. """embs=self.embedding_model.embed_documents(texts)returnself.add_texts_with_embeddings(texts,embs,metadatas,**kwargs)
[docs]defadd_texts_with_embeddings(self,texts:List[str],embs:List[List[float]],metadatas:Optional[List[dict]]=None,**kwargs:Any,)->List[str]:"""Run more texts through the embeddings and add to the vectorstore. Args: texts: List of strings to add to the vectorstore. embs: List of lists of floats with text embeddings for texts. metadatas: Optional list of metadata associated with the texts. Returns: List of ids from adding the texts into the vectorstore. """ids=[uuid.uuid4().hexfor_intexts]values_dict:Dict[str,List[Any]]={self.content_field:texts,self.doc_id_field:ids,}ifnotmetadatas:metadatas=[]len_diff=len(ids)-len(metadatas)add_meta=[Nonefor_inrange(0,len_diff)]metadatas=[mifmisnotNoneelse{}forminmetadatas+add_meta]values_dict[self.metadata_field]=metadatasvalues_dict[self.text_embedding_field]=embsself._persist(values_dict)returnids
[docs]defget_documents(self,ids:Optional[List[str]]=None,filter:Optional[Dict[str,Any]]=None)->List[Document]:"""Search documents by their ids or metadata values. Args: ids: List of ids of documents to retrieve from the vectorstore. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } Returns: List of ids from adding the texts into the vectorstore. """ifidsandlen(ids)>0:fromgoogle.cloudimportbigqueryjob_config=bigquery.QueryJobConfig(query_parameters=[bigquery.ArrayQueryParameter("ids","STRING",ids),])id_expr=f"{self.doc_id_field} IN UNNEST(@ids)"else:job_config=Noneid_expr="TRUE"iffilter:filter_expressions=[]foriinfilter.items():ifisinstance(i[1],float):expr=("ABS(CAST(JSON_VALUE("f"`{self.metadata_field}`,'$.{i[0]}') "f"AS FLOAT64) - {i[1]}) "f"<= {sys.float_info.epsilon}")else:val=str(i[1]).replace('"','\\"')expr=f"JSON_VALUE(`{self.metadata_field}`,'$.{i[0]}') = \"{val}\""filter_expressions.append(expr)filter_expression_str=" AND ".join(filter_expressions)where_filter_expr=f" AND ({filter_expression_str})"else:where_filter_expr=""job=self.bq_client.query(f""" SELECT * FROM `{self.full_table_id}` WHERE {id_expr}{where_filter_expr} """,job_config=job_config,)docs:List[Document]=[]forrowinjob:metadata=Noneifself.metadata_field:metadata=row[self.metadata_field]ifmetadata:ifnotisinstance(metadata,dict):metadata=json.loads(metadata)else:metadata={}metadata["__id"]=row[self.doc_id_field]doc=Document(page_content=row[self.content_field],metadata=metadata)docs.append(doc)returndocs
[docs]defdelete(self,ids:Optional[List[str]]=None,**kwargs:Any)->Optional[bool]:"""Delete by vector ID or other criteria. Args: ids: List of ids to delete. **kwargs: Other keyword arguments that subclasses might use. Returns: Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """ifnotidsorlen(ids)==0:returnTruefromgoogle.cloudimportbigqueryjob_config=bigquery.QueryJobConfig(query_parameters=[bigquery.ArrayQueryParameter("ids","STRING",ids),])self.bq_client.query(f""" DELETE FROM `{self.full_table_id}` WHERE {self.doc_id_field} IN UNNEST(@ids) """,job_config=job_config,).result()returnTrue
[docs]asyncdefadelete(self,ids:Optional[List[str]]=None,**kwargs:Any)->Optional[bool]:"""Delete by vector ID or other criteria. Args: ids: List of ids to delete. **kwargs: Other keyword arguments that subclasses might use. Returns: Optional[bool]: True if deletion is successful, False otherwise, None if not implemented. """returnawaitasyncio.get_running_loop().run_in_executor(None,partial(self.delete,**kwargs),ids)
def_search_with_score_and_embeddings_by_vector(self,embedding:List[float],k:int=DEFAULT_TOP_K,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,)->List[Tuple[Document,List[float],float]]:fromgoogle.cloudimportbigquery# Create an index if no index exists.ifnotself._have_indexandnotself._creating_index:self._initialize_vector_index()# Prepare filterfilter_expr="TRUE"iffilter:filter_expressions=[]foriinfilter.items():ifisinstance(i[1],float):expr=("ABS(CAST(JSON_VALUE("f"base.`{self.metadata_field}`,'$.{i[0]}') "f"AS FLOAT64) - {i[1]}) "f"<= {sys.float_info.epsilon}")else:val=str(i[1]).replace('"','\\"')expr=(f"JSON_VALUE(base.`{self.metadata_field}`,'$.{i[0]}')"f' = "{val}"')filter_expressions.append(expr)filter_expression_str=" AND ".join(filter_expressions)filter_expr+=f" AND ({filter_expression_str})"# Configure and run a query job.job_config=bigquery.QueryJobConfig(query_parameters=[bigquery.ArrayQueryParameter("v","FLOAT64",embedding),],use_query_cache=False,priority=bigquery.QueryPriority.BATCH,)ifself.distance_strategy==DistanceStrategy.EUCLIDEAN_DISTANCE:distance_type="EUCLIDEAN"elifself.distance_strategy==DistanceStrategy.COSINE:distance_type="COSINE"# Default to EUCLIDEAN_DISTANCEelse:distance_type="EUCLIDEAN"ifbrute_force:options_string=",options => '{\"use_brute_force\":true}'"eliffraction_lists_to_search:iffraction_lists_to_search==0orfraction_lists_to_search>=1.0:raiseValueError("`fraction_lists_to_search` must be between 0.0 and 1.0")options_string=(',options => \'{"fraction_lists_to_search":'f"{fraction_lists_to_search}}}'")else:options_string=""query=f""" SELECT base.*, distance AS _vector_search_distance FROM VECTOR_SEARCH( TABLE `{self.full_table_id}`, "{self.text_embedding_field}", (SELECT @v AS {self.text_embedding_field}), distance_type => "{distance_type}", top_k => {k}{options_string} ) WHERE {filter_expr} LIMIT {k} """document_tuples:List[Tuple[Document,List[float],float]]=[]# 
TODO(vladkol): Use jobCreationMode=JOB_CREATION_OPTIONAL when available.job=self.bq_client.query(query,job_config=job_config,api_method=bigquery.enums.QueryApiMethod.QUERY)# Process job results.forrowinjob:metadata=row[self.metadata_field]ifmetadata:ifnotisinstance(metadata,dict):metadata=json.loads(metadata)else:metadata={}metadata["__id"]=row[self.doc_id_field]metadata["__job_id"]=job.job_iddoc=Document(page_content=row[self.content_field],metadata=metadata)document_tuples.append((doc,row[self.text_embedding_field],row["_vector_search_distance"],))returndocument_tuples
[docs]defsimilarity_search_with_score_by_vector(self,embedding:List[float],k:int=DEFAULT_TOP_K,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Tuple[Document,float]]:"""Return docs most similar to embedding vector. Args: embedding: Embedding to look up documents similar to. k: Number of Documents to return. Defaults to 4. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } brute_force: Whether to use brute force search. Defaults to False. fraction_lists_to_search: Optional percentage of lists to search, must be in range 0.0 and 1.0, exclusive. If Node, uses service's default which is 0.05. Returns: List of Documents most similar to the query vector with distance. """delkwargsdocument_tuples=self._search_with_score_and_embeddings_by_vector(embedding,k,filter,brute_force,fraction_lists_to_search)return[(doc,distance)fordoc,_,distanceindocument_tuples]
[docs]defsimilarity_search_by_vector(self,embedding:List[float],k:int=DEFAULT_TOP_K,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Document]:"""Return docs most similar to embedding vector. Args: embedding: Embedding to look up documents similar to. k: Number of Documents to return. Defaults to 4. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } brute_force: Whether to use brute force search. Defaults to False. fraction_lists_to_search: Optional percentage of lists to search, must be in range 0.0 and 1.0, exclusive. If Node, uses service's default which is 0.05. Returns: List of Documents most similar to the query vector. """tuples=self.similarity_search_with_score_by_vector(embedding,k,filter,brute_force,fraction_lists_to_search,**kwargs)return[i[0]foriintuples]
[docs]defsimilarity_search_with_score(self,query:str,k:int=DEFAULT_TOP_K,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Tuple[Document,float]]:"""Run similarity search with score. Args: query: search query text. k: Number of Documents to return. Defaults to 4. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } brute_force: Whether to use brute force search. Defaults to False. fraction_lists_to_search: Optional percentage of lists to search, must be in range 0.0 and 1.0, exclusive. If Node, uses service's default which is 0.05. Returns: List of Documents most similar to the query vector, with similarity scores. """emb=self.embedding_model.embed_query(query)# type: ignorereturnself.similarity_search_with_score_by_vector(emb,k,filter,brute_force,fraction_lists_to_search,**kwargs)
[docs]defsimilarity_search(self,query:str,k:int=DEFAULT_TOP_K,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Document]:"""Run similarity search. Args: query: search query text. k: Number of Documents to return. Defaults to 4. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } brute_force: Whether to use brute force search. Defaults to False. fraction_lists_to_search: Optional percentage of lists to search, must be in range 0.0 and 1.0, exclusive. If Node, uses service's default which is 0.05. Returns: List of Documents most similar to the query vector. """tuples=self.similarity_search_with_score(query,k,filter,brute_force,fraction_lists_to_search,**kwargs)return[i[0]foriintuples]
def_select_relevance_score_fn(self)->Callable[[float],float]:ifself.distance_strategy==DistanceStrategy.COSINE:returnBigQueryVectorSearch._cosine_relevance_score_fnelse:raiseValueError("Relevance score is not supported "f"for `{self.distance_strategy}` distance.")
[docs]defmax_marginal_relevance_search(self,query:str,k:int=DEFAULT_TOP_K,fetch_k:int=DEFAULT_TOP_K*5,lambda_mult:float=0.5,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. Args: query: search query text. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. lambda_mult: Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } brute_force: Whether to use brute force search. Defaults to False. fraction_lists_to_search: Optional percentage of lists to search, must be in range 0.0 and 1.0, exclusive. If Node, uses service's default which is 0.05. Returns: List of Documents selected by maximal marginal relevance. """query_embedding=self.embedding_model.embed_query(# type: ignorequery)doc_tuples=self._search_with_score_and_embeddings_by_vector(query_embedding,fetch_k,filter,brute_force,fraction_lists_to_search)doc_embeddings=[d[1]fordindoc_tuples]mmr_doc_indexes=maximal_marginal_relevance(np.array(query_embedding),doc_embeddings,lambda_mult=lambda_mult,k=k)return[doc_tuples[i][0]foriinmmr_doc_indexes]
[docs]defmax_marginal_relevance_search_by_vector(self,embedding:List[float],k:int=DEFAULT_TOP_K,fetch_k:int=DEFAULT_TOP_K*5,lambda_mult:float=0.5,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance. Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents. Args: embedding: Embedding to look up documents similar to. k: Number of Documents to return. Defaults to 4. fetch_k: Number of Documents to fetch to pass to MMR algorithm. lambda_mult: Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. filter: Filter on metadata properties, e.g. { "str_property": "foo", "int_property": 123 } brute_force: Whether to use brute force search. Defaults to False. fraction_lists_to_search: Optional percentage of lists to search, must be in range 0.0 and 1.0, exclusive. If Node, uses service's default which is 0.05. Returns: List of Documents selected by maximal marginal relevance. """doc_tuples=self._search_with_score_and_embeddings_by_vector(embedding,fetch_k,filter,brute_force,fraction_lists_to_search)doc_embeddings=[d[1]fordindoc_tuples]mmr_doc_indexes=maximal_marginal_relevance(np.array(embedding),doc_embeddings,lambda_mult=lambda_mult,k=k)return[doc_tuples[i][0]foriinmmr_doc_indexes]
[docs]asyncdefamax_marginal_relevance_search(self,query:str,k:int=DEFAULT_TOP_K,fetch_k:int=DEFAULT_TOP_K*5,lambda_mult:float=0.5,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance."""func=partial(self.max_marginal_relevance_search,query,k=k,fetch_k=fetch_k,lambda_mult=lambda_mult,filter=filter,brute_force=brute_force,fraction_lists_to_search=fraction_lists_to_search,**kwargs,)returnawaitasyncio.get_event_loop().run_in_executor(None,func)
[docs]asyncdefamax_marginal_relevance_search_by_vector(self,embedding:List[float],k:int=DEFAULT_TOP_K,fetch_k:int=DEFAULT_TOP_K*5,lambda_mult:float=0.5,filter:Optional[Dict[str,Any]]=None,brute_force:bool=False,fraction_lists_to_search:Optional[float]=None,**kwargs:Any,)->List[Document]:"""Return docs selected using the maximal marginal relevance."""returnawaitasyncio.get_running_loop().run_in_executor(None,partial(self.max_marginal_relevance_search_by_vector,**kwargs),embedding,k,fetch_k,lambda_mult,filter,brute_force,fraction_lists_to_search,)
[docs]@classmethoddeffrom_texts(cls:Type["BigQueryVectorSearch"],texts:List[str],embedding:Embeddings,metadatas:Optional[List[dict]]=None,**kwargs:Any,)->"BigQueryVectorSearch":"""Return VectorStore initialized from texts and embeddings."""vs_obj=BigQueryVectorSearch(embedding=embedding,**kwargs)vs_obj.add_texts(texts,metadatas)returnvs_obj
[docs]defexplore_job_stats(self,job_id:str)->Dict:"""Return the statistics for a single job execution. Args: job_id: The BigQuery Job id. Returns: A dictionary of job statistics for a given job. """returnself.bq_client.get_job(job_id)._properties["statistics"]