class Cassandra(VectorStore):
    # Cached dimensionality of the embedding vectors; stays None until the
    # first probe of the embedding model.
    _embedding_dimension: Union[int, None]

    def _get_embedding_dimension(self) -> int:
        """Return the embedding dimension, probing the model once and caching it."""
        if self._embedding_dimension is None:
            probe_vector = self.embedding.embed_query("This is a sample sentence.")
            self._embedding_dimension = len(probe_vector)
        return self._embedding_dimension

    async def _aget_embedding_dimension(self) -> int:
        """Async counterpart of ``_get_embedding_dimension``."""
        if self._embedding_dimension is None:
            probe_vector = await self.embedding.aembed_query(
                "This is a sample sentence."
            )
            self._embedding_dimension = len(probe_vector)
        return self._embedding_dimension
def __init__(
    self,
    embedding: Embeddings,
    session: Optional[Session] = None,
    keyspace: Optional[str] = None,
    table_name: str = "",
    ttl_seconds: Optional[int] = None,
    *,
    body_index_options: Optional[List[Tuple[str, Any]]] = None,
    setup_mode: SetupMode = SetupMode.SYNC,
    metadata_indexing: Union[Tuple[str, Iterable[str]], str] = "all",
) -> None:
    """Apache Cassandra(R) for vector-store workloads.

    To use it, you need a recent installation of the `cassio` library
    and a Cassandra cluster / Astra DB instance supporting vector
    capabilities. Visit the cassio.org website for extensive quickstarts
    and code examples.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Cassandra
            from langchain_openai import OpenAIEmbeddings

            embeddings = OpenAIEmbeddings()
            session = ...             # create your Cassandra session object
            keyspace = 'my_keyspace'  # the keyspace should exist already
            table_name = 'my_vector_store'
            vectorstore = Cassandra(embeddings, session, keyspace, table_name)

    Args:
        embedding: Embedding function to use.
        session: Cassandra driver session. If not provided, it is resolved
            from cassio.
        keyspace: Cassandra keyspace. If not provided, it is resolved
            from cassio.
        table_name: Cassandra table (required).
        ttl_seconds: Optional time-to-live for the added texts.
        body_index_options: Optional options used to create the body index.
            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]
        setup_mode: mode used to create the Cassandra table
            (SYNC, ASYNC or OFF).
        metadata_indexing: Optional specification of a metadata indexing
            policy, i.e. to fine-tune which of the metadata fields are indexed.
            It can be a string ("all" or "none"), or a 2-tuple. The following
            means that all fields except 'f1', 'f2' ... are NOT indexed:
            metadata_indexing=("allowlist", ["f1", "f2", ...])
            The following means all fields EXCEPT 'g1', 'g2', ... are indexed:
            metadata_indexing("denylist", ["g1", "g2", ...])
            The default is to index every metadata field.
            Note: if you plan to have massive unique text metadata entries,
            consider not indexing them for performance
            (and to overcome max-length limitations).
    """
    try:
        from cassio.table import MetadataVectorCassandraTable
    except (ImportError, ModuleNotFoundError):
        raise ImportError(
            "Could not import cassio python package. "
            "Please install it with `pip install cassio`."
        )
    if not table_name:
        raise ValueError("Missing required parameter 'table_name'.")

    self.embedding = embedding
    self.session = session
    self.keyspace = keyspace
    self.table_name = table_name
    self.ttl_seconds = ttl_seconds
    # Embedding dimension is probed lazily on first use.
    self._embedding_dimension = None

    table_kwargs: Dict[str, Any] = {}
    if body_index_options is not None:
        table_kwargs["body_index_options"] = body_index_options

    embedding_dimension: Union[int, Awaitable[int], None] = None
    if setup_mode == SetupMode.ASYNC:
        # cassio accepts an awaitable dimension when doing async setup.
        table_kwargs["async_setup"] = True
        embedding_dimension = self._aget_embedding_dimension()
    elif setup_mode == SetupMode.SYNC:
        embedding_dimension = self._get_embedding_dimension()

    self.table = MetadataVectorCassandraTable(
        session=session,
        keyspace=keyspace,
        table=table_name,
        vector_dimension=embedding_dimension,
        metadata_indexing=metadata_indexing,
        primary_key_type="TEXT",
        skip_provisioning=setup_mode == SetupMode.OFF,
        **table_kwargs,
    )
@propertydefembeddings(self)->Embeddings:returnself.embeddingdef_select_relevance_score_fn(self)->Callable[[float],float]:""" The underlying VectorTable already returns a "score proper", i.e. one in [0, 1] where higher means more *similar*, so here the final score transformation is not reversing the interval: """returnlambdascore:score
def delete_collection(self) -> None:
    """Alias for `clear` (to better align with other VectorStore
    implementations).
    """
    self.clear()
async def adelete_collection(self) -> None:
    """Alias for `aclear` (to better align with other VectorStore
    implementations).
    """
    await self.aclear()
def clear(self) -> None:
    """Empty the table."""
    self.table.clear()
async def aclear(self) -> None:
    """Empty the table."""
    await self.table.aclear()
def delete_by_document_id(self, document_id: str) -> None:
    """Delete by document ID.

    Args:
        document_id: the document ID to delete.
    """
    return self.table.delete(row_id=document_id)
async def adelete_by_document_id(self, document_id: str) -> None:
    """Delete by document ID.

    Args:
        document_id: the document ID to delete.
    """
    return await self.table.adelete(row_id=document_id)
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
    """Delete by vector IDs.

    Args:
        ids: List of ids to delete.

    Returns:
        Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
    """
    if ids is None:
        raise ValueError("No ids provided to delete.")
    for doc_id in ids:
        self.delete_by_document_id(doc_id)
    return True
async def adelete(
    self, ids: Optional[List[str]] = None, **kwargs: Any
) -> Optional[bool]:
    """Delete by vector IDs.

    Args:
        ids: List of ids to delete.

    Returns:
        Optional[bool]: True if deletion is successful,
            False otherwise, None if not implemented.
    """
    if ids is None:
        raise ValueError("No ids provided to delete.")
    for doc_id in ids:
        await self.adelete_by_document_id(doc_id)
    return True
def add_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    batch_size: int = 16,
    ttl_seconds: Optional[int] = None,
    **kwargs: Any,
) -> List[str]:
    """Run more texts through the embeddings and add to the vectorstore.

    Args:
        texts: Texts to add to the vectorstore.
        metadatas: Optional list of metadatas.
        ids: Optional list of IDs.
        batch_size: Number of concurrent requests to send to the server.
        ttl_seconds: Optional time-to-live for the added texts.

    Returns:
        List[str]: List of IDs of the added texts.
    """
    _texts = list(texts)
    if ids is None:
        ids = [uuid.uuid4().hex for _ in _texts]
    if metadatas is None:
        metadatas = [{}] * len(_texts)
    ttl_seconds = ttl_seconds or self.ttl_seconds

    embedding_vectors = self.embedding.embed_documents(_texts)

    # Issue writes in driver-level batches of `batch_size` concurrent futures,
    # waiting for each batch to settle before starting the next.
    for start in range(0, len(_texts), batch_size):
        stop = start + batch_size
        batch = zip(
            _texts[start:stop],
            embedding_vectors[start:stop],
            ids[start:stop],
            metadatas[start:stop],
        )
        futures = [
            self.table.put_async(
                row_id=text_id,
                body_blob=text,
                vector=vector,
                metadata=metadata or {},
                ttl_seconds=ttl_seconds,
            )
            for text, vector, text_id, metadata in batch
        ]
        for future in futures:
            future.result()
    return ids
async def aadd_texts(
    self,
    texts: Iterable[str],
    metadatas: Optional[List[dict]] = None,
    ids: Optional[List[str]] = None,
    concurrency: int = 16,
    ttl_seconds: Optional[int] = None,
    **kwargs: Any,
) -> List[str]:
    """Run more texts through the embeddings and add to the vectorstore.

    Args:
        texts: Texts to add to the vectorstore.
        metadatas: Optional list of metadatas.
        ids: Optional list of IDs.
        concurrency: Number of concurrent queries to the database.
            Defaults to 16.
        ttl_seconds: Optional time-to-live for the added texts.

    Returns:
        List[str]: List of IDs of the added texts.
    """
    _texts = list(texts)
    ids = ids or [uuid.uuid4().hex for _ in _texts]
    _metadatas: List[dict] = metadatas or [{}] * len(_texts)
    ttl_seconds = ttl_seconds or self.ttl_seconds
    embedding_vectors = await self.embedding.aembed_documents(_texts)

    # Bound in-flight writes with a semaphore instead of gathering one task
    # at a time.
    sem = asyncio.Semaphore(concurrency)

    async def send_concurrently(
        row_id: str, text: str, embedding_vector: List[float], metadata: dict
    ) -> None:
        async with sem:
            await self.table.aput(
                row_id=row_id,
                body_blob=text,
                vector=embedding_vector,
                metadata=metadata or {},
                ttl_seconds=ttl_seconds,
            )

    # BUG FIX: the previous implementation created and awaited ONE task per
    # loop iteration, serializing every write and making the `concurrency`
    # parameter ineffective. Create all tasks up front and gather once; the
    # semaphore above limits how many run at the same time.
    tasks = [
        asyncio.create_task(
            send_concurrently(ids[i], _texts[i], embedding_vectors[i], _metadatas[i])
        )
        for i in range(len(_texts))
    ]
    await asyncio.gather(*tasks)
    return ids
@staticmethod
def _search_to_documents(
    hits: Iterable[Dict[str, Any]],
) -> List[Tuple[Document, float, str]]:
    """Convert raw table hits into (Document, score, id) triples.

    We stick to the 'cos' distance as it can be normalized on a 0-1 axis
    (1 = most relevant), as required by this class' contract.
    """
    results = []
    for hit in hits:
        doc = Document(page_content=hit["body_blob"], metadata=hit["metadata"])
        normalized_score = 0.5 + 0.5 * hit["distance"]
        results.append((doc, normalized_score, hit["row_id"]))
    return results
def similarity_search_with_score_id_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float, str]]:
    """Return docs most similar to embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score, id), the most similar to the query vector.
    """
    search_kwargs: Dict[str, Any] = {}
    if filter is not None:
        search_kwargs["metadata"] = filter
    if body_search is not None:
        search_kwargs["body_search"] = body_search
    hits = self.table.metric_ann_search(
        vector=embedding,
        n=k,
        metric="cos",
        **search_kwargs,
    )
    return self._search_to_documents(hits)
async def asimilarity_search_with_score_id_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float, str]]:
    """Return docs most similar to embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score, id), the most similar to the query vector.
    """
    search_kwargs: Dict[str, Any] = {}
    if filter is not None:
        search_kwargs["metadata"] = filter
    if body_search is not None:
        search_kwargs["body_search"] = body_search
    hits = await self.table.ametric_ann_search(
        vector=embedding,
        n=k,
        metric="cos",
        **search_kwargs,
    )
    return self._search_to_documents(hits)
def similarity_search_with_score_id(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float, str]]:
    """Return docs most similar to query.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score, id), the most similar to the query vector.
    """
    query_vector = self.embedding.embed_query(query)
    return self.similarity_search_with_score_id_by_vector(
        embedding=query_vector,
        k=k,
        filter=filter,
        body_search=body_search,
    )
async def asimilarity_search_with_score_id(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float, str]]:
    """Return docs most similar to query.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score, id), the most similar to the query vector.
    """
    query_vector = await self.embedding.aembed_query(query)
    return await self.asimilarity_search_with_score_id_by_vector(
        embedding=query_vector,
        k=k,
        filter=filter,
        body_search=body_search,
    )
# id-unaware search facilities
def similarity_search_with_score_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score), the most similar to the query vector.
    """
    full_hits = self.similarity_search_with_score_id_by_vector(
        embedding=embedding,
        k=k,
        filter=filter,
        body_search=body_search,
    )
    # Drop the document id from each triple.
    return [(doc, score) for doc, score, _ in full_hits]
async def asimilarity_search_with_score_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score), the most similar to the query vector.
    """
    full_hits = await self.asimilarity_search_with_score_id_by_vector(
        embedding=embedding,
        k=k,
        filter=filter,
        body_search=body_search,
    )
    # Drop the document id from each triple.
    return [(doc, score) for doc, score, _ in full_hits]
def similarity_search(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to query.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Document, the most similar to the query vector.
    """
    query_vector = self.embedding.embed_query(query)
    return self.similarity_search_by_vector(
        query_vector,
        k,
        filter=filter,
        body_search=body_search,
    )
async def asimilarity_search(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to query.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Document, the most similar to the query vector.
    """
    query_vector = await self.embedding.aembed_query(query)
    return await self.asimilarity_search_by_vector(
        query_vector,
        k,
        filter=filter,
        body_search=body_search,
    )
def similarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Document, the most similar to the query vector.
    """
    scored = self.similarity_search_with_score_by_vector(
        embedding,
        k,
        filter=filter,
        body_search=body_search,
    )
    return [doc for doc, _ in scored]
async def asimilarity_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs most similar to embedding vector.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Document, the most similar to the query vector.
    """
    scored = await self.asimilarity_search_with_score_by_vector(
        embedding,
        k,
        filter=filter,
        body_search=body_search,
    )
    return [doc for doc, _ in scored]
def similarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score), the most similar to the query vector.
    """
    query_vector = self.embedding.embed_query(query)
    return self.similarity_search_with_score_by_vector(
        query_vector,
        k,
        filter=filter,
        body_search=body_search,
    )
async def asimilarity_search_with_score(
    self,
    query: str,
    k: int = 4,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
) -> List[Tuple[Document, float]]:
    """Return docs most similar to query.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of (Document, score), the most similar to the query vector.
    """
    query_vector = await self.embedding.aembed_query(query)
    return await self.asimilarity_search_with_score_by_vector(
        query_vector,
        k,
        filter=filter,
        body_search=body_search,
    )
@staticmethod
def _mmr_search_to_documents(
    prefetch_hits: List[Dict[str, Any]],
    embedding: List[float],
    k: int,
    lambda_mult: float,
) -> List[Document]:
    """Run MMR reranking over prefetched hits and return the chosen Documents."""
    # Let the MMR utility pick the *indices* within the prefetched hits.
    chosen = maximal_marginal_relevance(
        np.array(embedding, dtype=np.float32),
        [hit["vector"] for hit in prefetch_hits],
        k=k,
        lambda_mult=lambda_mult,
    )
    selected_hits = [
        hit for index, hit in enumerate(prefetch_hits) if index in chosen
    ]
    return [
        Document(page_content=hit["body_blob"], metadata=hit["metadata"])
        for hit in selected_hits
    ]
def max_marginal_relevance_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            Defaults to 20.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    ann_kwargs: Dict[str, Any] = {}
    if filter is not None:
        ann_kwargs["metadata"] = filter
    if body_search is not None:
        ann_kwargs["body_search"] = body_search
    prefetch_hits = list(
        self.table.metric_ann_search(
            vector=embedding,
            n=fetch_k,
            metric="cos",
            **ann_kwargs,
        )
    )
    return self._mmr_search_to_documents(prefetch_hits, embedding, k, lambda_mult)
async def amax_marginal_relevance_search_by_vector(
    self,
    embedding: List[float],
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        embedding: Embedding to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            Defaults to 20.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    ann_kwargs: Dict[str, Any] = {}
    if filter is not None:
        ann_kwargs["metadata"] = filter
    if body_search is not None:
        ann_kwargs["body_search"] = body_search
    prefetch_hits = list(
        await self.table.ametric_ann_search(
            vector=embedding,
            n=fetch_k,
            metric="cos",
            **ann_kwargs,
        )
    )
    return self._mmr_search_to_documents(prefetch_hits, embedding, k, lambda_mult)
def max_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 4.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            Defaults to 20.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    query_vector = self.embedding.embed_query(query)
    return self.max_marginal_relevance_search_by_vector(
        query_vector,
        k,
        fetch_k,
        lambda_mult=lambda_mult,
        filter=filter,
        body_search=body_search,
    )
async def amax_marginal_relevance_search(
    self,
    query: str,
    k: int = 4,
    fetch_k: int = 20,
    lambda_mult: float = 0.5,
    filter: Optional[Dict[str, str]] = None,
    body_search: Optional[Union[str, List[str]]] = None,
    **kwargs: Any,
) -> List[Document]:
    """Return docs selected using the maximal marginal relevance.

    Maximal marginal relevance optimizes for similarity to query AND
    diversity among selected documents.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return.
        fetch_k: Number of Documents to fetch to pass to MMR algorithm.
        lambda_mult: Number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to maximum diversity and 1 to minimum diversity.
            Defaults to 0.5.
        filter: Filter on the metadata to apply.
        body_search: Document textual search terms to apply.
            Only supported by Astra DB at the moment.

    Returns:
        List of Documents selected by maximal marginal relevance.
    """
    query_vector = await self.embedding.aembed_query(query)
    return await self.amax_marginal_relevance_search_by_vector(
        query_vector,
        k,
        fetch_k,
        lambda_mult=lambda_mult,
        filter=filter,
        body_search=body_search,
    )
@classmethod
def from_texts(
    cls: Type[CVST],
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    *,
    session: Optional[Session] = None,
    keyspace: Optional[str] = None,
    table_name: str = "",
    ids: Optional[List[str]] = None,
    batch_size: int = 16,
    ttl_seconds: Optional[int] = None,
    body_index_options: Optional[List[Tuple[str, Any]]] = None,
    metadata_indexing: Union[Tuple[str, Iterable[str]], str] = "all",
    **kwargs: Any,
) -> CVST:
    """Create a Cassandra vectorstore from raw texts.

    Args:
        texts: Texts to add to the vectorstore.
        embedding: Embedding function to use.
        metadatas: Optional list of metadatas associated with the texts.
        session: Cassandra driver session.
            If not provided, it is resolved from cassio.
        keyspace: Cassandra key space.
            If not provided, it is resolved from cassio.
        table_name: Cassandra table (required).
        ids: Optional list of IDs associated with the texts.
        batch_size: Number of concurrent requests to send to the server.
            Defaults to 16.
        ttl_seconds: Optional time-to-live for the added texts.
        body_index_options: Optional options used to create the body index.
            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]

    Returns:
        a Cassandra vectorstore.
    """
    store = cls(
        embedding=embedding,
        session=session,
        keyspace=keyspace,
        table_name=table_name,
        ttl_seconds=ttl_seconds,
        body_index_options=body_index_options,
        metadata_indexing=metadata_indexing,
    )
    store.add_texts(texts=texts, metadatas=metadatas, ids=ids, batch_size=batch_size)
    return store
@classmethod
async def afrom_texts(
    cls: Type[CVST],
    texts: List[str],
    embedding: Embeddings,
    metadatas: Optional[List[dict]] = None,
    *,
    session: Optional[Session] = None,
    keyspace: Optional[str] = None,
    table_name: str = "",
    ids: Optional[List[str]] = None,
    concurrency: int = 16,
    ttl_seconds: Optional[int] = None,
    body_index_options: Optional[List[Tuple[str, Any]]] = None,
    metadata_indexing: Union[Tuple[str, Iterable[str]], str] = "all",
    **kwargs: Any,
) -> CVST:
    """Create a Cassandra vectorstore from raw texts.

    Args:
        texts: Texts to add to the vectorstore.
        embedding: Embedding function to use.
        metadatas: Optional list of metadatas associated with the texts.
        session: Cassandra driver session.
            If not provided, it is resolved from cassio.
        keyspace: Cassandra key space.
            If not provided, it is resolved from cassio.
        table_name: Cassandra table (required).
        ids: Optional list of IDs associated with the texts.
        concurrency: Number of concurrent queries to send to the database.
            Defaults to 16.
        ttl_seconds: Optional time-to-live for the added texts.
        body_index_options: Optional options used to create the body index.
            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]

    Returns:
        a Cassandra vectorstore.
    """
    store = cls(
        embedding=embedding,
        session=session,
        keyspace=keyspace,
        table_name=table_name,
        ttl_seconds=ttl_seconds,
        # Table provisioning must be async too when created from a coroutine.
        setup_mode=SetupMode.ASYNC,
        body_index_options=body_index_options,
        metadata_indexing=metadata_indexing,
    )
    await store.aadd_texts(
        texts=texts, metadatas=metadatas, ids=ids, concurrency=concurrency
    )
    return store
@classmethod
def from_documents(
    cls: Type[CVST],
    documents: List[Document],
    embedding: Embeddings,
    *,
    session: Optional[Session] = None,
    keyspace: Optional[str] = None,
    table_name: str = "",
    ids: Optional[List[str]] = None,
    batch_size: int = 16,
    ttl_seconds: Optional[int] = None,
    body_index_options: Optional[List[Tuple[str, Any]]] = None,
    metadata_indexing: Union[Tuple[str, Iterable[str]], str] = "all",
    **kwargs: Any,
) -> CVST:
    """Create a Cassandra vectorstore from a document list.

    Args:
        documents: Documents to add to the vectorstore.
        embedding: Embedding function to use.
        session: Cassandra driver session.
            If not provided, it is resolved from cassio.
        keyspace: Cassandra key space.
            If not provided, it is resolved from cassio.
        table_name: Cassandra table (required).
        ids: Optional list of IDs associated with the documents.
        batch_size: Number of concurrent requests to send to the server.
            Defaults to 16.
        ttl_seconds: Optional time-to-live for the added documents.
        body_index_options: Optional options used to create the body index.
            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]

    Returns:
        a Cassandra vectorstore.
    """
    # Split each document into its text and metadata before delegating.
    texts = [document.page_content for document in documents]
    metadatas = [document.metadata for document in documents]
    return cls.from_texts(
        texts=texts,
        embedding=embedding,
        metadatas=metadatas,
        session=session,
        keyspace=keyspace,
        table_name=table_name,
        ids=ids,
        batch_size=batch_size,
        ttl_seconds=ttl_seconds,
        body_index_options=body_index_options,
        metadata_indexing=metadata_indexing,
        **kwargs,
    )
@classmethod
async def afrom_documents(
    cls: Type[CVST],
    documents: List[Document],
    embedding: Embeddings,
    *,
    session: Optional[Session] = None,
    keyspace: Optional[str] = None,
    table_name: str = "",
    ids: Optional[List[str]] = None,
    concurrency: int = 16,
    ttl_seconds: Optional[int] = None,
    body_index_options: Optional[List[Tuple[str, Any]]] = None,
    metadata_indexing: Union[Tuple[str, Iterable[str]], str] = "all",
    **kwargs: Any,
) -> CVST:
    """Create a Cassandra vectorstore from a document list.

    Args:
        documents: Documents to add to the vectorstore.
        embedding: Embedding function to use.
        session: Cassandra driver session.
            If not provided, it is resolved from cassio.
        keyspace: Cassandra key space.
            If not provided, it is resolved from cassio.
        table_name: Cassandra table (required).
        ids: Optional list of IDs associated with the documents.
        concurrency: Number of concurrent queries to send to the database.
            Defaults to 16.
        ttl_seconds: Optional time-to-live for the added documents.
        body_index_options: Optional options used to create the body index.
            Eg. body_index_options = [cassio.table.cql.STANDARD_ANALYZER]

    Returns:
        a Cassandra vectorstore.
    """
    # Split each document into its text and metadata before delegating.
    texts = [document.page_content for document in documents]
    metadatas = [document.metadata for document in documents]
    return await cls.afrom_texts(
        texts=texts,
        embedding=embedding,
        metadatas=metadatas,
        session=session,
        keyspace=keyspace,
        table_name=table_name,
        ids=ids,
        concurrency=concurrency,
        ttl_seconds=ttl_seconds,
        body_index_options=body_index_options,
        metadata_indexing=metadata_indexing,
        **kwargs,
    )
def as_retriever(
    self,
    search_type: str = "similarity",
    search_kwargs: Optional[Dict[str, Any]] = None,
    tags: Optional[List[str]] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs: Any,
) -> VectorStoreRetriever:
    """Return VectorStoreRetriever initialized from this VectorStore.

    Args:
        search_type: Defines the type of search that the Retriever should
            perform. Can be "similarity" (default), "mmr", or
            "similarity_score_threshold".
        search_kwargs: Keyword arguments to pass to the search function.
            Can include things like:
                k: Amount of documents to return (Default: 4)
                score_threshold: Minimum relevance threshold
                    for similarity_score_threshold
                fetch_k: Amount of documents to pass to MMR algorithm
                    (Default: 20)
                lambda_mult: Diversity of results returned by MMR;
                    1 for minimum diversity and 0 for maximum. (Default: 0.5)
                filter: Filter by document metadata
        tags: List of tags associated with the retriever.
        metadata: Metadata associated with the retriever.
        kwargs: Other arguments passed to the VectorStoreRetriever init.

    Returns:
        Retriever for VectorStore.

    Examples:

    .. code-block:: python

        # Retrieve more documents with higher diversity
        # Useful if your dataset has many similar documents
        docsearch.as_retriever(
            search_type="mmr",
            search_kwargs={'k': 6, 'lambda_mult': 0.25}
        )

        # Fetch more documents for the MMR algorithm to consider
        # But only return the top 5
        docsearch.as_retriever(
            search_type="mmr",
            search_kwargs={'k': 5, 'fetch_k': 50}
        )

        # Only retrieve documents that have a relevance score
        # Above a certain threshold
        docsearch.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={'score_threshold': 0.8}
        )

        # Only get the single most similar document from the dataset
        docsearch.as_retriever(search_kwargs={'k': 1})

        # Use a filter to only retrieve documents from a specific paper
        docsearch.as_retriever(
            search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
        )
    """
    # BUG FIX: `tags or [] + self._get_retriever_tags()` parses as
    # `tags or ([] + self._get_retriever_tags())` because `+` binds tighter
    # than `or`, so caller-supplied tags silently DROPPED the retriever
    # tags. Parenthesize so both are always combined.
    _tags = (tags or []) + self._get_retriever_tags()
    return VectorStoreRetriever(
        vectorstore=self,
        search_type=search_type,
        search_kwargs=search_kwargs or {},
        tags=_tags,
        metadata=metadata,
        **kwargs,
    )