class VespaStore(VectorStore):
    """`Vespa` vector store.

    To use, you should have the python client library ``pyvespa`` installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import VespaStore
            from langchain_community.embeddings.openai import OpenAIEmbeddings
            from vespa.application import Vespa

            # Create a vespa client dependent upon your application,
            # e.g. either connecting to Vespa Cloud or a local deployment
            # such as Docker. Please refer to the PyVespa documentation on
            # how to initialize the client.
            vespa_app = Vespa(url="...", port=..., application_package=...)

            # You need to instruct LangChain on which fields to use for embeddings
            vespa_config = dict(
                page_content_field="text",
                embedding_field="embedding",
                input_field="query_embedding",
                metadata_fields=["date", "rating", "author"],
            )

            embedding_function = OpenAIEmbeddings()
            vectorstore = VespaStore(vespa_app, embedding_function, **vespa_config)
    """
[docs]def__init__(self,app:Any,embedding_function:Optional[Embeddings]=None,page_content_field:Optional[str]=None,embedding_field:Optional[str]=None,input_field:Optional[str]=None,metadata_fields:Optional[List[str]]=None,)->None:""" Initialize with a PyVespa client. """try:fromvespa.applicationimportVespaexceptImportError:raiseImportError("Could not import Vespa python package. ""Please install it with `pip install pyvespa`.")ifnotisinstance(app,Vespa):raiseValueError(f"app should be an instance of vespa.application.Vespa, got {type(app)}")self._vespa_app=appself._embedding_function=embedding_functionself._page_content_field=page_content_fieldself._embedding_field=embedding_fieldself._input_field=input_fieldself._metadata_fields=metadata_fields
[docs]defadd_texts(self,texts:Iterable[str],metadatas:Optional[List[dict]]=None,ids:Optional[List[str]]=None,**kwargs:Any,)->List[str]:""" Add texts to the vectorstore. Args: texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. ids: Optional list of ids associated with the texts. kwargs: vectorstore specific parameters Returns: List of ids from adding the texts into the vectorstore. """embeddings=Noneifself._embedding_functionisnotNone:embeddings=self._embedding_function.embed_documents(list(texts))ifidsisNone:ids=[str(f"{i+1}")fori,_inenumerate(texts)]batch=[]fori,textinenumerate(texts):fields:Dict[str,Union[str,List[float]]]={}ifself._page_content_fieldisnotNone:fields[self._page_content_field]=textifself._embedding_fieldisnotNoneandembeddingsisnotNone:fields[self._embedding_field]=embeddings[i]ifmetadatasisnotNoneandself._metadata_fieldsisnotNone:formetadata_fieldinself._metadata_fields:ifmetadata_fieldinmetadatas[i]:fields[metadata_field]=metadatas[i][metadata_field]batch.append({"id":ids[i],"fields":fields})results=self._vespa_app.feed_batch(batch)forresultinresults:ifnot(str(result.status_code).startswith("2")):raiseRuntimeError(f"Could not add document to Vespa. "f"Error code: {result.status_code}. "f"Message: {result.json['message']}")returnids
def_create_query(self,query_embedding:List[float],k:int=4,**kwargs:Any)->Dict:hits=kdoc_embedding_field=self._embedding_fieldinput_embedding_field=self._input_fieldranking_function=kwargs["ranking"]if"ranking"inkwargselse"default"filter=kwargs["filter"]if"filter"inkwargselseNoneapproximate=kwargs["approximate"]if"approximate"inkwargselseFalseapproximate="true"ifapproximateelse"false"yql="select * from sources * where "yql+=f"{{targetHits: {hits}, approximate: {approximate}}}"yql+=f"nearestNeighbor({doc_embedding_field}, {input_embedding_field})"iffilterisnotNone:yql+=f" and {filter}"query={"yql":yql,f"input.query({input_embedding_field})":query_embedding,"ranking":ranking_function,"hits":hits,}returnquery
[docs]defsimilarity_search_by_vector_with_score(self,query_embedding:List[float],k:int=4,**kwargs:Any)->List[Tuple[Document,float]]:""" Performs similarity search from a embeddings vector. Args: query_embedding: Embeddings vector to search for. k: Number of results to return. custom_query: Use this custom query instead default query (kwargs) kwargs: other vector store specific parameters Returns: List of ids from adding the texts into the vectorstore. """if"custom_query"inkwargs:query=kwargs["custom_query"]else:query=self._create_query(query_embedding,k,**kwargs)try:response=self._vespa_app.query(body=query)exceptExceptionase:raiseRuntimeError(f"Could not retrieve data from Vespa: "f"{e.args[0][0]['summary']}. "f"Error: {e.args[0][0]['message']}")ifnotstr(response.status_code).startswith("2"):raiseRuntimeError(f"Could not retrieve data from Vespa. "f"Error code: {response.status_code}. "f"Message: {response.json['message']}")root=response.json["root"]if"errors"inroot:importjsonraiseRuntimeError(json.dumps(root["errors"]))ifresponseisNoneorresponse.hitsisNone:return[]docs=[]forchildinresponse.hits:page_content=child["fields"][self._page_content_field]score=child["relevance"]metadata={"id":child["id"]}ifself._metadata_fieldsisnotNone:forfieldinself._metadata_fields:metadata[field]=child["fields"].get(field)doc=Document(page_content=page_content,metadata=metadata)docs.append((doc,score))returndocs
[docs]defmax_marginal_relevance_search(self,query:str,k:int=4,fetch_k:int=20,lambda_mult:float=0.5,**kwargs:Any,)->List[Document]:raiseNotImplementedError("MMR search not implemented")
[docs]defmax_marginal_relevance_search_by_vector(self,embedding:List[float],k:int=4,fetch_k:int=20,lambda_mult:float=0.5,**kwargs:Any,)->List[Document]:raiseNotImplementedError("MMR search by vector not implemented")