Source code for langchain_community.vectorstores.pathway
"""Pathway Vector Store client.The Pathway Vector Server is a pipeline written in the Pathway framweork which indexesall files in a given folder, embeds them, and builds a vector index. The pipeline reactsto changes in source files, automatically updating appropriate index entries.The PathwayVectorClient implements the LangChain VectorStore interface and queries thePathwayVectorServer to retrieve up-to-date documents.You can use the client with managed instances of Pathway Vector Store, or run your owninstance as described at https://pathway.com/developers/user-guide/llm-xpack/vectorstore_pipeline/"""importjsonimportloggingfromtypingimportAny,Callable,Iterable,List,Optional,Tupleimportrequestsfromlangchain_core.documentsimportDocumentfromlangchain_core.embeddingsimportEmbeddingsfromlangchain_core.vectorstoresimportVectorStore# Copied from https://github.com/pathwaycom/pathway/blob/main/python/pathway/xpacks/llm/vector_store.py# to remove dependency on Pathway library.class_VectorStoreClient:def__init__(self,host:Optional[str]=None,port:Optional[int]=None,url:Optional[str]=None,):""" A client you can use to query :py:class:`VectorStoreServer`. Please provide aither the `url`, or `host` and `port`. Args: - host: host on which `:py:class:`VectorStoreServer` listens - port: port on which `:py:class:`VectorStoreServer` listens - url: url at which `:py:class:`VectorStoreServer` listens """err="Either (`host` and `port`) or `url` must be provided, but not both."ifurlisnotNone:ifhostorport:raiseValueError(err)self.url=urlelse:ifhostisNone:raiseValueError(err)port=portor80self.url=f"http://{host}:{port}"defquery(self,query:str,k:int=3,metadata_filter:Optional[str]=None)->List[dict]:""" Perform a query to the vector store and fetch results. Args: - query: - k: number of documents to be returned - metadata_filter: optional string representing the metadata filtering query in the JMESPath format. The search will happen only for documents satisfying this filtering. 
"""data={"query":query,"k":k}ifmetadata_filterisnotNone:data["metadata_filter"]=metadata_filterurl=self.url+"/v1/retrieve"response=requests.post(url,data=json.dumps(data),headers={"Content-Type":"application/json"},timeout=3,)responses=response.json()returnsorted(responses,key=lambdax:x["dist"])# Make an alias__call__=querydefget_vectorstore_statistics(self)->dict:"""Fetch basic statistics about the vector store."""url=self.url+"/v1/statistics"response=requests.post(url,json={},headers={"Content-Type":"application/json"},)responses=response.json()returnresponsesdefget_input_files(self,metadata_filter:Optional[str]=None,filepath_globpattern:Optional[str]=None,)->list:""" Fetch information on documents in the vector store. Args: metadata_filter: optional string representing the metadata filtering query in the JMESPath format. The search will happen only for documents satisfying this filtering. filepath_globpattern: optional glob pattern specifying which documents will be searched for this query. """url=self.url+"/v1/inputs"response=requests.post(url,json={"metadata_filter":metadata_filter,"filepath_globpattern":filepath_globpattern,},headers={"Content-Type":"application/json"},)responses=response.json()returnresponses
class PathwayVectorClient(VectorStore):
    """VectorStore connecting to Pathway Vector Store."""

    def __init__(
        self,
        host: Optional[str] = None,
        port: Optional[int] = None,
        url: Optional[str] = None,
    ) -> None:
        """
        A client you can use to query Pathway Vector Store.

        Please provide either the `url`, or `host` and `port`.

        Args:
            - host: host on which Pathway Vector Store listens
            - port: port on which Pathway Vector Store listens
            - url: url at which Pathway Vector Store listens
        """
        self.client = _VectorStoreClient(host, port, url)

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        """Pathway is not suitable for this method.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            "Pathway vector store does not support adding or removing texts"
            " from client."
        )

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "PathwayVectorClient":
        """Not supported by this client.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            "Pathway vector store does not support initializing from_texts."
        )

    def similarity_search(
        self, query: str, k: int = 4, **kwargs: Any
    ) -> List[Document]:
        """Run similarity search with Pathway.

        Args:
            - query (str): Query text to search for.
            - k (int): Number of results to return. Defaults to 4.
            - metadata_filter (Optional[str]): Optional keyword argument.
                Filter by metadata. Filtering query should be in JMESPath
                format. Defaults to None.

        Any other keyword argument is ignored and reported with a warning.

        Returns:
            List[Document]: Documents built from the ``"text"`` and
            ``"metadata"`` fields of each result.
        """
        metadata_filter = kwargs.pop("metadata_filter", None)
        if kwargs:
            # Use a named logger: the module-level logging.warning() writes to
            # the root logger and implicitly calls basicConfig() when it has no
            # handlers, which a library should not do.
            logging.getLogger(__name__).warning(
                "Unknown kwargs passed to PathwayVectorClient.similarity_search: %s",
                kwargs,
            )
        rets = self.client(query=query, k=k, metadata_filter=metadata_filter)

        return [
            Document(page_content=ret["text"], metadata=ret["metadata"])
            for ret in rets
        ]

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        metadata_filter: Optional[str] = None,
    ) -> List[Tuple[Document, float]]:
        """Run similarity search with Pathway with distance.

        Args:
            - query (str): Query text to search for.
            - k (int): Number of results to return. Defaults to 4.
            - metadata_filter (Optional[str]): Filter by metadata.
                Filtering query should be in JMESPath format. Defaults to None.

        Returns:
            List[Tuple[Document, float]]: List of documents most similar to
            the query text and cosine distance in float for each.
            Lower score represents more similarity.
        """
        rets = self.client(query=query, k=k, metadata_filter=metadata_filter)

        return [
            (
                Document(page_content=ret["text"], metadata=ret["metadata"]),
                ret["dist"],
            )
            for ret in rets
        ]

    def get_vectorstore_statistics(self) -> dict:
        """Fetch basic statistics about the Vector Store."""
        return self.client.get_vectorstore_statistics()

    def get_input_files(
        self,
        metadata_filter: Optional[str] = None,
        filepath_globpattern: Optional[str] = None,
    ) -> list:
        """List files indexed by the Vector Store.

        Args:
            metadata_filter: optional metadata filtering query, forwarded
                unchanged to the underlying client.
            filepath_globpattern: optional glob pattern, forwarded unchanged
                to the underlying client.
        """
        return self.client.get_input_files(metadata_filter, filepath_globpattern)