class Clarifai(VectorStore):
    """`Clarifai AI` vector store.

    To use, you should have the ``clarifai`` python SDK package installed.

    Example:
        .. code-block:: python

            from langchain_community.vectorstores import Clarifai

            clarifai_vector_db = Clarifai(
                user_id=USER_ID,
                app_id=APP_ID,
                number_of_docs=NUMBER_OF_DOCS,
            )
    """
    def __init__(
        self,
        user_id: Optional[str] = None,
        app_id: Optional[str] = None,
        number_of_docs: Optional[int] = 4,
        pat: Optional[str] = None,
        token: Optional[str] = None,
        api_base: Optional[str] = "https://api.clarifai.com",
    ) -> None:
        """Initialize with Clarifai client.

        Args:
            user_id (Optional[str], optional): User ID. Falls back to the
                ``CLARIFAI_USER_ID`` environment variable. Defaults to None.
            app_id (Optional[str], optional): App ID. Falls back to the
                ``CLARIFAI_APP_ID`` environment variable. Defaults to None.
            number_of_docs (Optional[int], optional): Number of documents to
                return during vector search. Defaults to 4.
            pat (Optional[str], optional): Personal access token.
                Defaults to None.
            token (Optional[str], optional): Session token. Defaults to None.
            api_base (Optional[str], optional): API base URL. Defaults to
                "https://api.clarifai.com".

        Raises:
            ValueError: If user ID or app ID is neither passed in nor found
                in the environment.
            ImportError: If the ``clarifai`` python package is not installed.
        """
        # Explicit arguments win; environment variables are the fallback.
        _user_id = user_id or os.environ.get("CLARIFAI_USER_ID")
        _app_id = app_id or os.environ.get("CLARIFAI_APP_ID")
        if _user_id is None or _app_id is None:
            raise ValueError(
                "Could not find CLARIFAI_USER_ID "
                "or CLARIFAI_APP_ID in your environment. "
                "Please set those env variables with a valid user ID, app ID"
            )
        self._number_of_docs = number_of_docs

        try:
            from clarifai.client.search import Search
        except ImportError as e:
            raise ImportError(
                "Could not import clarifai python package. "
                "Please install it with `pip install clarifai`."
            ) from e

        # Only the auth helper is kept; per-call Search/Inputs clients are
        # rebuilt from it in the other methods.
        self._auth = Search(
            user_id=_user_id,
            app_id=_app_id,
            top_k=number_of_docs,
            pat=pat,
            token=token,
            base_url=api_base,
        ).auth_helper
[docs]defadd_texts(self,texts:Iterable[str],metadatas:Optional[List[dict]]=None,ids:Optional[List[str]]=None,**kwargs:Any,)->List[str]:"""Add texts to the Clarifai vectorstore. This will push the text to a Clarifai application. Application use a base workflow that create and store embedding for each text. Make sure you are using a base workflow that is compatible with text (such as Language Understanding). Args: texts (Iterable[str]): Texts to add to the vectorstore. metadatas (Optional[List[dict]], optional): Optional list of metadatas. ids (Optional[List[str]], optional): Optional list of IDs. """try:fromclarifai.client.inputimportInputsfromgoogle.protobuf.struct_pb2importStructexceptImportErrorase:raiseImportError("Could not import clarifai python package. ""Please install it with `pip install clarifai`.")fromeltexts=list(texts)length=len(ltexts)assertlength>0,"No texts provided to add to the vectorstore."ifmetadatasisnotNone:assertlength==len(metadatas),("Number of texts and metadatas should be the same.")ifidsisnotNone:assertlen(ltexts)==len(ids),("Number of text inputs and input ids should be the same.")input_obj=Inputs.from_auth_helper(auth=self._auth)batch_size=32input_job_ids=[]foridxinrange(0,length,batch_size):try:batch_texts=ltexts[idx:idx+batch_size]batch_metadatas=(metadatas[idx:idx+batch_size]ifmetadataselseNone)ifidsisNone:batch_ids=[uuid.uuid4().hexfor_inrange(len(batch_texts))]else:batch_ids=ids[idx:idx+batch_size]ifbatch_metadatasisnotNone:meta_list=[]formetainbatch_metadatas:meta_struct=Struct()meta_struct.update(meta)meta_list.append(meta_struct)input_batch=[input_obj.get_text_input(input_id=batch_ids[i],raw_text=text,metadata=meta_list[i]ifbatch_metadataselseNone,)fori,textinenumerate(batch_texts)]result_id=input_obj.upload_inputs(inputs=input_batch)input_job_ids.extend(result_id)logger.debug("Input posted successfully.")exceptExceptionaserror:logger.warning(f"Post inputs failed: {error}")traceback.print_exc()returninput_job_ids
[docs]defsimilarity_search_with_score(self,query:str,k:Optional[int]=None,filters:Optional[dict]=None,**kwargs:Any,)->List[Tuple[Document,float]]:"""Run similarity search with score using Clarifai. Args: query (str): Query text to search for. k (Optional[int]): Number of results to return. If not set, it'll take _number_of_docs. Defaults to None. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. Returns: List[Document]: List of documents most similar to the query text. """try:fromclarifai.client.searchimportSearchfromclarifai_grpc.grpc.apiimportresources_pb2fromgoogle.protobufimportjson_format# type: ignoreexceptImportErrorase:raiseImportError("Could not import clarifai python package. ""Please install it with `pip install clarifai`.")frome# Get number of docs to returntop_k=korself._number_of_docssearch_obj=Search.from_auth_helper(auth=self._auth,top_k=top_k)rank=[{"text_raw":query}]# Add filter by metadata if provided.iffiltersisnotNone:search_metadata={"metadata":filters}search_response=search_obj.query(ranks=rank,filters=[search_metadata])else:search_response=search_obj.query(ranks=rank)# Retrieve hitshits=[hitfordatainsearch_responseforhitindata.hits]executor=ThreadPoolExecutor(max_workers=10)defhit_to_document(hit:resources_pb2.Hit)->Tuple[Document,float]:metadata=json_format.MessageToDict(hit.input.data.metadata)h=dict(self._auth.metadata)request=requests.get(hit.input.data.text.url,headers=h)# override encoding by real educated guess as provided by chardetrequest.encoding=request.apparent_encodingrequested_text=request.textlogger.debug(f"\tScore {hit.score:.2f} for annotation: {hit.annotation.id}\ off input: {hit.input.id}, text: {requested_text[:125]}")return(Document(page_content=requested_text,metadata=metadata),hit.score)# Iterate over hits and retrieve metadata and textfutures=[executor.submit(hit_to_document,hit)forhitinhits]docs_and_scores=[future.result()forfutureinfutures]returndocs_and_scores
[docs]defsimilarity_search(self,query:str,k:Optional[int]=None,**kwargs:Any,)->List[Document]:"""Run similarity search using Clarifai. Args: query: Text to look up documents similar to. k: Number of Documents to return. If not set, it'll take _number_of_docs. Defaults to None. Returns: List of Documents most similar to the query and score for each """docs_and_scores=self.similarity_search_with_score(query,k=k,**kwargs)return[docfordoc,_indocs_and_scores]
[docs]@classmethoddeffrom_texts(cls,texts:List[str],embedding:Optional[Embeddings]=None,metadatas:Optional[List[dict]]=None,user_id:Optional[str]=None,app_id:Optional[str]=None,number_of_docs:Optional[int]=None,pat:Optional[str]=None,token:Optional[str]=None,**kwargs:Any,)->Clarifai:"""Create a Clarifai vectorstore from a list of texts. Args: user_id (str): User ID. app_id (str): App ID. texts (List[str]): List of texts to add. number_of_docs (Optional[int]): Number of documents to return during vector search. Defaults to None. pat (Optional[str], optional): Personal access token. Defaults to None. token (Optional[str], optional): Session token. Defaults to None. metadatas (Optional[List[dict]]): Optional list of metadatas. Defaults to None. kwargs: Additional keyword arguments to be passed to the Search. Returns: Clarifai: Clarifai vectorstore. """clarifai_vector_db=cls(user_id=user_id,app_id=app_id,number_of_docs=number_of_docs,pat=pat,token=token,**kwargs,)clarifai_vector_db.add_texts(texts=texts,metadatas=metadatas)returnclarifai_vector_db
[docs]@classmethoddeffrom_documents(cls,documents:List[Document],embedding:Optional[Embeddings]=None,user_id:Optional[str]=None,app_id:Optional[str]=None,number_of_docs:Optional[int]=None,pat:Optional[str]=None,token:Optional[str]=None,**kwargs:Any,)->Clarifai:"""Create a Clarifai vectorstore from a list of documents. Args: user_id (str): User ID. app_id (str): App ID. documents (List[Document]): List of documents to add. number_of_docs (Optional[int]): Number of documents to return during vector search. Defaults to None. pat (Optional[str], optional): Personal access token. Defaults to None. token (Optional[str], optional): Session token. Defaults to None. kwargs: Additional keyword arguments to be passed to the Search. Returns: Clarifai: Clarifai vectorstore. """texts=[doc.page_contentfordocindocuments]metadatas=[doc.metadatafordocindocuments]returncls.from_texts(user_id=user_id,app_id=app_id,texts=texts,number_of_docs=number_of_docs,pat=pat,metadatas=metadatas,token=token,**kwargs,)