class PineconeEmbeddings(BaseModel, Embeddings):
    """PineconeEmbeddings embedding model.

    Example:
        .. code-block:: python

            from langchain_pinecone import PineconeEmbeddings

            model = PineconeEmbeddings(model="multilingual-e5-large")
    """

    # Clients
    _client: PineconeClient = PrivateAttr(default=None)
    _async_client: Optional[PineconeAsyncioClient] = PrivateAttr(default=None)
    model: str
    """Model to use for example 'multilingual-e5-large'."""
    # Config
    batch_size: Optional[int] = None
    """Batch size for embedding documents."""
    query_params: Dict = Field(default_factory=dict)
    """Parameters for embedding query."""
    document_params: Dict = Field(default_factory=dict)
    """Parameters for embedding document"""
    # FIX: these two fields were commented out but are still used — the
    # `set_default_config` validator injects `dimension` (rejected by
    # `extra="forbid"` if the field is absent) and `_get_batch_iterator`
    # reads `self.show_progress_bar` (AttributeError if the field is absent).
    dimension: Optional[int] = None
    """Output embedding dimension; filled from the per-model defaults."""
    show_progress_bar: bool = False
    """Whether to wrap document batches in a tqdm progress bar."""
    pinecone_api_key: SecretStr = Field(
        default_factory=secret_from_env(
            "PINECONE_API_KEY",
            error_message="Pinecone API key not found. Please set the PINECONE_API_KEY "
            "environment variable or pass it via `pinecone_api_key`.",
        ),
        alias="api_key",
    )
    """Pinecone API key.

    If not provided, will look for the PINECONE_API_KEY environment variable.
    """

    model_config = ConfigDict(
        extra="forbid",
        populate_by_name=True,
        protected_namespaces=(),
    )

    @property
    def async_client(self) -> PineconeAsyncioClient:
        """Lazily initialize the async client."""
        # NOTE(review): a fresh client is built on every access and the
        # `_async_client` private attr above is never populated — presumably
        # intentional so each event loop gets its own session; confirm.
        return PineconeAsyncioClient(
            api_key=self.pinecone_api_key.get_secret_value(), source_tag="langchain"
        )

    @model_validator(mode="before")
    @classmethod
    def set_default_config(cls, values: dict) -> Any:
        """Set default configuration based on model."""
        default_config_map = {
            "multilingual-e5-large": {
                "batch_size": 96,
                "query_params": {"input_type": "query", "truncation": "END"},
                "document_params": {"input_type": "passage", "truncation": "END"},
                "dimension": 1024,
            },
        }
        model = values.get("model")
        if model in default_config_map:
            config = default_config_map[model]
            for key, value in config.items():
                # Only fill in defaults the caller did not supply explicitly.
                if key not in values:
                    values[key] = value
        return values

    @model_validator(mode="after")
    def validate_environment(self) -> Self:
        """Validate that Pinecone version and credentials exist in environment."""
        api_key_str = self.pinecone_api_key.get_secret_value()
        client = PineconeClient(api_key=api_key_str, source_tag="langchain")
        self._client = client
        # Async client is created lazily via the `async_client` property.
        return self

    def _get_batch_iterator(self, texts: List[str]) -> tuple[Iterable, int]:
        """Return an iterator over batch start offsets plus the batch size.

        Uses `self.batch_size` when set, otherwise `DEFAULT_BATCH_SIZE`;
        wraps the offsets in tqdm when `show_progress_bar` is enabled.
        """
        if self.batch_size is None:
            batch_size = DEFAULT_BATCH_SIZE
        else:
            batch_size = self.batch_size

        if self.show_progress_bar:
            try:
                from tqdm.auto import tqdm  # type: ignore
            except ImportError as e:
                raise ImportError(
                    "Must have tqdm installed if `show_progress_bar` is set to True. "
                    "Please install with `pip install tqdm`."
                ) from e

            _iter = tqdm(range(0, len(texts), batch_size))
        else:
            _iter = range(0, len(texts), batch_size)
        return _iter, batch_size
class PineconeSparseEmbeddings(PineconeEmbeddings):
    """PineconeSparseEmbeddings embedding model.

    Example:
        .. code-block:: python

            from langchain_pinecone import PineconeSparseEmbeddings

            model = PineconeSparseEmbeddings(model="pinecone-sparse-english-v0")
    """

    @model_validator(mode="before")
    @classmethod
    def set_default_config(cls, values: dict) -> Any:
        """Set default configuration based on model."""
        defaults_by_model = {
            "pinecone-sparse-english-v0": {
                "batch_size": 96,
                "query_params": {"input_type": "query", "truncation": "END"},
                "document_params": {"input_type": "passage", "truncation": "END"},
                "dimension": None,
            },
        }
        model_defaults = defaults_by_model.get(values.get("model"))
        if model_defaults is not None:
            # Fill in per-model defaults without overriding caller-supplied values.
            for name, default in model_defaults.items():
                values.setdefault(name, default)
        return values
[docs]defembed_documents(self,texts:List[str])->List[SparseValues]:"""Embed search docs with sparse embeddings."""embeddings:List[SparseValues]=[]_iter,batch_size=self._get_batch_iterator(texts)foriin_iter:response=self._embed_texts(model=self.model,parameters=self.document_params,texts=texts[i:i+batch_size],)forrinresponse:embeddings.append(SparseValues(indices=r["sparse_indices"],values=r["sparse_values"]))returnembeddings
[docs]asyncdefaembed_documents(self,texts:List[str])->List[SparseValues]:"""Asynchronously embed search docs with sparse embeddings."""embeddings:List[SparseValues]=[]_iter,batch_size=self._get_batch_iterator(texts)foriin_iter:response=awaitself._aembed_texts(model=self.model,parameters=self.document_params,texts=texts[i:i+batch_size],)forrinresponse:embeddings.append(SparseValues(indices=r["sparse_indices"],values=r["sparse_values"]))returnembeddings
[docs]defembed_query(self,text:str)->SparseValues:"""Embed query text with sparse embeddings."""response=self._embed_texts(model=self.model,parameters=self.query_params,texts=[text])[0]returnSparseValues(indices=response["sparse_indices"],values=response["sparse_values"])
[docs]asyncdefaembed_query(self,text:str)->SparseValues:"""Asynchronously embed query text with sparse embeddings."""embeddings=awaitself._aembed_texts(model=self.model,parameters=self.query_params,texts=[text],)response=embeddings[0]returnSparseValues(indices=response["sparse_indices"],values=response["sparse_values"])