Source code for langchain_google_community.vertex_ai_search
"""Retriever wrapper for Google Vertex AI Search.Set the following environment variables before the tests:export PROJECT_ID=... - set to your Google Cloud project IDexport DATA_STORE_ID=... - the ID of the search engine to use for the test"""from__future__importannotationsimportjsonimportwarningsfromtypingimportTYPE_CHECKING,Any,Dict,List,Optional,Sequencefromgoogle.api_core.client_optionsimportClientOptionsfromgoogle.api_core.exceptionsimportInvalidArgumentfromgoogle.protobuf.json_formatimportMessageToDictfromlangchain_core.callbacksimportCallbackManagerForRetrieverRunfromlangchain_core.documentsimportDocumentfromlangchain_core.loadimportSerializable,loadfromlangchain_core.pydantic_v1importExtra,Field,root_validatorfromlangchain_core.retrieversimportBaseRetrieverfromlangchain_core.toolsimportBaseToolfromlangchain_core.utilsimportget_from_dict_or_envfromlangchain_google_community._utilsimportget_client_infoifTYPE_CHECKING:fromgoogle.cloud.discoveryengine_v1betaimport(# type: ignore[import, attr-defined]ConversationalSearchServiceClient,SearchRequest,SearchResult,SearchServiceClient,)def_load(dump:Dict[str,Any])->Any:returnload(dump,valid_namespaces=["langchain_google_community"])class_BaseVertexAISearchRetriever(Serializable):project_id:str"""Google Cloud Project ID."""data_store_id:str"""Vertex AI Search data store ID."""location_id:str="global""""Vertex AI Search data store location."""serving_config_id:str="default_config""""Vertex AI Search serving config ID."""credentials:Any=None"""The default custom credentials (google.auth.credentials.Credentials) to use when making API calls. 
If not provided, credentials will be ascertained from the environment."""engine_data_type:int=Field(default=0,ge=0,le=2)""" Defines the Vertex AI Search data type 0 - Unstructured data 1 - Structured data 2 - Website data """@classmethoddefis_lc_serializable(self)->bool:returnTruedef__reduce__(self)->Any:return_load,(self.to_json(),)@root_validator(pre=True)defvalidate_environment(cls,values:Dict)->Dict:"""Validates the environment."""try:fromgoogle.cloudimportdiscoveryengine_v1beta# noqa: F401exceptImportErrorasexc:raiseImportError("Could not import google-cloud-discoveryengine python package. ""Please, install vertexaisearch dependency group: ""poetry install --with vertexaisearch")fromexcvalues["project_id"]=get_from_dict_or_env(values,"project_id","PROJECT_ID")try:# For backwards compatibilitysearch_engine_id=get_from_dict_or_env(values,"search_engine_id","SEARCH_ENGINE_ID")ifsearch_engine_id:warnings.warn("The `search_engine_id` parameter is deprecated. Use `data_store_id` instead.",# noqa: E501DeprecationWarning,)values["data_store_id"]=search_engine_idexcept:# noqa: E722passvalues["data_store_id"]=get_from_dict_or_env(values,"data_store_id","DATA_STORE_ID")returnvalues@propertydefclient_options(self)->"ClientOptions":returnClientOptions(api_endpoint=(f"{self.location_id}-discoveryengine.googleapis.com"ifself.location_id!="global"elseNone))def_convert_structured_search_response(self,results:Sequence[SearchResult])->List[Document]:"""Converts a sequence of search results to a list of LangChain documents."""documents:List[Document]=[]forresultinresults:document_dict=MessageToDict(result.document._pb,preserving_proto_field_name=True)documents.append(Document(page_content=json.dumps(document_dict.get("struct_data",{})),metadata={"id":document_dict["id"],"name":document_dict["name"]},))returndocumentsdef_convert_unstructured_search_response(self,results:Sequence[SearchResult],chunk_type:str)->List[Document]:"""Converts a sequence of search results to a list of 
LangChain documents."""documents:List[Document]=[]forresultinresults:document_dict=MessageToDict(result.document._pb,preserving_proto_field_name=True)derived_struct_data=document_dict.get("derived_struct_data")ifnotderived_struct_data:continuedoc_metadata=document_dict.get("struct_data",{})doc_metadata["id"]=document_dict["id"]ifchunk_typenotinderived_struct_data:continueforchunkinderived_struct_data[chunk_type]:chunk_metadata=doc_metadata.copy()chunk_metadata["source"]=derived_struct_data.get("link","")if(chunk_type=="extractive_answers"orchunk_type=="extractive_segments"):chunk_metadata["source"]+=f":{chunk.get('pageNumber','')}"documents.append(Document(page_content=chunk.get("content",""),metadata=chunk_metadata))returndocumentsdef_convert_website_search_response(self,results:Sequence[SearchResult],chunk_type:str)->List[Document]:"""Converts a sequence of search results to a list of LangChain documents."""documents:List[Document]=[]forresultinresults:document_dict=MessageToDict(result.document._pb,preserving_proto_field_name=True)derived_struct_data=document_dict.get("derived_struct_data")ifnotderived_struct_data:continuedoc_metadata=document_dict.get("struct_data",{})doc_metadata["id"]=document_dict["id"]doc_metadata["source"]=derived_struct_data.get("link","")ifchunk_typenotinderived_struct_data:continuetext_field="snippet"ifchunk_type=="snippets"else"content"forchunkinderived_struct_data[chunk_type]:documents.append(Document(page_content=chunk.get(text_field,""),metadata=doc_metadata))ifnotdocuments:print(f"No {chunk_type} could be found.")# noqa: T201ifchunk_type=="extractive_answers":print(# noqa: T201"Make sure that your data store is using Advanced Website ""Indexing.\n""https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing"# noqa: E501)returndocuments
class VertexAISearchRetriever(BaseRetriever, _BaseVertexAISearchRetriever):
    """`Google Vertex AI Search` retriever.

    For a detailed explanation of the Vertex AI Search concepts
    and configuration parameters, refer to the product documentation.
    https://cloud.google.com/generative-ai-app-builder/docs/enterprise-search-introduction
    """

    filter: Optional[str] = None
    """Filter expression."""
    get_extractive_answers: bool = False
    """If True return Extractive Answers, otherwise return Extractive Segments or Snippets."""  # noqa: E501
    max_documents: int = Field(default=5, ge=1, le=100)
    """The maximum number of documents to return."""
    max_extractive_answer_count: int = Field(default=1, ge=1, le=5)
    """The maximum number of extractive answers returned in each search result.
    At most 5 answers will be returned for each SearchResult.
    """
    max_extractive_segment_count: int = Field(default=1, ge=1, le=1)
    """The maximum number of extractive segments returned in each search result.
    Currently one segment will be returned for each SearchResult.
    """
    query_expansion_condition: int = Field(default=1, ge=0, le=2)
    """Specification to determine under which conditions query expansion should occur.
    0 - Unspecified query expansion condition. In this case, server behavior defaults
        to disabled
    1 - Disabled query expansion. Only the exact search query is used, even if
        SearchResponse.total_size is zero.
    2 - Automatic query expansion built by the Search API.
    """
    spell_correction_mode: int = Field(default=2, ge=0, le=2)
    """Specification to determine under which conditions spell correction should occur.
    0 - Unspecified spell correction mode. In this case, server behavior defaults
        to auto.
    1 - Suggestion only. Search API will try to find a spell suggestion if there is any
        and put in the `SearchResponse.corrected_query`.
        The spell suggestion will not be used as the search query.
    2 - Automatic spell correction built by the Search API.
        Search will be based on the corrected query if found.
    """
    boost_spec: Optional[Dict[Any, Any]] = None
    """BoostSpec for boosting search results. A protobuf should be provided.

    https://cloud.google.com/generative-ai-app-builder/docs/boost-search-results
    https://cloud.google.com/generative-ai-app-builder/docs/reference/rest/v1beta/BoostSpec
    """

    _client: SearchServiceClient
    _serving_config: str

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True
        underscore_attrs_are_private = True

    def __init__(self, **kwargs: Any) -> None:
        """Initializes private fields.

        Validates the public fields, then builds the search client and the
        serving config path from them.

        Raises:
            ImportError: if google-cloud-discoveryengine is not installed.
            ValueError: re-raised from field validation after being printed.
        """
        try:
            from google.cloud.discoveryengine_v1beta import SearchServiceClient
        except ImportError as exc:
            raise ImportError(
                "Could not import google-cloud-discoveryengine python package. "
                "Please, install vertexaisearch dependency group: "
                "`pip install langchain-google-community[vertexaisearch]`"
            ) from exc

        try:
            super().__init__(**kwargs)
        except ValueError as e:
            # Report the actual class (this initializer is inherited by
            # subclasses) instead of a stale hard-coded name.
            print(f"Error initializing {type(self).__name__}: {str(e)}")  # noqa: T201
            raise

        #  For more information, refer to:
        #  https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
        self._client = SearchServiceClient(
            credentials=self.credentials,
            client_options=self.client_options,
            client_info=get_client_info(module="vertex-ai-search"),
        )

        self._serving_config = self._client.serving_config_path(
            project=self.project_id,
            location=self.location_id,
            data_store=self.data_store_id,
            serving_config=self.serving_config_id,
        )

    def _get_content_spec_kwargs(self) -> Optional[Dict[str, Any]]:
        """Prepares the keyword arguments for a ContentSearchSpec.

        Returns:
            A dict of ContentSearchSpec kwargs for unstructured (0) and
            website (2) data, or None for structured (1) data.

        Raises:
            NotImplementedError: for any other `engine_data_type`.
        """
        from google.cloud.discoveryengine_v1beta import SearchRequest

        content_search_spec: Optional[Dict[str, Any]]

        if self.engine_data_type == 0:
            if self.get_extractive_answers:
                extractive_content_spec = (
                    SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
                        max_extractive_answer_count=self.max_extractive_answer_count,
                    )
                )
            else:
                extractive_content_spec = (
                    SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
                        max_extractive_segment_count=self.max_extractive_segment_count,
                    )
                )
            content_search_spec = dict(extractive_content_spec=extractive_content_spec)
        elif self.engine_data_type == 1:
            content_search_spec = None
        elif self.engine_data_type == 2:
            content_search_spec = dict(
                extractive_content_spec=SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
                    max_extractive_answer_count=self.max_extractive_answer_count,
                ),
                snippet_spec=SearchRequest.ContentSearchSpec.SnippetSpec(
                    return_snippet=True
                ),
            )
        else:
            raise NotImplementedError(
                "Only data store type 0 (Unstructured), 1 (Structured), "
                "or 2 (Website) are supported currently."
                + f" Got {self.engine_data_type}"
            )

        return content_search_spec

    def _create_search_request(self, query: str) -> SearchRequest:
        """Prepares a SearchRequest object for the given query."""
        from google.cloud.discoveryengine_v1beta import SearchRequest

        query_expansion_spec = SearchRequest.QueryExpansionSpec(
            condition=self.query_expansion_condition,
        )

        spell_correction_spec = SearchRequest.SpellCorrectionSpec(
            mode=self.spell_correction_mode
        )

        content_search_spec_kwargs = self._get_content_spec_kwargs()

        if content_search_spec_kwargs is not None:
            content_search_spec = SearchRequest.ContentSearchSpec(
                **content_search_spec_kwargs
            )
        else:
            content_search_spec = None

        return SearchRequest(
            query=query,
            filter=self.filter,
            serving_config=self._serving_config,
            page_size=self.max_documents,
            content_search_spec=content_search_spec,
            query_expansion_spec=query_expansion_spec,
            spell_correction_spec=spell_correction_spec,
            boost_spec=SearchRequest.BoostSpec(**self.boost_spec)
            if self.boost_spec
            else None,
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant for a query.

        Raises:
            InvalidArgument: re-raised from the search call with a hint about
                `engine_data_type` appended to the message.
            NotImplementedError: for an unsupported `engine_data_type`.
        """
        search_request = self._create_search_request(query)

        try:
            response = self._client.search(search_request)
        except InvalidArgument as exc:
            # Keep the original error as the explicit cause of the new one.
            raise type(exc)(
                exc.message
                + " This might be due to engine_data_type not set correctly."
            ) from exc

        if self.engine_data_type == 0:
            chunk_type = (
                "extractive_answers"
                if self.get_extractive_answers
                else "extractive_segments"
            )
            documents = self._convert_unstructured_search_response(
                response.results, chunk_type
            )
        elif self.engine_data_type == 1:
            documents = self._convert_structured_search_response(response.results)
        elif self.engine_data_type == 2:
            chunk_type = (
                "extractive_answers" if self.get_extractive_answers else "snippets"
            )
            documents = self._convert_website_search_response(
                response.results, chunk_type
            )
        else:
            raise NotImplementedError(
                "Only data store type 0 (Unstructured), 1 (Structured), "
                "or 2 (Website) are supported currently."
                + f" Got {self.engine_data_type}"
            )

        return documents
class VertexAIMultiTurnSearchRetriever(BaseRetriever, _BaseVertexAISearchRetriever):
    """`Google Vertex AI Search` retriever for multi-turn conversations."""

    conversation_id: str = "-"
    """Vertex AI Search Conversation ID."""

    _client: ConversationalSearchServiceClient
    _serving_config: str

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.ignore
        arbitrary_types_allowed = True
        underscore_attrs_are_private = True

    def __init__(self, **kwargs: Any) -> None:
        """Validates the fields, then builds the conversational search client.

        Raises:
            NotImplementedError: if `engine_data_type` is 1 (Structured).
        """
        super().__init__(**kwargs)

        # Reject the unsupported data type before any client is created.
        if self.engine_data_type == 1:
            raise NotImplementedError(
                "Data store type 1 (Structured) "
                "is not currently supported for multi-turn search."
                + f" Got {self.engine_data_type}"
            )

        from google.cloud.discoveryengine_v1beta import (
            ConversationalSearchServiceClient,
        )

        self._client = ConversationalSearchServiceClient(
            credentials=self.credentials,
            client_options=self.client_options,
            client_info=get_client_info(module="vertex-ai-search"),
        )

        self._serving_config = self._client.serving_config_path(
            project=self.project_id,
            location=self.location_id,
            data_store=self.data_store_id,
            serving_config=self.serving_config_id,
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant for a query.

        Sends the query as a conversation turn and converts the returned
        search results using the "extractive_answers" chunks: the website
        converter for `engine_data_type` 2, the unstructured one otherwise.
        """
        from google.cloud.discoveryengine_v1beta import (
            ConverseConversationRequest,
            TextInput,
        )

        request = ConverseConversationRequest(
            name=self._client.conversation_path(
                self.project_id,
                self.location_id,
                self.data_store_id,
                self.conversation_id,
            ),
            serving_config=self._serving_config,
            query=TextInput(input=query),
        )
        response = self._client.converse_conversation(request)

        if self.engine_data_type == 2:
            return self._convert_website_search_response(
                response.search_results, "extractive_answers"
            )

        return self._convert_unstructured_search_response(
            response.search_results, "extractive_answers"
        )
class VertexAISearchSummaryTool(BaseTool, VertexAISearchRetriever):
    """Class that exposes a tool to interface with an App in Vertex Search and
    Conversation and get the summary of the documents retrieved.
    """

    summary_prompt: Optional[str] = None
    """Prompt for the summarization agent."""
    summary_result_count: int = 3
    """Number of documents to include in the summary."""
    summary_include_citations: bool = True
    """Whether to include citations in the summary."""
    summary_spec_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Additional kwargs for `SearchRequest.ContentSearchSpec.SummarySpec`."""

    class Config(VertexAISearchRetriever.Config):
        """Redefinition so that the config is inherited from
        `VertexAISearchRetriever`, not from `BaseTool`.
        """

    def _get_content_spec_kwargs(self) -> Optional[Dict[str, Any]]:
        """Adds additional summary_spec parameters to the configuration of the search.

        Starts from the parent's content spec kwargs (an empty dict when the
        parent returns None) and adds a `summary_spec` entry built from the
        summary fields and `summary_spec_kwargs`.

        Returns:
            kwargs for the specification of the content.
        """
        from google.cloud.discoveryengine_v1beta import SearchRequest

        summary_spec_cls = SearchRequest.ContentSearchSpec.SummarySpec

        kwargs = super()._get_content_spec_kwargs() or {}

        kwargs["summary_spec"] = summary_spec_cls(
            summary_result_count=self.summary_result_count,
            include_citations=self.summary_include_citations,
            model_prompt_spec=summary_spec_cls.ModelPromptSpec(
                preamble=self.summary_prompt
            ),
            **self.summary_spec_kwargs,
        )

        return kwargs

    def _run(self, user_query: str) -> str:
        """Runs the tool.

        Args:
            user_query: The query to run by the agent.

        Returns:
            The summary text of the search response.
        """
        request = self._create_search_request(user_query)
        response = self._client.search(request)
        return response.summary.summary_text