Source code for langchain_community.retrievers.google_vertex_ai_search
"""Retriever wrapper for Google Vertex AI Search."""from__future__importannotationsfromtypingimportTYPE_CHECKING,Any,Dict,List,Optional,Sequence,Tuplefromlangchain_core._api.deprecationimportdeprecatedfromlangchain_core.callbacksimportCallbackManagerForRetrieverRunfromlangchain_core.documentsimportDocumentfromlangchain_core.retrieversimportBaseRetrieverfromlangchain_core.utilsimportget_from_dict_or_envfrompydanticimportBaseModel,ConfigDict,Field,model_validatorfromlangchain_community.utilities.vertexaiimportget_client_infoifTYPE_CHECKING:fromgoogle.api_core.client_optionsimportClientOptionsfromgoogle.cloud.discoveryengine_v1betaimportSearchRequest,SearchResultclass_BaseGoogleVertexAISearchRetriever(BaseModel):project_id:str"""Google Cloud Project ID."""data_store_id:Optional[str]=None"""Vertex AI Search data store ID."""search_engine_id:Optional[str]=None"""Vertex AI Search app ID."""location_id:str="global""""Vertex AI Search data store location."""serving_config_id:str="default_config""""Vertex AI Search serving config ID."""credentials:Any=None"""The default custom credentials (google.auth.credentials.Credentials) to use when making API calls. If not provided, credentials will be ascertained from the environment."""engine_data_type:int=Field(default=0,ge=0,le=3)""" Defines the Vertex AI Search app data type 0 - Unstructured data 1 - Structured data 2 - Website data 3 - Blended search """@model_validator(mode="before")@classmethoddefvalidate_environment(cls,values:Dict)->Any:"""Validates the environment."""try:fromgoogle.cloudimportdiscoveryengine_v1beta# noqa: F401exceptImportErrorasexc:raiseImportError("google.cloud.discoveryengine is not installed.""Please install it with pip install ""google-cloud-discoveryengine>=0.11.10")fromexctry:fromgoogle.api_core.exceptionsimportInvalidArgument# noqa: F401exceptImportErrorasexc:raiseImportError("google.api_core.exceptions is not installed. 
""Please install it with pip install google-api-core")fromexcvalues["project_id"]=get_from_dict_or_env(values,"project_id","PROJECT_ID")try:values["data_store_id"]=get_from_dict_or_env(values,"data_store_id","DATA_STORE_ID")values["search_engine_id"]=get_from_dict_or_env(values,"search_engine_id","SEARCH_ENGINE_ID")exceptException:passreturnvalues@propertydefclient_options(self)->"ClientOptions":fromgoogle.api_core.client_optionsimportClientOptionsreturnClientOptions(api_endpoint=(f"{self.location_id}-discoveryengine.googleapis.com"ifself.location_id!="global"elseNone))def_convert_structured_search_response(self,results:Sequence[SearchResult])->List[Document]:"""Converts a sequence of search results to a list of LangChain documents."""importjsonfromgoogle.protobuf.json_formatimportMessageToDictdocuments:List[Document]=[]forresultinresults:document_dict=MessageToDict(result.document._pb,preserving_proto_field_name=True)documents.append(Document(page_content=json.dumps(document_dict.get("struct_data",{})),metadata={"id":document_dict["id"],"name":document_dict["name"]},))returndocumentsdef_convert_unstructured_search_response(self,results:Sequence[SearchResult],chunk_type:str)->List[Document]:"""Converts a sequence of search results to a list of LangChain 
documents."""fromgoogle.protobuf.json_formatimportMessageToDictdocuments:List[Document]=[]forresultinresults:document_dict=MessageToDict(result.document._pb,preserving_proto_field_name=True)derived_struct_data=document_dict.get("derived_struct_data")ifnotderived_struct_data:continuedoc_metadata=document_dict.get("struct_data",{})doc_metadata["id"]=document_dict["id"]ifchunk_typenotinderived_struct_data:continueforchunkinderived_struct_data[chunk_type]:chunk_metadata=doc_metadata.copy()chunk_metadata["source"]=derived_struct_data.get("link","")ifchunk_type=="extractive_answers":chunk_metadata["source"]+=f":{chunk.get('pageNumber','')}"documents.append(Document(page_content=chunk.get("content",""),metadata=chunk_metadata))returndocumentsdef_convert_website_search_response(self,results:Sequence[SearchResult],chunk_type:str)->List[Document]:"""Converts a sequence of search results to a list of LangChain documents."""fromgoogle.protobuf.json_formatimportMessageToDictdocuments:List[Document]=[]forresultinresults:document_dict=MessageToDict(result.document._pb,preserving_proto_field_name=True)derived_struct_data=document_dict.get("derived_struct_data")ifnotderived_struct_data:continuedoc_metadata=document_dict.get("struct_data",{})doc_metadata["id"]=document_dict["id"]doc_metadata["source"]=derived_struct_data.get("link","")ifchunk_typenotinderived_struct_data:continuetext_field="snippet"ifchunk_type=="snippets"else"content"forchunkinderived_struct_data[chunk_type]:documents.append(Document(page_content=chunk.get(text_field,""),metadata=doc_metadata))ifnotdocuments:print(f"No {chunk_type} could be found.")# noqa: T201ifchunk_type=="extractive_answers":print(# noqa: T201"Make sure that your data store is using Advanced Website ""Indexing.\n""https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing")returndocuments
@deprecated(
    since="0.0.33",
    removal="1.0",
    alternative_import="langchain_google_community.VertexAISearchRetriever",
)
class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetriever):
    """`Google Vertex AI Search` retriever.

    For a detailed explanation of the Vertex AI Search concepts
    and configuration parameters, refer to the product documentation.
    https://cloud.google.com/generative-ai-app-builder/docs/enterprise-search-introduction
    """

    filter: Optional[str] = None
    """Filter expression."""
    get_extractive_answers: bool = False
    """If True return Extractive Answers, otherwise return Extractive Segments or Snippets."""  # noqa: E501
    max_documents: int = Field(default=5, ge=1, le=100)
    """The maximum number of documents to return."""
    max_extractive_answer_count: int = Field(default=1, ge=1, le=5)
    """The maximum number of extractive answers returned in each search result.
    At most 5 answers will be returned for each SearchResult.
    """
    max_extractive_segment_count: int = Field(default=1, ge=1, le=1)
    """The maximum number of extractive segments returned in each search result.
    Currently one segment will be returned for each SearchResult.
    """
    query_expansion_condition: int = Field(default=1, ge=0, le=2)
    """Specification to determine under which conditions query expansion should occur.
    0 - Unspecified query expansion condition. In this case, server behavior defaults
        to disabled
    1 - Disabled query expansion. Only the exact search query is used, even if
        SearchResponse.total_size is zero.
    2 - Automatic query expansion built by the Search API.
    """
    spell_correction_mode: int = Field(default=2, ge=0, le=2)
    """Specification to determine under which conditions spell correction should occur.
    0 - Unspecified spell correction mode. In this case, server behavior defaults
        to auto.
    1 - Suggestion only. Search API will try to find a spell suggestion if there is any
        and put in the `SearchResponse.corrected_query`.
        The spell suggestion will not be used as the search query.
    2 - Automatic spell correction built by the Search API.
        Search will be based on the corrected query if found.
    """

    # type is SearchServiceClient but can't be set due to optional imports
    _client: Any = None
    _serving_config: str

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="ignore",
    )

    def __init__(self, **kwargs: Any) -> None:
        """Initializes private fields.

        Raises:
            ImportError: If ``google.cloud.discoveryengine_v1beta`` cannot be
                imported.
            ValueError: If a blended search app (``engine_data_type == 3``) has
                no ``search_engine_id``, or if neither ``search_engine_id`` nor
                ``data_store_id`` is set.
        """
        try:
            from google.cloud.discoveryengine_v1beta import SearchServiceClient
        except ImportError as exc:
            raise ImportError(
                "google.cloud.discoveryengine is not installed. "
                "Please install it with pip install google-cloud-discoveryengine"
            ) from exc

        super().__init__(**kwargs)

        # Validate the configuration before building the API client so that an
        # invalid setup fails without constructing a client that is then dropped.
        if self.engine_data_type == 3 and not self.search_engine_id:
            raise ValueError(
                "search_engine_id must be specified for blended search apps."
            )
        if not self.search_engine_id and not self.data_store_id:
            raise ValueError(
                "Either data_store_id or search_engine_id must be specified."
            )

        # For more information, refer to:
        # https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
        self._client = SearchServiceClient(
            credentials=self.credentials,
            client_options=self.client_options,
            client_info=get_client_info(module="vertex-ai-search"),
        )

        if self.search_engine_id:
            # NOTE(review): this path always targets "default_config" and does
            # not use serving_config_id; confirm whether that is intended.
            self._serving_config = (
                f"projects/{self.project_id}"
                f"/locations/{self.location_id}"
                "/collections/default_collection"
                f"/engines/{self.search_engine_id}"
                "/servingConfigs/default_config"
            )
        else:
            self._serving_config = self._client.serving_config_path(
                project=self.project_id,
                location=self.location_id,
                data_store=self.data_store_id,
                serving_config=self.serving_config_id,
            )

    def _create_search_request(self, query: str) -> SearchRequest:
        """Prepares a SearchRequest object.

        The content search spec depends on ``engine_data_type``: extractive
        answers or segments for type 0, none for type 1, and extractive answers
        plus snippets for types 2 and 3.

        Raises:
            NotImplementedError: If ``engine_data_type`` is not 0, 1, 2 or 3.
        """
        from google.cloud.discoveryengine_v1beta import SearchRequest

        query_expansion_spec = SearchRequest.QueryExpansionSpec(
            condition=self.query_expansion_condition,
        )

        spell_correction_spec = SearchRequest.SpellCorrectionSpec(
            mode=self.spell_correction_mode
        )

        if self.engine_data_type == 0:
            if self.get_extractive_answers:
                extractive_content_spec = (
                    SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
                        max_extractive_answer_count=self.max_extractive_answer_count,
                    )
                )
            else:
                extractive_content_spec = (
                    SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
                        max_extractive_segment_count=self.max_extractive_segment_count,
                    )
                )
            content_search_spec = SearchRequest.ContentSearchSpec(
                extractive_content_spec=extractive_content_spec
            )
        elif self.engine_data_type == 1:
            content_search_spec = None
        elif self.engine_data_type in (2, 3):
            content_search_spec = SearchRequest.ContentSearchSpec(
                extractive_content_spec=SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
                    max_extractive_answer_count=self.max_extractive_answer_count,
                ),
                snippet_spec=SearchRequest.ContentSearchSpec.SnippetSpec(
                    return_snippet=True
                ),
            )
        else:
            raise NotImplementedError(
                "Only data store type 0 (Unstructured), 1 (Structured), "
                "2 (Website), or 3 (Blended) are supported currently."
                + f" Got {self.engine_data_type}"
            )

        return SearchRequest(
            query=query,
            filter=self.filter,
            serving_config=self._serving_config,
            page_size=self.max_documents,
            content_search_spec=content_search_spec,
            query_expansion_spec=query_expansion_spec,
            spell_correction_spec=spell_correction_spec,
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant for a query."""
        return self.get_relevant_documents_with_response(query)[0]

    def get_relevant_documents_with_response(
        self, query: str
    ) -> Tuple[List[Document], Any]:
        """Get documents relevant for a query along with the raw search response.

        Args:
            query: The search query text.

        Returns:
            A ``(documents, response)`` tuple, where ``response`` is the object
            returned by the client's ``search`` call.

        Raises:
            google.api_core.exceptions.InvalidArgument: Re-raised with a hint
                about ``engine_data_type`` appended to the message.
            NotImplementedError: If ``engine_data_type`` is not 0, 1, 2 or 3.
        """
        from google.api_core.exceptions import InvalidArgument

        search_request = self._create_search_request(query)

        try:
            response = self._client.search(search_request)
        except InvalidArgument as exc:
            # Chain explicitly so the original API error stays attached.
            raise type(exc)(
                exc.message
                + " This might be due to engine_data_type not set correctly."
            ) from exc

        if self.engine_data_type == 0:
            chunk_type = (
                "extractive_answers"
                if self.get_extractive_answers
                else "extractive_segments"
            )
            documents = self._convert_unstructured_search_response(
                response.results, chunk_type
            )
        elif self.engine_data_type == 1:
            documents = self._convert_structured_search_response(response.results)
        elif self.engine_data_type in (2, 3):
            chunk_type = (
                "extractive_answers" if self.get_extractive_answers else "snippets"
            )
            documents = self._convert_website_search_response(
                response.results, chunk_type
            )
        else:
            raise NotImplementedError(
                "Only data store type 0 (Unstructured), 1 (Structured), "
                "2 (Website), or 3 (Blended) are supported currently."
                + f" Got {self.engine_data_type}"
            )

        return documents, response
@deprecated(
    since="0.0.33",
    removal="1.0",
    alternative_import="langchain_google_community.VertexAIMultiTurnSearchRetriever",
)
class GoogleVertexAIMultiTurnSearchRetriever(
    BaseRetriever, _BaseGoogleVertexAISearchRetriever
):
    """`Google Vertex AI Search` retriever for multi-turn conversations."""

    conversation_id: str = "-"
    """Vertex AI Search Conversation ID."""

    # type is ConversationalSearchServiceClient but can't be set due to optional imports
    _client: Any = None
    _serving_config: str

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="ignore",
    )

    def __init__(self, **kwargs: Any) -> None:
        """Initializes the conversational search client and serving config.

        Raises:
            ValueError: If ``data_store_id`` is not set.
            NotImplementedError: If ``engine_data_type`` is 1 (Structured) or
                3 (Blended).
        """
        super().__init__(**kwargs)

        # Validate the configuration before building the API client so that an
        # invalid setup fails without constructing a client that is then dropped.
        if not self.data_store_id:
            raise ValueError("data_store_id is required for MultiTurnSearchRetriever.")

        if self.engine_data_type in (1, 3):
            raise NotImplementedError(
                "Data store type 1 (Structured) and 3 (Blended) "
                "is not currently supported for multi-turn search."
                + f" Got {self.engine_data_type}"
            )

        from google.cloud.discoveryengine_v1beta import (
            ConversationalSearchServiceClient,
        )

        self._client = ConversationalSearchServiceClient(
            credentials=self.credentials,
            client_options=self.client_options,
            client_info=get_client_info(module="vertex-ai-search"),
        )

        self._serving_config = self._client.serving_config_path(
            project=self.project_id,
            location=self.location_id,
            data_store=self.data_store_id,
            serving_config=self.serving_config_id,
        )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Get documents relevant for a query.

        Sends the query as the next turn of the conversation identified by
        ``conversation_id`` and converts the returned search results using the
        ``"extractive_answers"`` chunk type.
        """
        from google.cloud.discoveryengine_v1beta import (
            ConverseConversationRequest,
            TextInput,
        )

        request = ConverseConversationRequest(
            name=self._client.conversation_path(
                self.project_id,
                self.location_id,
                self.data_store_id,
                self.conversation_id,
            ),
            serving_config=self._serving_config,
            query=TextInput(input=query),
        )
        response = self._client.converse_conversation(request)

        # Website data (type 2) uses the website converter; everything else
        # that reaches this point is handled as unstructured data.
        if self.engine_data_type == 2:
            return self._convert_website_search_response(
                response.search_results, "extractive_answers"
            )

        return self._convert_unstructured_search_response(
            response.search_results, "extractive_answers"
        )
class GoogleCloudEnterpriseSearchRetriever(GoogleVertexAISearchRetriever):
    """`Google Vertex Search API` retriever alias for backwards compatibility.

    DEPRECATED: Use `GoogleVertexAISearchRetriever` instead.
    """

    def __init__(self, **data: Any) -> None:
        """Emits a ``DeprecationWarning`` and delegates to the parent initializer."""
        import warnings

        warnings.warn(
            "GoogleCloudEnterpriseSearchRetriever is deprecated, use GoogleVertexAISearchRetriever",  # noqa: E501
            DeprecationWarning,
            # Attribute the warning to the code instantiating the class rather
            # than to this module.
            stacklevel=2,
        )

        super().__init__(**data)