Source code for langchain_google_vertexai.model_garden_maas.llama
from __future__ import annotations

import json
import uuid
from typing import (
    Any,
    AsyncIterator,
    Callable,
    Dict,
    Iterator,
    List,
    Literal,
    Optional,
    Sequence,
    Type,
    Union,
    cast,
    overload,
)

from langchain_core.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain_core.language_models import LanguageModelInput
from langchain_core.language_models.chat_models import (
    BaseChatModel,
    agenerate_from_stream,
    generate_from_stream,
)
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
)
from langchain_core.messages.tool import tool_call as create_tool_call
from langchain_core.messages.tool import tool_call_chunk
from langchain_core.outputs import (
    ChatGeneration,
    ChatGenerationChunk,
    ChatResult,
)
from langchain_core.runnables import Runnable
from langchain_core.tools import BaseTool
from langchain_core.utils.function_calling import (
    convert_to_openai_function,
)

from langchain_google_vertexai.model_garden_maas._base import (
    _BaseVertexMaasModelGarden,
    acompletion_with_retry,
    completion_with_retry,
)


@overload
def _parse_response_candidate_llama(
    response_candidate: Dict[str, str], streaming: Literal[False] = False
) -> AIMessage:
    ...


@overload
def _parse_response_candidate_llama(
    response_candidate: Dict[str, str], streaming: Literal[True]
) -> AIMessageChunk:
    ...


def _parse_response_candidate_llama(
    response_candidate: Dict[str, str], streaming: bool = False
) -> AIMessage:
    content = response_candidate["content"]
    role = response_candidate["role"]
    if role != "assistant":
        raise ValueError(f"Role in response is {role}, expected 'assistant'!")
    tool_calls = []
    tool_call_chunks = []
    response_json = None
    try:
        response_json = json.loads(response_candidate["content"])
    except ValueError:
        pass
    if response_json and "name" in response_json:
        function_name = response_json["name"]
        function_args = response_json.get("parameters", None)
        if streaming:
            tool_call_chunks.append(
                tool_call_chunk(
                    name=function_name, args=function_args, id=str(uuid.uuid4())
                )
            )
        else:
            tool_calls.append(
                create_tool_call(
                    name=function_name, args=function_args, id=str(uuid.uuid4())
                )
            )
        content = ""
    if streaming:
        return AIMessageChunk(
            content=content,
            tool_call_chunks=tool_call_chunks,
        )
    return AIMessage(
        content=content,
        tool_calls=tool_calls,
    )
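For illustration only (not part of the module), a minimal sketch of how a candidate whose content is a JSON function call, following the prompt convention built in `_convert_messages` below, is turned into an `AIMessage` with a tool call; the `get_weather` tool name is hypothetical:

# Plain (non-JSON) content is returned unchanged, with no tool calls.
candidate = {
    "role": "assistant",
    "content": '{"name": "get_weather", "parameters": {"city": "Paris"}}',
}
message = _parse_response_candidate_llama(candidate)
assert message.content == ""
assert message.tool_calls[0]["name"] == "get_weather"
assert message.tool_calls[0]["args"] == {"city": "Paris"}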
class VertexModelGardenLlama(_BaseVertexMaasModelGarden, BaseChatModel):  # type: ignore[misc]
    """Integration for Llama 3.1 on Google Cloud Vertex AI Model-as-a-Service.

    For more information, see:
        https://cloud.google.com/blog/products/ai-machine-learning/llama-3-1-on-vertex-ai

    Setup:
        You need to enable a corresponding MaaS model (Google Cloud UI console ->
        Vertex AI -> Model Garden -> search for a model you need and click enable).

        You must have the langchain-google-vertexai Python package installed:

        .. code-block:: bash

            pip install -U langchain-google-vertexai

        And either:
            - Have credentials configured for your environment
              (gcloud, workload identity, etc...)
            - Store the path to a service account JSON file as the
              GOOGLE_APPLICATION_CREDENTIALS environment variable

        This codebase uses the google.auth library which first looks for the
        application credentials variable mentioned above, and then looks for
        system-level auth.

        For more information, see:
        https://cloud.google.com/docs/authentication/application-default-credentials#GAC
        and
        https://googleapis.dev/python/google-auth/latest/reference/google.auth.html#module-google.auth.

    Key init args — completion params:
        model: str
            Name of VertexMaaS model to use ("meta/llama3-405b-instruct-maas")
        append_tools_to_system_message: bool
            Whether to append tools to a system message

    Key init args — client params:
        credentials: Optional[google.auth.credentials.Credentials]
            The default custom credentials to use when making API calls.
            If not provided, credentials will be ascertained from the environment.
        project: Optional[str]
            The default GCP project to use when making Vertex API calls.
        location: str = "us-central1"
            The default location to use when making API calls.

    See full list of supported init args and their descriptions in the params
    section.

    Instantiate:
        .. code-block:: python

            from langchain_google_vertexai import VertexModelGardenLlama

            llm = VertexModelGardenLlama(
                model="meta/llama3-405b-instruct-maas",
                # other params...
            )

    Invoke:
        .. code-block:: python

            messages = [
                ("system", "You are a helpful translator. "
                           "Translate the user sentence to French."),
                ("human", "I love programming."),
            ]
            llm.invoke(messages)

        .. code-block:: python

            AIMessage(content="J'adore programmer. \n", id='run-925ce305-2268-44c4-875f-dde9128520ad-0')

    Stream:
        .. code-block:: python

            for chunk in llm.stream(messages):
                print(chunk)

        .. code-block:: python

            AIMessageChunk(content='J', id='run-9df01d73-84d9-42db-9d6b-b1466a019e89')
            AIMessageChunk(content="'adore programmer. \n", id='run-9df01d73-84d9-42db-9d6b-b1466a019e89')
            AIMessageChunk(content='', id='run-9df01d73-84d9-42db-9d6b-b1466a019e89')

        .. code-block:: python

            stream = llm.stream(messages)
            full = next(stream)
            for chunk in stream:
                full += chunk
            full

        .. code-block:: python

            AIMessageChunk(content="J'adore programmer. \n", id='run-b7f7492c-4cb5-42d0-8fc3-dce9b293b0fb')

    """  # noqa: E501

    def _convert_messages(
        self, messages: List[BaseMessage], tools: Optional[List[BaseTool]] = None
    ) -> List[Dict[str, Any]]:
        converted_messages: List[Dict[str, Any]] = []
        if tools and not self.append_tools_to_system_message:
            raise ValueError(
                "If providing tools, either format system message yourself or "
                "set append_tools_to_system_message to True!"
            )
        elif tools:
            tools_str = "\n".join(
                [json.dumps(convert_to_openai_function(t)) for t in tools]
            )
            formatted_system_message = (
                "You are an assistant with access to the following tools:\n\n"
                f"{tools_str}\n\n"
                "If you decide to use a tool, please respond with a JSON for a "
                "function call with its proper arguments that best answers the "
                "given prompt.\nRespond in the format "
                '{"name": function name, "parameters": dictionary '
                "of argument name and its value}. Do not use variables.\n"
                "Do not provide any additional comments when calling a tool.\n"
                "Do not mention tools to the user when preparing the final answer."
            )
            message = messages[0]
            if not isinstance(message, SystemMessage):
                converted_messages.append(
                    {"role": "system", "content": formatted_system_message}
                )
            else:
                converted_messages.append(
                    {
                        "role": "system",
                        "content": str(message.content)
                        + "\n"
                        + formatted_system_message,
                    }
                )

        for i, message in enumerate(messages):
            if tools and isinstance(message, SystemMessage) and i == 0:
                continue
            if isinstance(message, AIMessage):
                converted_messages.append(
                    {"role": "assistant", "content": message.content}
                )
            elif isinstance(message, HumanMessage):
                converted_messages.append(
                    {"role": "user", "content": message.content}
                )
            elif isinstance(message, SystemMessage):
                converted_messages.append(
                    {"role": "system", "content": message.content}
                )
            elif isinstance(message, ToolMessage):
                # we also need to format a previous message if we got a tool result
                prev_message = messages[i - 1]
                if not isinstance(prev_message, AIMessage):
                    raise ValueError("ToolMessage should follow AIMessage only!")
                _ = converted_messages[-1].pop("content", None)
                tool_calls = []
                for tool_call in prev_message.tool_calls:
                    tool_calls.append(
                        {
                            "type": "function",
                            "id": tool_call["id"],
                            "function": {
                                "name": tool_call["name"],
                                "arguments": json.dumps(tool_call.get("args", {})),
                            },
                        }
                    )
                converted_messages[-1]["tool_calls"] = tool_calls
                if len(tool_calls) > 1:
                    raise ValueError(
                        "Only a single function call per turn is supported!"
                    )
                converted_messages.append(
                    {
                        "role": "tool",
                        "name": message.name,
                        "content": message.content,
                        "tool_call_id": message.tool_call_id,
                    }
                )
            else:
                raise ValueError(
                    f"Message type {type(message)} is not yet supported!"
                )
        return converted_messages

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        stream: Optional[bool] = None,
        *,
        tools: Optional[List[BaseTool]] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Generate next turn in the conversation.

        Args:
            messages: The history of the conversation as a list of messages.
                Code chat does not support context.
            stop: The list of stop words (optional).
            run_manager: The CallbackManager for LLM run, it's not used at the moment.
            stream: Whether to use the streaming endpoint.

        Returns:
            The ChatResult that contains outputs generated by the model.

        Raises:
            ValueError: if the last message in the list is not from human.
"""ifstreamisTrue:returngenerate_from_stream(self._stream(messages,stop=stop,run_manager=run_manager,tools=tools,**kwargs,))converted_messages=self._convert_messages(messages,tools=tools)response=completion_with_retry(self,messages=converted_messages,**kwargs)returnself._create_chat_result(response)asyncdef_agenerate(self,messages:List[BaseMessage],stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,stream:Optional[bool]=None,*,tools:Optional[List[BaseTool]]=None,**kwargs:Any,)->ChatResult:ifstream:stream_iter=self._astream(messages=messages,stop=stop,run_manager=run_manager,**kwargs)returnawaitagenerate_from_stream(stream_iter)converted_messages=self._convert_messages(messages,tools=tools)response=awaitacompletion_with_retry(self,messages=converted_messages,run_manager=run_manager,**kwargs)returnself._create_chat_result(response)def_create_chat_result(self,response:Dict)->ChatResult:generations=[]token_usage=response.get("usage",{})forcandidateinresponse["choices"]:finish_reason=response.get("finish_reason")message=_parse_response_candidate_llama(candidate["message"])iftoken_usageandisinstance(message,AIMessage):message.usage_metadata={"input_tokens":token_usage.get("prompt_tokens",0),"output_tokens":token_usage.get("completion_tokens",0),"total_tokens":token_usage.get("total_tokens",0),}gen=ChatGeneration(message=message,generation_info={"finish_reason":finish_reason},)generations.append(gen)llm_output={"token_usage":token_usage,"model":self.model_name}returnChatResult(generations=generations,llm_output=llm_output)@propertydef_llm_type(self)->str:"""Return type of chat model."""return"vertexai_model_garden_maas_llama"def_parse_chunk(self,chunk:Dict)->AIMessageChunk:chunk_delta=chunk["choices"][0]["delta"]content=chunk_delta.get("content","")ifchunk_delta.get("role")!="assistant":raiseValueError(f"Got chunk with non-assistant role: {chunk_delta}")additional_kwargs={}ifraw_tool_calls:=chunk_delta.get("tool_calls"):additional_kwargs["tool_calls"]=raw_tool_callstry:tool_call_chunks=[]forraw_tool_callinraw_tool_calls:ifnotraw_tool_call.get("index")andnotraw_tool_call.get("id"):tool_call_id=str(uuid.uuid4())else:tool_call_id=raw_tool_call.get("id")tool_call_chunks.append(tool_call_chunk(name=raw_tool_call["function"].get("name"),args=raw_tool_call["function"].get("arguments"),id=tool_call_id,index=raw_tool_call.get("index"),))exceptKeyError:passelse:tool_call_chunks=[]iftoken_usage:=chunk.get("usage"):usage_metadata={"input_tokens":token_usage.get("prompt_tokens",0),"output_tokens":token_usage.get("completion_tokens",0),"total_tokens":token_usage.get("total_tokens",0),}else:usage_metadata=NonereturnAIMessageChunk(content=content,additional_kwargs=additional_kwargs,tool_call_chunks=tool_call_chunks,usage_metadata=usage_metadata,# type: 
ignore[arg-type])def_stream(self,messages:List[BaseMessage],stop:Optional[List[str]]=None,run_manager:Optional[CallbackManagerForLLMRun]=None,*,tools:Optional[List[BaseTool]]=None,**kwargs:Any,)->Iterator[ChatGenerationChunk]:converted_messages=self._convert_messages(messages,tools=tools)params={**kwargs,"stream":True,"headers_content_type":"text/event-stream"}forchunkincompletion_with_retry(self,messages=converted_messages,run_manager=run_manager,**params):iflen(chunk["choices"])==0:continuemessage=self._parse_chunk(chunk)gen_chunk=ChatGenerationChunk(message=message)ifrun_manager:run_manager.on_llm_new_token(token=cast(str,message.content),chunk=gen_chunk)yieldgen_chunkasyncdef_astream(self,messages:List[BaseMessage],stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,*,tools:Optional[List[BaseTool]]=None,**kwargs:Any,)->AsyncIterator[ChatGenerationChunk]:converted_messages=self._convert_messages(messages,tools=tools)params={**kwargs,"stream":True,"headers_content_type":"text/event-stream"}asyncforchunkinawaitacompletion_with_retry(self,messages=converted_messages,run_manager=run_manager,**params):iflen(chunk["choices"])==0:continuemessage=self._parse_chunk(chunk)gen_chunk=ChatGenerationChunk(message=message)ifrun_manager:awaitrun_manager.on_llm_new_token(token=cast(str,message.content),chunk=gen_chunk)yieldgen_chunk
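    # Illustrative usage sketch (not part of the module): the async generator
    # above backs the public `astream` API. This assumes an enabled Llama MaaS
    # model and ambient Google credentials, as described in the class docstring.
    #
    #     import asyncio
    #
    #     async def main() -> None:
    #         llm = VertexModelGardenLlama(model="meta/llama3-405b-instruct-maas")
    #         # Each yielded AIMessageChunk carries incremental content (and,
    #         # when reported by the endpoint, usage metadata).
    #         async for chunk in llm.astream("Say hello in French."):
    #             print(chunk.content, end="", flush=True)
    #
    #     asyncio.run(main())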
    def bind_tools(
        self,
        tools: Sequence[Union[Dict[str, Any], Type, Callable, BaseTool]],
        **kwargs: Any,
    ) -> Runnable[LanguageModelInput, BaseMessage]:
        """Bind tool-like objects to this chat model."""
        formatted_tools = [convert_to_openai_function(tool) for tool in tools]
        return super().bind(tools=formatted_tools, **kwargs)
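Finally, a hedged end-to-end sketch (not part of the module) of the tool-calling flow that `bind_tools` and the `ToolMessage` handling above support. The `get_weather` tool is a made-up example, and `append_tools_to_system_message=True` is required when tools are passed:

from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.tools import tool
from langchain_google_vertexai import VertexModelGardenLlama


@tool
def get_weather(city: str) -> str:
    """Return the current weather in a city."""
    return f"It is sunny in {city}."


llm = VertexModelGardenLlama(
    model="meta/llama3-405b-instruct-maas",
    append_tools_to_system_message=True,
)
llm_with_tools = llm.bind_tools([get_weather])

messages = [HumanMessage("What is the weather in Paris?")]
ai_msg = llm_with_tools.invoke(messages)
messages.append(ai_msg)

# Run the requested call (only a single function call per turn is supported)
# and send the result back as a ToolMessage so the model can compose the
# final answer.
for tool_call in ai_msg.tool_calls:
    result = get_weather.invoke(tool_call["args"])
    messages.append(
        ToolMessage(
            content=result,
            name=tool_call["name"],
            tool_call_id=tool_call["id"],
        )
    )

print(llm_with_tools.invoke(messages).content)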