[docs]classWatsonxLLM(BaseLLM):""" IBM watsonx.ai large language models. To use the large language models, you need to have the ``langchain_ibm`` python package installed, and the environment variable ``WATSONX_APIKEY`` set with your API key or pass it as a named parameter to the constructor. Example: .. code-block:: python from ibm_watsonx_ai.metanames import GenTextParamsMetaNames parameters = { GenTextParamsMetaNames.DECODING_METHOD: "sample", GenTextParamsMetaNames.MAX_NEW_TOKENS: 100, GenTextParamsMetaNames.MIN_NEW_TOKENS: 1, GenTextParamsMetaNames.TEMPERATURE: 0.5, GenTextParamsMetaNames.TOP_K: 50, GenTextParamsMetaNames.TOP_P: 1, } from langchain_ibm import WatsonxLLM watsonx_llm = WatsonxLLM( model_id="google/flan-ul2", url="https://us-south.ml.cloud.ibm.com", apikey="*****", project_id="*****", params=parameters, ) """model_id:Optional[str]=None"""Type of model to use."""model:Optional[str]=None""" Name or alias of the foundation model to use. When using IBM’s watsonx.ai Model Gateway (public preview), you can specify any supported third-party model—OpenAI, Anthropic, NVIDIA, Cerebras, or IBM’s own Granite series—via a single, OpenAI-compatible interface. Models must be explicitly provisioned (opt-in) through the Gateway to ensure secure, vendor-agnostic access and easy switch-over without reconfiguration. For more details on configuration and usage, see IBM watsonx Model Gateway docs: https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/fm-model-gateway.html?context=wx&audience=wdp """deployment_id:Optional[str]=None"""Type of deployed model to use."""project_id:Optional[str]=None"""ID of the Watson Studio project."""space_id:Optional[str]=None"""ID of the Watson Studio space."""url:SecretStr=Field(alias="url",default_factory=secret_from_env("WATSONX_URL",default=None),# type: ignore[assignment])"""URL to the Watson Machine Learning or CPD instance."""apikey:Optional[SecretStr]=Field(alias="apikey",default_factory=secret_from_env("WATSONX_APIKEY",default=None))"""API key to the Watson Machine Learning or CPD instance."""token:Optional[SecretStr]=Field(alias="token",default_factory=secret_from_env("WATSONX_TOKEN",default=None))"""Token to the CPD instance."""password:Optional[SecretStr]=Field(alias="password",default_factory=secret_from_env("WATSONX_PASSWORD",default=None),)"""Password to the CPD instance."""username:Optional[SecretStr]=Field(alias="username",default_factory=secret_from_env("WATSONX_USERNAME",default=None),)"""Username to the CPD instance."""instance_id:Optional[SecretStr]=Field(alias="instance_id",default_factory=secret_from_env("WATSONX_INSTANCE_ID",default=None),)"""Instance_id of the CPD instance."""version:Optional[SecretStr]=None"""Version of the CPD instance."""params:Optional[dict]=None"""Model parameters to use during request generation."""verify:Union[str,bool,None]=None"""You can pass one of following as verify: * the path to a CA_BUNDLE file * the path of directory with certificates of trusted CAs * True - default path to truststore will be taken * False - no verification will be made"""streaming:bool=False""" Whether to stream the results or not. """watsonx_model:ModelInference=Field(default=None,exclude=True)#: :meta private:watsonx_model_gateway:Gateway=Field(default=None,exclude=True)#: :meta private:watsonx_client:Optional[APIClient]=Field(default=None)model_config=ConfigDict(extra="forbid",)@classmethoddefis_lc_serializable(cls)->bool:returnFalse@propertydeflc_secrets(self)->Dict[str,str]:"""A map of constructor argument names for secret IDs. For example: { "url": "WATSONX_URL", "apikey": "WATSONX_APIKEY", "token": "WATSONX_TOKEN", "password": "WATSONX_PASSWORD", "username": "WATSONX_USERNAME", "instance_id": "WATSONX_INSTANCE_ID", } """return{"url":"WATSONX_URL","apikey":"WATSONX_APIKEY","token":"WATSONX_TOKEN","password":"WATSONX_PASSWORD","username":"WATSONX_USERNAME","instance_id":"WATSONX_INSTANCE_ID",}@model_validator(mode="after")defvalidate_environment(self)->Self:"""Validate that credentials and python package exists in environment."""ifself.watsonx_model_gatewayisnotNone:raiseNotImplementedError("Passing the 'watsonx_model_gateway' parameter to the WatsonxLLM ""constructor is not supported yet.")ifisinstance(self.watsonx_model,(ModelInference,Model)):self.model_id=getattr(self.watsonx_model,"model_id")self.deployment_id=getattr(self.watsonx_model,"deployment_id","")self.project_id=getattr(getattr(self.watsonx_model,"_client"),"default_project_id",)self.space_id=getattr(getattr(self.watsonx_model,"_client"),"default_space_id")self.params=getattr(self.watsonx_model,"params")elifisinstance(self.watsonx_client,APIClient):ifsum(map(bool,(self.model,self.model_id,self.deployment_id)))!=1:raiseValueError("The parameters 'model', 'model_id' and 'deployment_id' are ""mutually exclusive. Please specify exactly one of these ""parameters when initializing WatsonxLLM.")ifself.modelisnotNone:watsonx_model_gateway=Gateway(api_client=self.watsonx_client,verify=self.verify,)self.watsonx_model_gateway=watsonx_model_gatewayelse:watsonx_model=ModelInference(model_id=self.model_id,deployment_id=self.deployment_id,params=self.params,api_client=self.watsonx_client,project_id=self.project_id,space_id=self.space_id,verify=self.verify,)self.watsonx_model=watsonx_modelelse:ifsum(map(bool,(self.model,self.model_id,self.deployment_id)))!=1:raiseValueError("The parameters 'model', 'model_id' and 'deployment_id' are ""mutually exclusive. Please specify exactly one of these ""parameters when initializing WatsonxLLM.")check_for_attribute(self.url,"url","WATSONX_URL")if"cloud.ibm.com"inself.url.get_secret_value():ifnotself.tokenandnotself.apikey:raiseValueError("Did not find 'apikey' or 'token',"" please add an environment variable"" `WATSONX_APIKEY` or 'WATSONX_TOKEN' ""which contains it,"" or pass 'apikey' or 'token'"" as a named parameter.")else:ifnotself.tokenandnotself.passwordandnotself.apikey:raiseValueError("Did not find 'token', 'password' or 'apikey',"" please add an environment variable"" `WATSONX_TOKEN`, 'WATSONX_PASSWORD' or 'WATSONX_APIKEY' ""which contains it,"" or pass 'token', 'password' or 'apikey'"" as a named parameter.")elifself.token:check_for_attribute(self.token,"token","WATSONX_TOKEN")elifself.password:check_for_attribute(self.password,"password","WATSONX_PASSWORD")check_for_attribute(self.username,"username","WATSONX_USERNAME")elifself.apikey:check_for_attribute(self.apikey,"apikey","WATSONX_APIKEY")check_for_attribute(self.username,"username","WATSONX_USERNAME")ifnotself.instance_id:check_for_attribute(self.instance_id,"instance_id","WATSONX_INSTANCE_ID")credentials=Credentials(url=self.url.get_secret_value()ifself.urlelseNone,api_key=self.apikey.get_secret_value()ifself.apikeyelseNone,token=self.token.get_secret_value()ifself.tokenelseNone,password=self.password.get_secret_value()ifself.passwordelseNone,username=self.username.get_secret_value()ifself.usernameelseNone,instance_id=self.instance_id.get_secret_value()ifself.instance_idelseNone,version=self.version.get_secret_value()ifself.versionelseNone,verify=self.verify,)ifself.modelisnotNone:watsonx_model_gateway=Gateway(credentials=credentials,verify=self.verify,)self.watsonx_model_gateway=watsonx_model_gatewayelse:watsonx_model=ModelInference(model_id=self.model_id,deployment_id=self.deployment_id,credentials=credentials,params=self.params,project_id=self.project_id,space_id=self.space_id,)self.watsonx_model=watsonx_modelreturnself@gateway_error_handlerdef_call_model_gateway(self,*,model:str,prompt:list,**params:Any)->Any:returnself.watsonx_model_gateway.completions.create(model=model,prompt=prompt,**params)@async_gateway_error_handlerasyncdef_acall_model_gateway(self,*,model:str,prompt:list,**params:Any)->Any:returnawaitself.watsonx_model_gateway.completions.acreate(model=model,prompt=prompt,**params)@propertydef_identifying_params(self)->Mapping[str,Any]:"""Get the identifying parameters."""return{"model_id":self.model_id,"deployment_id":self.deployment_id,"params":self.params,"project_id":self.project_id,"space_id":self.space_id,}@propertydef_llm_type(self)->str:"""Return the type of LLM."""return"IBM watsonx.ai"@staticmethoddef_extract_token_usage(response:Optional[List[Dict[str,Any]]]=None,)->Dict[str,Any]:ifresponseisNone:return{"generated_token_count":0,"input_token_count":0}input_token_count=0generated_token_count=0defget_count_value(key:str,result:Dict[str,Any])->int:returnresult.get(key,0)or0forresinresponse:results=res.get("results")ifresults:input_token_count+=get_count_value("input_token_count",results[0])generated_token_count+=get_count_value("generated_token_count",results[0])return{"generated_token_count":generated_token_count,"input_token_count":input_token_count,}@staticmethoddef_validate_chat_params(params:Dict[str,Any],)->Dict[str,Any]:"""Validate and fix the chat parameters."""forparaminparams.keys():ifparam.lower()notintextgen_valid_params:raiseException(f"Parameter {param} is not valid. "f"Valid parameters are: {textgen_valid_params}")returnparams@staticmethoddef_override_chat_params(params:Dict[str,Any],**kwargs:Any)->Tuple[Dict[str,Any],Dict[str,Any]]:""" Override class parameters with those provided in the invoke method. Merges the 'params' dictionary with any 'params' found in kwargs, then updates 'params' with matching keys from kwargs and removes those keys from kwargs. """forkeyinlist(kwargs.keys()):ifkey.lower()intextgen_valid_params:params[key]=kwargs.pop(key)returnparams,kwargsdef_get_chat_params(self,stop:Optional[List[str]]=None,**kwargs:Any)->Tuple[Dict[str,Any],Dict[str,Any]]:params=extract_params(kwargs,self.params)params,kwargs=self._override_chat_params(paramsor{},**kwargs)ifstopisnotNone:ifparamsand"stop_sequences"inparams:raiseValueError("`stop_sequences` found in both the input and default params.")params=(paramsor{})|{"stop_sequences":stop}returnparams,kwargsdef_create_llm_result(self,response:List[dict])->LLMResult:"""Create the LLMResult from the choices and prompts."""generations=[[Generation(text=result.get("generated_text",""),generation_info={"finish_reason":result.get("stop_reason")}|({"moderations":moderations}if(moderations:=result.get("moderations"))else{}),)]forresinresponseif(results:=res.get("results"))forresultinresults]llm_output={"token_usage":self._extract_token_usage(response),"model_id":self.model_id,"deployment_id":self.deployment_id,}returnLLMResult(generations=generations,llm_output=llm_output)def_create_llm_gateway_result(self,response:dict)->LLMResult:"""Create the LLMResult from the choices and prompts."""choices=response["choices"]generations=[[Generation(text=choice["text"],generation_info=dict(finish_reason=choice.get("finish_reason"),logprobs=choice.get("logprobs"),),)]forchoiceinchoices]llm_output={"token_usage":response["usage"]["total_tokens"],"model_id":self.model_id,"deployment_id":self.deployment_id,}returnLLMResult(generations=generations,llm_output=llm_output)def_stream_response_to_generation_chunk(self,stream_response:Dict[str,Any],)->GenerationChunk:"""Convert a stream response to a generation chunk."""result=stream_response.get("results",[{}])[0]ifnotresult:returnGenerationChunk(text="")finish_reason=result.get("stop_reason")finish_reason=Noneiffinish_reason=="not_finished"elsefinish_reasongeneration_info={"finish_reason":finish_reason,"llm_output":{"model_id":self.model_id,"deployment_id":self.deployment_id,},}ifmoderations:=result.get("moderations"):generation_info["moderations"]=moderationsreturnGenerationChunk(text=result.get("generated_text",""),generation_info=generation_info,)def_call(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[CallbackManagerForLLMRun]=None,**kwargs:Any,)->str:"""Call the IBM watsonx.ai inference endpoint. Args: prompt: The prompt to pass into the model. stop: Optional list of stop words to use when generating the response. run_manager: Optional callback manager. Returns: The string generated by the model. Example: .. code-block:: python response = watsonx_llm.invoke("What is a molecule") """result=self._generate(prompts=[prompt],stop=stop,run_manager=run_manager,**kwargs)returnresult.generations[0][0].textasyncdef_acall(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,**kwargs:Any,)->str:"""Async version of the _call method."""result=awaitself._agenerate(prompts=[prompt],stop=stop,run_manager=run_manager,**kwargs)returnresult.generations[0][0].textdef_generate(self,prompts:List[str],stop:Optional[List[str]]=None,run_manager:Optional[CallbackManagerForLLMRun]=None,stream:Optional[bool]=None,**kwargs:Any,)->LLMResult:"""Call the IBM watsonx.ai inference endpoint that then generates the response. Args: prompts: List of strings (prompts) to pass into the model. stop: Optional list of stop words to use when generating the response. run_manager: Optional callback manager. Returns: The full LLMResult output. Example: .. code-block:: python response = watsonx_llm.generate(["What is a molecule"]) """params,kwargs=self._get_chat_params(stop=stop,**kwargs)params=self._validate_chat_params(params)should_stream=streamifstreamisnotNoneelseself.streamingifshould_stream:iflen(prompts)>1:raiseValueError(f"WatsonxLLM currently only supports single prompt, got {prompts}")generation=GenerationChunk(text="")stream_iter=self._stream(prompts[0],stop=stop,run_manager=run_manager,**kwargs)forchunkinstream_iter:ifgenerationisNone:generation=chunkelse:generation+=chunkassertgenerationisnotNoneifisinstance(generation.generation_info,dict):llm_output=generation.generation_info.pop("llm_output")returnLLMResult(generations=[[generation]],llm_output=llm_output)returnLLMResult(generations=[[generation]])else:ifself.watsonx_model_gatewayisnotNone:call_kwargs={**kwargs,**params}response=self._call_model_gateway(model=self.model,prompt=prompts,**call_kwargs)returnself._create_llm_gateway_result(response)else:response=self.watsonx_model.generate(prompt=prompts,params=params,**kwargs)returnself._create_llm_result(response)asyncdef_agenerate(self,prompts:List[str],stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,stream:Optional[bool]=None,**kwargs:Any,)->LLMResult:"""Async run the LLM on the given prompt and input."""params,kwargs=self._get_chat_params(stop=stop,**kwargs)params=self._validate_chat_params(params)ifstream:returnawaitsuper()._agenerate(prompts=prompts,stop=stop,run_manager=run_manager,**kwargs)else:ifself.watsonx_model_gatewayisnotNone:call_kwargs={**kwargs,**params}responses=awaitself._acall_model_gateway(model=self.model,prompt=prompts,**call_kwargs)returnself._create_llm_gateway_result(responses)else:responses=[awaitself.watsonx_model.agenerate(prompt=prompt,params=params,**kwargs)forpromptinprompts]returnself._create_llm_result(responses)def_stream(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[CallbackManagerForLLMRun]=None,**kwargs:Any,)->Iterator[GenerationChunk]:"""Call the IBM watsonx.ai inference endpoint that then streams the response. Args: prompt: The prompt to pass into the model. stop: Optional list of stop words to use when generating the response. run_manager: Optional callback manager. Returns: The iterator which yields generation chunks. Example: .. code-block:: python response = watsonx_llm.stream("What is a molecule") for chunk in response: print(chunk, end='', flush=True) """params,kwargs=self._get_chat_params(stop=stop,**kwargs)params=self._validate_chat_params(params)ifself.watsonx_model_gatewayisnotNone:call_kwargs={**kwargs,**params,"stream":True}chunk_iter=self._call_model_gateway(model=self.model,prompt=prompt,**call_kwargs)else:chunk_iter=self.watsonx_model.generate_text_stream(prompt=prompt,params=params,**(kwargs|{"raw_response":True}))forstream_respinchunk_iter:ifnotisinstance(stream_resp,dict):stream_resp=stream_resp.dict()chunk=self._stream_response_to_generation_chunk(stream_resp)ifrun_manager:run_manager.on_llm_new_token(chunk.text,chunk=chunk)yieldchunkasyncdef_astream(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,**kwargs:Any,)->AsyncIterator[GenerationChunk]:params,kwargs=self._get_chat_params(stop=stop,**kwargs)params=self._validate_chat_params(params)ifself.watsonx_model_gatewayisnotNone:call_kwargs={**kwargs,**params,"stream":True}chunk_iter=awaitself._acall_model_gateway(model=self.model,prompt=prompt,**call_kwargs)else:chunk_iter=awaitself.watsonx_model.agenerate_stream(prompt=prompt,params=params)asyncforstream_respinchunk_iter:ifnotisinstance(stream_resp,dict):stream_resp=stream_resp.dict()chunk=self._stream_response_to_generation_chunk(stream_resp)ifrun_manager:awaitrun_manager.on_llm_new_token(chunk.text,chunk=chunk)yieldchunk
[docs]defget_num_tokens(self,text:str)->int:ifself.watsonx_model_gatewayisnotNone:raiseNotImplementedError("Tokenize endpoint is not supported by IBM Model Gateway endpoint.")else:response=self.watsonx_model.tokenize(text,return_tokens=False)returnresponse["result"]["token_count"]
[docs]defget_token_ids(self,text:str)->List[int]:raiseNotImplementedError("API does not support returning token ids.")