Source code for langchain_core.language_models.llms
"""Base interface for large language models to expose."""from__future__importannotationsimportasyncioimportfunctoolsimportinspectimportjsonimportloggingimportuuidimportwarningsfromabcimportABC,abstractmethodfrompathlibimportPathfromtypingimport(Any,AsyncIterator,Callable,Dict,Iterator,List,Optional,Sequence,Tuple,Type,Union,cast,)importyamlfromtenacityimport(RetryCallState,before_sleep_log,retry,retry_base,retry_if_exception_type,stop_after_attempt,wait_exponential,)fromlangchain_core._apiimportdeprecatedfromlangchain_core.cachesimportBaseCachefromlangchain_core.callbacksimport(AsyncCallbackManager,AsyncCallbackManagerForLLMRun,BaseCallbackManager,CallbackManager,CallbackManagerForLLMRun,Callbacks,)fromlangchain_core.globalsimportget_llm_cachefromlangchain_core.language_models.baseimport(BaseLanguageModel,LangSmithParams,LanguageModelInput,)fromlangchain_core.loadimportdumpdfromlangchain_core.messagesimport(AIMessage,BaseMessage,convert_to_messages,get_buffer_string,)fromlangchain_core.outputsimportGeneration,GenerationChunk,LLMResult,RunInfofromlangchain_core.prompt_valuesimportChatPromptValue,PromptValue,StringPromptValuefromlangchain_core.pydantic_v1importField,root_validatorfromlangchain_core.runnablesimportRunnableConfig,ensure_config,get_config_listfromlangchain_core.runnables.configimportrun_in_executorlogger=logging.getLogger(__name__)@functools.lru_cachedef_log_error_once(msg:str)->None:"""Log an error once."""logger.error(msg)
def create_base_retry_decorator(
    error_types: List[Type[BaseException]],
    max_retries: int = 1,
    run_manager: Optional[
        Union[AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun]
    ] = None,
) -> Callable[[Any], Any]:
    """Create a retry decorator for a given LLM and a provided list of error types.

    Args:
        error_types: List of error types to retry on.
        max_retries: Number of retries. Default is 1.
        run_manager: Callback manager for the run. Default is None.

    Returns:
        A retry decorator.
    """
    _logging = before_sleep_log(logger, logging.WARNING)

    def _before_sleep(retry_state: RetryCallState) -> None:
        _logging(retry_state)
        if run_manager:
            if isinstance(run_manager, AsyncCallbackManagerForLLMRun):
                coro = run_manager.on_retry(retry_state)
                try:
                    loop = asyncio.get_event_loop()
                    if loop.is_running():
                        loop.create_task(coro)
                    else:
                        asyncio.run(coro)
                except Exception as e:
                    _log_error_once(f"Error in on_retry: {e}")
            else:
                run_manager.on_retry(retry_state)

    min_seconds = 4
    max_seconds = 10
    # Wait 2^x * 1 second between each retry starting with
    # 4 seconds, then up to 10 seconds, then 10 seconds afterwards
    retry_instance: retry_base = retry_if_exception_type(error_types[0])
    for error in error_types[1:]:
        retry_instance = retry_instance | retry_if_exception_type(error)
    return retry(
        reraise=True,
        stop=stop_after_attempt(max_retries),
        wait=wait_exponential(multiplier=1, min=min_seconds, max=max_seconds),
        retry=retry_instance,
        before_sleep=_before_sleep,
    )
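A minimal usage sketch (not part of the module): wrapping a flaky provider call with the decorator returned by create_base_retry_decorator. The function name call_provider and the chosen error types are illustrative assumptions, not names defined in this file.

retry_decorator = create_base_retry_decorator(
    error_types=[TimeoutError, ConnectionError], max_retries=3
)


@retry_decorator
def call_provider(prompt: str) -> str:
    # Hypothetical provider call that may raise TimeoutError/ConnectionError;
    # tenacity retries it for up to max_retries attempts with exponential backoff.
    ...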
def _resolve_cache(cache: Union[BaseCache, bool, None]) -> Optional[BaseCache]:
    """Resolve the cache."""
    if isinstance(cache, BaseCache):
        llm_cache = cache
    elif cache is None:
        llm_cache = get_llm_cache()
    elif cache is True:
        llm_cache = get_llm_cache()
        if llm_cache is None:
            raise ValueError(
                "No global cache was configured. Use `set_llm_cache` to set a "
                "global cache if you want to use a global cache. Otherwise either "
                "pass a cache object or set cache to False/None."
            )
    elif cache is False:
        llm_cache = None
    else:
        raise ValueError(f"Unsupported cache value {cache}")
    return llm_cache
def get_prompts(
    params: Dict[str, Any],
    prompts: List[str],
    cache: Optional[Union[BaseCache, bool, None]] = None,
) -> Tuple[Dict[int, List], str, List[int], List[str]]:
    """Get prompts that are already cached.

    Args:
        params: Dictionary of parameters.
        prompts: List of prompts.
        cache: Cache object. Default is None.

    Returns:
        A tuple of existing prompts, llm_string, missing prompt indexes, and
        missing prompts.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_string = str(sorted([(k, v) for k, v in params.items()]))
    missing_prompts = []
    missing_prompt_idxs = []
    existing_prompts = {}

    llm_cache = _resolve_cache(cache)
    for i, prompt in enumerate(prompts):
        if llm_cache:
            cache_val = llm_cache.lookup(prompt, llm_string)
            if isinstance(cache_val, list):
                existing_prompts[i] = cache_val
            else:
                missing_prompts.append(prompt)
                missing_prompt_idxs.append(i)
    return existing_prompts, llm_string, missing_prompt_idxs, missing_prompts
async def aget_prompts(
    params: Dict[str, Any],
    prompts: List[str],
    cache: Optional[Union[BaseCache, bool, None]] = None,
) -> Tuple[Dict[int, List], str, List[int], List[str]]:
    """Get prompts that are already cached. Async version.

    Args:
        params: Dictionary of parameters.
        prompts: List of prompts.
        cache: Cache object. Default is None.

    Returns:
        A tuple of existing prompts, llm_string, missing prompt indexes, and
        missing prompts.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_string = str(sorted([(k, v) for k, v in params.items()]))
    missing_prompts = []
    missing_prompt_idxs = []
    existing_prompts = {}

    llm_cache = _resolve_cache(cache)
    for i, prompt in enumerate(prompts):
        if llm_cache:
            cache_val = await llm_cache.alookup(prompt, llm_string)
            if isinstance(cache_val, list):
                existing_prompts[i] = cache_val
            else:
                missing_prompts.append(prompt)
                missing_prompt_idxs.append(i)
    return existing_prompts, llm_string, missing_prompt_idxs, missing_prompts
def update_cache(
    cache: Union[BaseCache, bool, None],
    existing_prompts: Dict[int, List],
    llm_string: str,
    missing_prompt_idxs: List[int],
    new_results: LLMResult,
    prompts: List[str],
) -> Optional[dict]:
    """Update the cache and get the LLM output.

    Args:
        cache: Cache object.
        existing_prompts: Dictionary of existing prompts.
        llm_string: LLM string.
        missing_prompt_idxs: List of missing prompt indexes.
        new_results: LLMResult object.
        prompts: List of prompts.

    Returns:
        LLM output.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_cache = _resolve_cache(cache)
    for i, result in enumerate(new_results.generations):
        existing_prompts[missing_prompt_idxs[i]] = result
        prompt = prompts[missing_prompt_idxs[i]]
        if llm_cache is not None:
            llm_cache.update(prompt, llm_string, result)
    llm_output = new_results.llm_output
    return llm_output
async def aupdate_cache(
    cache: Union[BaseCache, bool, None],
    existing_prompts: Dict[int, List],
    llm_string: str,
    missing_prompt_idxs: List[int],
    new_results: LLMResult,
    prompts: List[str],
) -> Optional[dict]:
    """Update the cache and get the LLM output. Async version.

    Args:
        cache: Cache object.
        existing_prompts: Dictionary of existing prompts.
        llm_string: LLM string.
        missing_prompt_idxs: List of missing prompt indexes.
        new_results: LLMResult object.
        prompts: List of prompts.

    Returns:
        LLM output.

    Raises:
        ValueError: If the cache is not set and cache is True.
    """
    llm_cache = _resolve_cache(cache)
    for i, result in enumerate(new_results.generations):
        existing_prompts[missing_prompt_idxs[i]] = result
        prompt = prompts[missing_prompt_idxs[i]]
        if llm_cache:
            await llm_cache.aupdate(prompt, llm_string, result)
    llm_output = new_results.llm_output
    return llm_output
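A minimal sketch of how these cache helpers fit together, assuming an InMemoryCache from langchain_core.caches; the parameter values and prompt text are illustrative only.

from langchain_core.caches import InMemoryCache
from langchain_core.outputs import Generation, LLMResult

cache = InMemoryCache()
params = {"model_name": "my-model", "temperature": 0.0}
prompts = ["hello"]

# First pass: nothing is cached yet, so the prompt shows up as missing.
existing, llm_string, missing_idxs, missing = get_prompts(params, prompts, cache)

# Pretend the model produced a result, then write it back to the cache.
fresh = LLMResult(generations=[[Generation(text="hi there")]])
update_cache(cache, existing, llm_string, missing_idxs, fresh, prompts)

# Second pass: the same prompt is now served from `existing` instead of the model.
existing, _, _, _ = get_prompts(params, prompts, cache)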
class BaseLLM(BaseLanguageModel[str], ABC):
    """Base LLM abstract interface.

    It should take in a prompt and return a string.
    """

    callback_manager: Optional[BaseCallbackManager] = Field(default=None, exclude=True)
    """[DEPRECATED]"""

    class Config:
        arbitrary_types_allowed = True

    @root_validator(pre=True)
    def raise_deprecation(cls, values: Dict) -> Dict:
        """Raise deprecation warning if callback_manager is used."""
        if values.get("callback_manager") is not None:
            warnings.warn(
                "callback_manager is deprecated. Please use callbacks instead.",
                DeprecationWarning,
                stacklevel=5,
            )
            values["callbacks"] = values.pop("callback_manager", None)
        return values

    # --- Runnable methods ---

    @property
    def OutputType(self) -> Type[str]:
        """Get the output type for this runnable."""
        return str

    def _convert_input(self, input: LanguageModelInput) -> PromptValue:
        if isinstance(input, PromptValue):
            return input
        elif isinstance(input, str):
            return StringPromptValue(text=input)
        elif isinstance(input, Sequence):
            return ChatPromptValue(messages=convert_to_messages(input))
        else:
            raise ValueError(
                f"Invalid input type {type(input)}. "
                "Must be a PromptValue, str, or list of BaseMessages."
            )

    def _get_ls_params(
        self,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> LangSmithParams:
        """Get standard params for tracing."""
        # get default provider from class name
        default_provider = self.__class__.__name__
        if default_provider.endswith("LLM"):
            default_provider = default_provider[:-3]
        default_provider = default_provider.lower()

        ls_params = LangSmithParams(ls_provider=default_provider, ls_model_type="llm")
        if stop:
            ls_params["ls_stop"] = stop

        # model
        if hasattr(self, "model") and isinstance(self.model, str):
            ls_params["ls_model_name"] = self.model
        elif hasattr(self, "model_name") and isinstance(self.model_name, str):
            ls_params["ls_model_name"] = self.model_name

        # temperature
        if "temperature" in kwargs and isinstance(kwargs["temperature"], float):
            ls_params["ls_temperature"] = kwargs["temperature"]
        elif hasattr(self, "temperature") and isinstance(self.temperature, float):
            ls_params["ls_temperature"] = self.temperature

        # max_tokens
        if "max_tokens" in kwargs and isinstance(kwargs["max_tokens"], int):
            ls_params["ls_max_tokens"] = kwargs["max_tokens"]
        elif hasattr(self, "max_tokens") and isinstance(self.max_tokens, int):
            ls_params["ls_max_tokens"] = self.max_tokens

        return ls_params
    def stream(
        self,
        input: LanguageModelInput,
        config: Optional[RunnableConfig] = None,
        *,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Iterator[str]:
        if type(self)._stream == BaseLLM._stream:
            # model doesn't implement streaming, so use default implementation
            yield self.invoke(input, config=config, stop=stop, **kwargs)
        else:
            prompt = self._convert_input(input).to_string()
            config = ensure_config(config)
            params = self.dict()
            params["stop"] = stop
            params = {**params, **kwargs}
            options = {"stop": stop}
            inheritable_metadata = {
                **(config.get("metadata") or {}),
                **self._get_ls_params(stop=stop, **kwargs),
            }
            callback_manager = CallbackManager.configure(
                config.get("callbacks"),
                self.callbacks,
                self.verbose,
                config.get("tags"),
                self.tags,
                inheritable_metadata,
                self.metadata,
            )
            (run_manager,) = callback_manager.on_llm_start(
                dumpd(self),
                [prompt],
                invocation_params=params,
                options=options,
                name=config.get("run_name"),
                run_id=config.pop("run_id", None),
                batch_size=1,
            )
            generation: Optional[GenerationChunk] = None
            try:
                for chunk in self._stream(
                    prompt, stop=stop, run_manager=run_manager, **kwargs
                ):
                    yield chunk.text
                    if generation is None:
                        generation = chunk
                    else:
                        generation += chunk
                assert generation is not None
            except BaseException as e:
                run_manager.on_llm_error(
                    e,
                    response=LLMResult(
                        generations=[[generation]] if generation else []
                    ),
                )
                raise e
            else:
                run_manager.on_llm_end(LLMResult(generations=[[generation]]))
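The generation += chunk accumulation above relies on GenerationChunk supporting concatenation, which merges the text (and generation_info) of successive chunks. A tiny illustrative sketch:

merged = GenerationChunk(text="Hello, ") + GenerationChunk(text="world")
assert merged.text == "Hello, world"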
    # --- Custom methods ---

    @abstractmethod
    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts."""

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompts."""
        return await run_in_executor(
            None,
            self._generate,
            prompts,
            stop,
            run_manager.get_sync() if run_manager else None,
            **kwargs,
        )

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Stream the LLM on the given prompt.

        This method should be overridden by subclasses that support streaming.

        If not implemented, the default behavior of calls to stream will be to
        fallback to the non-streaming version of the model and return
        the output as a single chunk.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of these substrings.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            An iterator of GenerationChunks.
        """
        raise NotImplementedError()

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        """An async version of the _stream method.

        The default implementation uses the synchronous _stream method and wraps it
        in an async iterator. Subclasses that need to provide a true async
        implementation should override this method.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of these substrings.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            An async iterator of GenerationChunks.
        """
        iterator = await run_in_executor(
            None,
            self._stream,
            prompt,
            stop,
            run_manager.get_sync() if run_manager else None,
            **kwargs,
        )
        done = object()
        while True:
            item = await run_in_executor(
                None,
                next,
                iterator,
                done,  # type: ignore[call-arg, arg-type]
            )
            if item is done:
                break
            yield item  # type: ignore[misc]
    def _generate_helper(
        self,
        prompts: List[str],
        stop: Optional[List[str]],
        run_managers: List[CallbackManagerForLLMRun],
        new_arg_supported: bool,
        **kwargs: Any,
    ) -> LLMResult:
        try:
            output = (
                self._generate(
                    prompts,
                    stop=stop,
                    # TODO: support multiple run managers
                    run_manager=run_managers[0] if run_managers else None,
                    **kwargs,
                )
                if new_arg_supported
                else self._generate(prompts, stop=stop)
            )
        except BaseException as e:
            for run_manager in run_managers:
                run_manager.on_llm_error(e, response=LLMResult(generations=[]))
            raise e
        flattened_outputs = output.flatten()
        for manager, flattened_output in zip(run_managers, flattened_outputs):
            manager.on_llm_end(flattened_output)
        if run_managers:
            output.run = [
                RunInfo(run_id=run_manager.run_id) for run_manager in run_managers
            ]
        return output
    def generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        callbacks: Optional[Union[Callbacks, List[Callbacks]]] = None,
        *,
        tags: Optional[Union[List[str], List[List[str]]]] = None,
        metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        run_name: Optional[Union[str, List[str]]] = None,
        run_id: Optional[Union[uuid.UUID, List[Optional[uuid.UUID]]]] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Pass a sequence of prompts to a model and return generations.

        This method should make use of batched calls for models that expose a batched
        API.

        Use this method when you want to:

            1. take advantage of batched calls,
            2. need more output from the model than just the top generated value,
            3. are building chains that are agnostic to the underlying language model
                type (e.g., pure text completion models vs chat models).

        Args:
            prompts: List of string prompts.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of these substrings.
            callbacks: Callbacks to pass through. Used for executing additional
                functionality, such as logging or streaming, throughout generation.
            tags: List of tags to associate with each prompt. If provided, the length
                of the list must match the length of the prompts list.
            metadata: List of metadata dictionaries to associate with each prompt. If
                provided, the length of the list must match the length of the prompts
                list.
            run_name: List of run names to associate with each prompt. If provided, the
                length of the list must match the length of the prompts list.
            run_id: List of run IDs to associate with each prompt. If provided, the
                length of the list must match the length of the prompts list.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            An LLMResult, which contains a list of candidate Generations for each input
                prompt and additional model provider-specific output.
        """
        if not isinstance(prompts, list):
            raise ValueError(
                "Argument 'prompts' is expected to be of type List[str], received"
                f" argument of type {type(prompts)}."
            )
        # Create callback managers
        if isinstance(metadata, list):
            metadata = [
                {
                    **(meta or {}),
                    **self._get_ls_params(stop=stop, **kwargs),
                }
                for meta in metadata
            ]
        elif isinstance(metadata, dict):
            metadata = {
                **(metadata or {}),
                **self._get_ls_params(stop=stop, **kwargs),
            }
        else:
            pass
        if (
            isinstance(callbacks, list)
            and callbacks
            and (
                isinstance(callbacks[0], (list, BaseCallbackManager))
                or callbacks[0] is None
            )
        ):
            # We've received a list of callbacks args to apply to each input
            assert len(callbacks) == len(prompts)
            assert tags is None or (
                isinstance(tags, list) and len(tags) == len(prompts)
            )
            assert metadata is None or (
                isinstance(metadata, list) and len(metadata) == len(prompts)
            )
            assert run_name is None or (
                isinstance(run_name, list) and len(run_name) == len(prompts)
            )
            callbacks = cast(List[Callbacks], callbacks)
            tags_list = cast(
                List[Optional[List[str]]], tags or ([None] * len(prompts))
            )
            metadata_list = cast(
                List[Optional[Dict[str, Any]]], metadata or ([{}] * len(prompts))
            )
            run_name_list = run_name or cast(
                List[Optional[str]], ([None] * len(prompts))
            )
            callback_managers = [
                CallbackManager.configure(
                    callback,
                    self.callbacks,
                    self.verbose,
                    tag,
                    self.tags,
                    meta,
                    self.metadata,
                )
                for callback, tag, meta in zip(callbacks, tags_list, metadata_list)
            ]
        else:
            # We've received a single callbacks arg to apply to all inputs
            callback_managers = [
                CallbackManager.configure(
                    cast(Callbacks, callbacks),
                    self.callbacks,
                    self.verbose,
                    cast(List[str], tags),
                    self.tags,
                    cast(Dict[str, Any], metadata),
                    self.metadata,
                )
            ] * len(prompts)
            run_name_list = [cast(Optional[str], run_name)] * len(prompts)
        run_ids_list = self._get_run_ids_list(run_id, prompts)
        params = self.dict()
        params["stop"] = stop
        options = {"stop": stop}
        (
            existing_prompts,
            llm_string,
            missing_prompt_idxs,
            missing_prompts,
        ) = get_prompts(params, prompts, self.cache)
        new_arg_supported = inspect.signature(self._generate).parameters.get(
            "run_manager"
        )
        if (self.cache is None and get_llm_cache() is None) or self.cache is False:
            run_managers = [
                callback_manager.on_llm_start(
                    dumpd(self),
                    [prompt],
                    invocation_params=params,
                    options=options,
                    name=run_name,
                    batch_size=len(prompts),
                    run_id=run_id_,
                )[0]
                for callback_manager, prompt, run_name, run_id_ in zip(
                    callback_managers, prompts, run_name_list, run_ids_list
                )
            ]
            output = self._generate_helper(
                prompts, stop, run_managers, bool(new_arg_supported), **kwargs
            )
            return output
        if len(missing_prompts) > 0:
            run_managers = [
                callback_managers[idx].on_llm_start(
                    dumpd(self),
                    [prompts[idx]],
                    invocation_params=params,
                    options=options,
                    name=run_name_list[idx],
                    batch_size=len(missing_prompts),
                )[0]
                for idx in missing_prompt_idxs
            ]
            new_results = self._generate_helper(
                missing_prompts, stop, run_managers, bool(new_arg_supported), **kwargs
            )
            llm_output = update_cache(
                self.cache,
                existing_prompts,
                llm_string,
                missing_prompt_idxs,
                new_results,
                prompts,
            )
            run_info = (
                [RunInfo(run_id=run_manager.run_id) for run_manager in run_managers]
                if run_managers
                else None
            )
        else:
            llm_output = {}
            run_info = None
        generations = [existing_prompts[i] for i in range(len(prompts))]
        return LLMResult(generations=generations, llm_output=llm_output, run=run_info)
    @staticmethod
    def _get_run_ids_list(
        run_id: Optional[Union[uuid.UUID, List[Optional[uuid.UUID]]]], prompts: list
    ) -> list:
        if run_id is None:
            return [None] * len(prompts)
        if isinstance(run_id, list):
            if len(run_id) != len(prompts):
                raise ValueError(
                    "Number of manually provided run_id's does not match batch length."
                    f" {len(run_id)} != {len(prompts)}"
                )
            return run_id
        return [run_id] + [None] * (len(prompts) - 1)

    async def _agenerate_helper(
        self,
        prompts: List[str],
        stop: Optional[List[str]],
        run_managers: List[AsyncCallbackManagerForLLMRun],
        new_arg_supported: bool,
        **kwargs: Any,
    ) -> LLMResult:
        try:
            output = (
                await self._agenerate(
                    prompts,
                    stop=stop,
                    run_manager=run_managers[0] if run_managers else None,
                    **kwargs,
                )
                if new_arg_supported
                else await self._agenerate(prompts, stop=stop)
            )
        except BaseException as e:
            await asyncio.gather(
                *[
                    run_manager.on_llm_error(e, response=LLMResult(generations=[]))
                    for run_manager in run_managers
                ]
            )
            raise e
        flattened_outputs = output.flatten()
        await asyncio.gather(
            *[
                run_manager.on_llm_end(flattened_output)
                for run_manager, flattened_output in zip(
                    run_managers, flattened_outputs
                )
            ]
        )
        if run_managers:
            output.run = [
                RunInfo(run_id=run_manager.run_id) for run_manager in run_managers
            ]
        return output
    async def agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        callbacks: Optional[Union[Callbacks, List[Callbacks]]] = None,
        *,
        tags: Optional[Union[List[str], List[List[str]]]] = None,
        metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
        run_name: Optional[Union[str, List[str]]] = None,
        run_id: Optional[Union[uuid.UUID, List[Optional[uuid.UUID]]]] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Asynchronously pass a sequence of prompts to a model and return generations.

        This method should make use of batched calls for models that expose a batched
        API.

        Use this method when you want to:

            1. take advantage of batched calls,
            2. need more output from the model than just the top generated value,
            3. are building chains that are agnostic to the underlying language model
                type (e.g., pure text completion models vs chat models).

        Args:
            prompts: List of string prompts.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of these substrings.
            callbacks: Callbacks to pass through. Used for executing additional
                functionality, such as logging or streaming, throughout generation.
            tags: List of tags to associate with each prompt. If provided, the length
                of the list must match the length of the prompts list.
            metadata: List of metadata dictionaries to associate with each prompt. If
                provided, the length of the list must match the length of the prompts
                list.
            run_name: List of run names to associate with each prompt. If provided, the
                length of the list must match the length of the prompts list.
            run_id: List of run IDs to associate with each prompt. If provided, the
                length of the list must match the length of the prompts list.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            An LLMResult, which contains a list of candidate Generations for each input
                prompt and additional model provider-specific output.
        """
        if isinstance(metadata, list):
            metadata = [
                {
                    **(meta or {}),
                    **self._get_ls_params(stop=stop, **kwargs),
                }
                for meta in metadata
            ]
        elif isinstance(metadata, dict):
            metadata = {
                **(metadata or {}),
                **self._get_ls_params(stop=stop, **kwargs),
            }
        else:
            pass
        # Create callback managers
        if isinstance(callbacks, list) and (
            isinstance(callbacks[0], (list, BaseCallbackManager))
            or callbacks[0] is None
        ):
            # We've received a list of callbacks args to apply to each input
            assert len(callbacks) == len(prompts)
            assert tags is None or (
                isinstance(tags, list) and len(tags) == len(prompts)
            )
            assert metadata is None or (
                isinstance(metadata, list) and len(metadata) == len(prompts)
            )
            assert run_name is None or (
                isinstance(run_name, list) and len(run_name) == len(prompts)
            )
            callbacks = cast(List[Callbacks], callbacks)
            tags_list = cast(
                List[Optional[List[str]]], tags or ([None] * len(prompts))
            )
            metadata_list = cast(
                List[Optional[Dict[str, Any]]], metadata or ([{}] * len(prompts))
            )
            run_name_list = run_name or cast(
                List[Optional[str]], ([None] * len(prompts))
            )
            callback_managers = [
                AsyncCallbackManager.configure(
                    callback,
                    self.callbacks,
                    self.verbose,
                    tag,
                    self.tags,
                    meta,
                    self.metadata,
                )
                for callback, tag, meta in zip(callbacks, tags_list, metadata_list)
            ]
        else:
            # We've received a single callbacks arg to apply to all inputs
            callback_managers = [
                AsyncCallbackManager.configure(
                    cast(Callbacks, callbacks),
                    self.callbacks,
                    self.verbose,
                    cast(List[str], tags),
                    self.tags,
                    cast(Dict[str, Any], metadata),
                    self.metadata,
                )
            ] * len(prompts)
            run_name_list = [cast(Optional[str], run_name)] * len(prompts)
        run_ids_list = self._get_run_ids_list(run_id, prompts)
        params = self.dict()
        params["stop"] = stop
        options = {"stop": stop}
        (
            existing_prompts,
            llm_string,
            missing_prompt_idxs,
            missing_prompts,
        ) = await aget_prompts(params, prompts, self.cache)

        # Verify whether the cache is set, and if the cache is set,
        # verify whether the cache is available.
        new_arg_supported = inspect.signature(self._agenerate).parameters.get(
            "run_manager"
        )
        if (self.cache is None and get_llm_cache() is None) or self.cache is False:
            run_managers = await asyncio.gather(
                *[
                    callback_manager.on_llm_start(
                        dumpd(self),
                        [prompt],
                        invocation_params=params,
                        options=options,
                        name=run_name,
                        batch_size=len(prompts),
                        run_id=run_id_,
                    )
                    for callback_manager, prompt, run_name, run_id_ in zip(
                        callback_managers, prompts, run_name_list, run_ids_list
                    )
                ]
            )
            run_managers = [r[0] for r in run_managers]  # type: ignore[misc]
            output = await self._agenerate_helper(
                prompts,
                stop,
                run_managers,  # type: ignore[arg-type]
                bool(new_arg_supported),
                **kwargs,  # type: ignore[arg-type]
            )
            return output
        if len(missing_prompts) > 0:
            run_managers = await asyncio.gather(
                *[
                    callback_managers[idx].on_llm_start(
                        dumpd(self),
                        [prompts[idx]],
                        invocation_params=params,
                        options=options,
                        name=run_name_list[idx],
                        batch_size=len(missing_prompts),
                    )
                    for idx in missing_prompt_idxs
                ]
            )
            run_managers = [r[0] for r in run_managers]  # type: ignore[misc]
            new_results = await self._agenerate_helper(
                missing_prompts,
                stop,
                run_managers,  # type: ignore[arg-type]
                bool(new_arg_supported),
                **kwargs,  # type: ignore[arg-type]
            )
            llm_output = await aupdate_cache(
                self.cache,
                existing_prompts,
                llm_string,
                missing_prompt_idxs,
                new_results,
                prompts,
            )
            run_info = (
                [RunInfo(run_id=run_manager.run_id) for run_manager in run_managers]  # type: ignore[attr-defined]
                if run_managers
                else None
            )
        else:
            llm_output = {}
            run_info = None
        generations = [existing_prompts[i] for i in range(len(prompts))]
        return LLMResult(generations=generations, llm_output=llm_output, run=run_info)
[docs]@deprecated("0.1.7",alternative="invoke",removal="1.0")def__call__(self,prompt:str,stop:Optional[List[str]]=None,callbacks:Callbacks=None,*,tags:Optional[List[str]]=None,metadata:Optional[Dict[str,Any]]=None,**kwargs:Any,)->str:"""Check Cache and run the LLM on the given prompt and input. Args: prompt: The prompt to generate from. stop: Stop words to use when generating. Model output is cut off at the first occurrence of any of these substrings. callbacks: Callbacks to pass through. Used for executing additional functionality, such as logging or streaming, throughout generation. tags: List of tags to associate with the prompt. metadata: Metadata to associate with the prompt. **kwargs: Arbitrary additional keyword arguments. These are usually passed to the model provider API call. Returns: The generated text. Raises: ValueError: If the prompt is not a string. """ifnotisinstance(prompt,str):raiseValueError("Argument `prompt` is expected to be a string. Instead found "f"{type(prompt)}. If you want to run the LLM on multiple prompts, use ""`generate` instead.")return(self.generate([prompt],stop=stop,callbacks=callbacks,tags=tags,metadata=metadata,**kwargs,).generations[0][0].text)
    async def _call_async(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        callbacks: Callbacks = None,
        *,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ) -> str:
        """Check Cache and run the LLM on the given prompt and input."""
        result = await self.agenerate(
            [prompt],
            stop=stop,
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
            **kwargs,
        )
        return result.generations[0][0].text
    def __str__(self) -> str:
        """Get a string representation of the object for printing."""
        cls_name = f"\033[1m{self.__class__.__name__}\033[0m"
        return f"{cls_name}\nParams: {self._identifying_params}"

    @property
    @abstractmethod
    def _llm_type(self) -> str:
        """Return type of llm."""

    def dict(self, **kwargs: Any) -> Dict:
        """Return a dictionary of the LLM."""
        starter_dict = dict(self._identifying_params)
        starter_dict["_type"] = self._llm_type
        return starter_dict
    def save(self, file_path: Union[Path, str]) -> None:
        """Save the LLM.

        Args:
            file_path: Path to file to save the LLM to.

        Raises:
            ValueError: If the file path is not a string or Path object.

        Example:
            .. code-block:: python

                llm.save(file_path="path/llm.yaml")
        """
        # Convert file to Path object.
        if isinstance(file_path, str):
            save_path = Path(file_path)
        else:
            save_path = file_path

        directory_path = save_path.parent
        directory_path.mkdir(parents=True, exist_ok=True)

        # Fetch dictionary to save
        prompt_dict = self.dict()

        if save_path.suffix == ".json":
            with open(file_path, "w") as f:
                json.dump(prompt_dict, f, indent=4)
        elif save_path.suffix.endswith((".yaml", ".yml")):
            with open(file_path, "w") as f:
                yaml.dump(prompt_dict, f, default_flow_style=False)
        else:
            raise ValueError(f"{save_path} must be json or yaml")
class LLM(BaseLLM):
    """Simple interface for implementing a custom LLM.

    You should subclass this class and implement the following:

    - `_call` method: Run the LLM on the given prompt and input (used by `invoke`).
    - `_identifying_params` property: Return a dictionary of the identifying
        parameters. This is critical for caching and tracing purposes. Identifying
        parameters is a dict that identifies the LLM. It should mostly include a
        `model_name`.

    Optional: Override the following methods to provide more optimizations:

    - `_acall`: Provide a native async version of the `_call` method.
        If not provided, will delegate to the synchronous version using
        `run_in_executor`. (Used by `ainvoke`).
    - `_stream`: Stream the LLM on the given prompt and input.
        `stream` will use `_stream` if provided, otherwise it will use `_call` and
        output will arrive in one chunk.
    - `_astream`: Override to provide a native async version of the `_stream` method.
        `astream` will use `_astream` if provided, otherwise it will implement
        a fallback behavior that will use `_stream` if `_stream` is implemented,
        and use `_acall` if `_stream` is not implemented.

    Please see the following guide for more information on how to
    implement a custom LLM:

    https://python.langchain.com/v0.2/docs/how_to/custom_llm/
    """

    @abstractmethod
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Run the LLM on the given input.

        Override this method to implement the LLM logic.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of the stop substrings.
                If stop tokens are not supported consider raising NotImplementedError.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            The model output as a string. SHOULD NOT include the prompt.
        """

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Async version of the _call method.

        The default implementation delegates to the synchronous _call method using
        `run_in_executor`. Subclasses that need to provide a true async implementation
        should override this method to reduce the overhead of using `run_in_executor`.

        Args:
            prompt: The prompt to generate from.
            stop: Stop words to use when generating. Model output is cut off at the
                first occurrence of any of the stop substrings.
                If stop tokens are not supported consider raising NotImplementedError.
            run_manager: Callback manager for the run.
            **kwargs: Arbitrary additional keyword arguments. These are usually passed
                to the model provider API call.

        Returns:
            The model output as a string. SHOULD NOT include the prompt.
        """
        return await run_in_executor(
            None,
            self._call,
            prompt,
            stop,
            run_manager.get_sync() if run_manager else None,
            **kwargs,
        )

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run the LLM on the given prompt and input."""
        # TODO: add caching here.
        generations = []
        new_arg_supported = inspect.signature(self._call).parameters.get("run_manager")
        for prompt in prompts:
            text = (
                self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
                if new_arg_supported
                else self._call(prompt, stop=stop, **kwargs)
            )
            generations.append([Generation(text=text)])
        return LLMResult(generations=generations)

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Async run the LLM on the given prompt and input."""
        generations = []
        new_arg_supported = inspect.signature(self._acall).parameters.get("run_manager")
        for prompt in prompts:
            text = (
                await self._acall(prompt, stop=stop, run_manager=run_manager, **kwargs)
                if new_arg_supported
                else await self._acall(prompt, stop=stop, **kwargs)
            )
            generations.append([Generation(text=text)])
        return LLMResult(generations=generations)