[docs]@deprecated("0.0.21",removal="1.0",alternative_import="langchain_huggingface.HuggingFaceEndpoint",)classHuggingFaceTextGenInference(LLM):""" HuggingFace text generation API. ! This class is deprecated, you should use HuggingFaceEndpoint instead ! To use, you should have the `text-generation` python package installed and a text-generation server running. Example: .. code-block:: python # Basic Example (no streaming) llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=512, top_k=10, top_p=0.95, typical_p=0.95, temperature=0.01, repetition_penalty=1.03, ) print(llm.invoke("What is Deep Learning?")) # noqa: T201 # Streaming response example from langchain_community.callbacks import streaming_stdout callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()] llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=512, top_k=10, top_p=0.95, typical_p=0.95, temperature=0.01, repetition_penalty=1.03, callbacks=callbacks, streaming=True ) print(llm.invoke("What is Deep Learning?")) # noqa: T201 """max_new_tokens:int=512"""Maximum number of generated tokens"""top_k:Optional[int]=None"""The number of highest probability vocabulary tokens to keep for top-k-filtering."""top_p:Optional[float]=0.95"""If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation."""typical_p:Optional[float]=0.95"""Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information."""temperature:Optional[float]=0.8"""The value used to module the logits distribution."""repetition_penalty:Optional[float]=None"""The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details."""return_full_text:bool=False"""Whether to prepend the prompt to the generated text"""truncate:Optional[int]=None"""Truncate inputs tokens to the given size"""stop_sequences:List[str]=Field(default_factory=list)"""Stop generating tokens if a member of `stop_sequences` is generated"""seed:Optional[int]=None"""Random sampling seed"""inference_server_url:str="""""text-generation-inference instance base url"""timeout:int=120"""Timeout in seconds"""streaming:bool=False"""Whether to generate a stream of tokens asynchronously"""do_sample:bool=False"""Activate logits sampling"""watermark:bool=False"""Watermarking with [A Watermark for Large Language Models] (https://arxiv.org/abs/2301.10226)"""server_kwargs:Dict[str,Any]=Field(default_factory=dict)"""Holds any text-generation-inference server parameters not explicitly specified"""model_kwargs:Dict[str,Any]=Field(default_factory=dict)"""Holds any model parameters valid for `call` not explicitly specified"""client:Anyasync_client:AnyclassConfig:extra="forbid"@root_validator(pre=True)defbuild_extra(cls,values:Dict[str,Any])->Dict[str,Any]:"""Build extra kwargs from additional params that were passed in."""all_required_field_names=get_pydantic_field_names(cls)extra=values.get("model_kwargs",{})forfield_nameinlist(values):iffield_nameinextra:raiseValueError(f"Found {field_name} supplied twice.")iffield_namenotinall_required_field_names:logger.warning(f"""WARNING! {field_name} is not default parameter.{field_name} was transferred to model_kwargs. 
Please confirm that {field_name} is what you intended.""")extra[field_name]=values.pop(field_name)invalid_model_kwargs=all_required_field_names.intersection(extra.keys())ifinvalid_model_kwargs:raiseValueError(f"Parameters {invalid_model_kwargs} should be specified explicitly. "f"Instead they were passed in as part of `model_kwargs` parameter.")values["model_kwargs"]=extrareturnvalues@pre_initdefvalidate_environment(cls,values:Dict)->Dict:"""Validate that python package exists in environment."""try:importtext_generationvalues["client"]=text_generation.Client(values["inference_server_url"],timeout=values["timeout"],**values["server_kwargs"],)values["async_client"]=text_generation.AsyncClient(values["inference_server_url"],timeout=values["timeout"],**values["server_kwargs"],)exceptImportError:raiseImportError("Could not import text_generation python package. ""Please install it with `pip install text_generation`.")returnvalues@propertydef_llm_type(self)->str:"""Return type of llm."""return"huggingface_textgen_inference"@propertydef_default_params(self)->Dict[str,Any]:"""Get the default parameters for calling text generation inference API."""return{"max_new_tokens":self.max_new_tokens,"top_k":self.top_k,"top_p":self.top_p,"typical_p":self.typical_p,"temperature":self.temperature,"repetition_penalty":self.repetition_penalty,"return_full_text":self.return_full_text,"truncate":self.truncate,"stop_sequences":self.stop_sequences,"seed":self.seed,"do_sample":self.do_sample,"watermark":self.watermark,**self.model_kwargs,}def_invocation_params(self,runtime_stop:Optional[List[str]],**kwargs:Any)->Dict[str,Any]:params={**self._default_params,**kwargs}params["stop_sequences"]=params["stop_sequences"]+(runtime_stopor[])returnparamsdef_call(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[CallbackManagerForLLMRun]=None,**kwargs:Any,)->str:ifself.streaming:completion=""forchunkinself._stream(prompt,stop,run_manager,**kwargs):completion+=chunk.textreturncompletioninvocation_params=self._invocation_params(stop,**kwargs)res=self.client.generate(prompt,**invocation_params)# remove stop sequences from the end of the generated textforstop_seqininvocation_params["stop_sequences"]:ifstop_seqinres.generated_text:res.generated_text=res.generated_text[:res.generated_text.index(stop_seq)]returnres.generated_textasyncdef_acall(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,**kwargs:Any,)->str:ifself.streaming:completion=""asyncforchunkinself._astream(prompt,stop,run_manager,**kwargs):completion+=chunk.textreturncompletioninvocation_params=self._invocation_params(stop,**kwargs)res=awaitself.async_client.generate(prompt,**invocation_params)# remove stop sequences from the end of the generated textforstop_seqininvocation_params["stop_sequences"]:ifstop_seqinres.generated_text:res.generated_text=res.generated_text[:res.generated_text.index(stop_seq)]returnres.generated_textdef_stream(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[CallbackManagerForLLMRun]=None,**kwargs:Any,)->Iterator[GenerationChunk]:invocation_params=self._invocation_params(stop,**kwargs)forresinself.client.generate_stream(prompt,**invocation_params):# identify stop sequence in generated text, if anystop_seq_found:Optional[str]=Noneforstop_seqininvocation_params["stop_sequences"]:ifstop_seqinres.token.text:stop_seq_found=stop_seq# identify text to 
yieldtext:Optional[str]=Noneifres.token.special:text=Noneelifstop_seq_found:text=res.token.text[:res.token.text.index(stop_seq_found)]else:text=res.token.text# yield text, if anyiftext:chunk=GenerationChunk(text=text)ifrun_manager:run_manager.on_llm_new_token(chunk.text)yieldchunk# break if stop sequence foundifstop_seq_found:breakasyncdef_astream(self,prompt:str,stop:Optional[List[str]]=None,run_manager:Optional[AsyncCallbackManagerForLLMRun]=None,**kwargs:Any,)->AsyncIterator[GenerationChunk]:invocation_params=self._invocation_params(stop,**kwargs)asyncforresinself.async_client.generate_stream(prompt,**invocation_params):# identify stop sequence in generated text, if anystop_seq_found:Optional[str]=Noneforstop_seqininvocation_params["stop_sequences"]:ifstop_seqinres.token.text:stop_seq_found=stop_seq# identify text to yieldtext:Optional[str]=Noneifres.token.special:text=Noneelifstop_seq_found:text=res.token.text[:res.token.text.index(stop_seq_found)]else:text=res.token.text# yield text, if anyiftext:chunk=GenerationChunk(text=text)ifrun_manager:awaitrun_manager.on_llm_new_token(chunk.text)yieldchunk# break if stop sequence foundifstop_seq_found:break
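# The deprecation notice above points to ``langchain_huggingface.HuggingFaceEndpoint``
# as the replacement. A minimal migration sketch follows (kept as comments so the
# module stays import-safe); it assumes the endpoint class accepts the same TGI
# generation parameters, with ``endpoint_url`` taking the place of
# ``inference_server_url``:
#
#     from langchain_huggingface import HuggingFaceEndpoint
#
#     llm = HuggingFaceEndpoint(
#         endpoint_url="http://localhost:8010/",
#         max_new_tokens=512,
#         top_k=10,
#         top_p=0.95,
#         typical_p=0.95,
#         temperature=0.01,
#         repetition_penalty=1.03,
#     )
#     print(llm.invoke("What is Deep Learning?"))  # noqa: T201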