class HuggingFacePipeline(BaseLLM):
    """HuggingFace Pipeline API.

    To use, you should have the ``transformers`` python package installed.

    Only supports `text-generation`, `text2text-generation`, `summarization` and
    `translation` for now.

    Example using from_model_id:
        .. code-block:: python

            from langchain_huggingface import HuggingFacePipeline

            hf = HuggingFacePipeline.from_model_id(
                model_id="gpt2",
                task="text-generation",
                pipeline_kwargs={"max_new_tokens": 10},
            )

    Example passing pipeline in directly:
        .. code-block:: python

            from langchain_huggingface import HuggingFacePipeline
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

            model_id = "gpt2"
            tokenizer = AutoTokenizer.from_pretrained(model_id)
            model = AutoModelForCausalLM.from_pretrained(model_id)
            pipe = pipeline(
                "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10
            )
            hf = HuggingFacePipeline(pipeline=pipe)
    """

    pipeline: Any  #: :meta private:
    model_id: str = DEFAULT_MODEL_ID
    """Model name to use."""
    model_kwargs: Optional[dict] = None
    """Keyword arguments passed to the model."""
    pipeline_kwargs: Optional[dict] = None
    """Keyword arguments passed to the pipeline."""
    batch_size: int = DEFAULT_BATCH_SIZE
    """Batch size to use when passing multiple documents to generate."""

    class Config:
        """Configuration for this pydantic object."""

        extra = "forbid"
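    # The fields above are pydantic model fields: `pipeline` holds the underlying
    # transformers pipeline object (hidden from the rendered docs via `:meta private:`),
    # and `extra = "forbid"` rejects unknown keyword arguments at construction time.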
    @classmethod
    def from_model_id(
        cls,
        model_id: str,
        task: str,
        backend: str = "default",
        device: Optional[int] = -1,
        device_map: Optional[str] = None,
        model_kwargs: Optional[dict] = None,
        pipeline_kwargs: Optional[dict] = None,
        batch_size: int = DEFAULT_BATCH_SIZE,
        **kwargs: Any,
    ) -> HuggingFacePipeline:
        """Construct the pipeline object from model_id and task."""
        try:
            from transformers import (  # type: ignore[import]
                AutoModelForCausalLM,
                AutoModelForSeq2SeqLM,
                AutoTokenizer,
            )
            from transformers import pipeline as hf_pipeline  # type: ignore[import]
        except ImportError:
            raise ValueError(
                "Could not import transformers python package. "
                "Please install it with `pip install transformers`."
            )

        _model_kwargs = model_kwargs or {}
        tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs)

        try:
            if task == "text-generation":
                if backend == "openvino":
                    try:
                        from optimum.intel.openvino import (  # type: ignore[import]
                            OVModelForCausalLM,
                        )
                    except ImportError:
                        raise ValueError(
                            "Could not import optimum-intel python package. "
                            "Please install it with: "
                            "pip install 'optimum[openvino,nncf]' "
                        )
                    try:
                        # use local model
                        model = OVModelForCausalLM.from_pretrained(
                            model_id, **_model_kwargs
                        )
                    except Exception:
                        # use remote model
                        model = OVModelForCausalLM.from_pretrained(
                            model_id, export=True, **_model_kwargs
                        )
                else:
                    model = AutoModelForCausalLM.from_pretrained(
                        model_id, **_model_kwargs
                    )
            elif task in ("text2text-generation", "summarization", "translation"):
                if backend == "openvino":
                    try:
                        from optimum.intel.openvino import OVModelForSeq2SeqLM
                    except ImportError:
                        raise ValueError(
                            "Could not import optimum-intel python package. "
                            "Please install it with: "
                            "pip install 'optimum[openvino,nncf]' "
                        )
                    try:
                        # use local model
                        model = OVModelForSeq2SeqLM.from_pretrained(
                            model_id, **_model_kwargs
                        )
                    except Exception:
                        # use remote model
                        model = OVModelForSeq2SeqLM.from_pretrained(
                            model_id, export=True, **_model_kwargs
                        )
                else:
                    model = AutoModelForSeq2SeqLM.from_pretrained(
                        model_id, **_model_kwargs
                    )
            else:
                raise ValueError(
                    f"Got invalid task {task}, "
                    f"currently only {VALID_TASKS} are supported"
                )
        except ImportError as e:
            raise ValueError(
                f"Could not load the {task} model due to missing dependencies."
            ) from e

        if tokenizer.pad_token is None:
            tokenizer.pad_token_id = model.config.eos_token_id

        if (
            (
                getattr(model, "is_loaded_in_4bit", False)
                or getattr(model, "is_loaded_in_8bit", False)
            )
            and device is not None
            and backend == "default"
        ):
            logger.warning(
                f"Setting the `device` argument to None from {device} to avoid "
                "the error caused by attempting to move the model that was already "
                "loaded on the GPU using the Accelerate module to the same or "
                "another device."
            )
            device = None

        if (
            device is not None
            and importlib.util.find_spec("torch") is not None
            and backend == "default"
        ):
            import torch

            cuda_device_count = torch.cuda.device_count()
            if device < -1 or (device >= cuda_device_count):
                raise ValueError(
                    f"Got device=={device}, "
                    f"device is required to be within [-1, {cuda_device_count})"
                )
            if device_map is not None and device < 0:
                device = None
            if device is not None and device < 0 and cuda_device_count > 0:
                logger.warning(
                    "Device has %d GPUs available. "
                    "Provide device={deviceId} to `from_model_id` to use available "
                    "GPUs for execution. deviceId is -1 (default) for CPU and "
                    "can be a positive integer associated with CUDA device id.",
                    cuda_device_count,
                )
        if device is not None and device_map is not None and backend == "openvino":
            logger.warning("Please set device for OpenVINO through: `model_kwargs`")
        if "trust_remote_code" in _model_kwargs:
            _model_kwargs = {
                k: v for k, v in _model_kwargs.items() if k != "trust_remote_code"
            }
        _pipeline_kwargs = pipeline_kwargs or {}
        pipeline = hf_pipeline(
            task=task,
            model=model,
            tokenizer=tokenizer,
            device=device,
            device_map=device_map,
            batch_size=batch_size,
            model_kwargs=_model_kwargs,
            **_pipeline_kwargs,
        )
        if pipeline.task not in VALID_TASKS:
            raise ValueError(
                f"Got invalid task {pipeline.task}, "
                f"currently only {VALID_TASKS} are supported"
            )
        return cls(
            pipeline=pipeline,
            model_id=model_id,
            model_kwargs=_model_kwargs,
            pipeline_kwargs=_pipeline_kwargs,
            batch_size=batch_size,
            **kwargs,
        )
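    # Usage sketch for `from_model_id` with the OpenVINO backend: the method first
    # tries to load a local OpenVINO model and, failing that, re-exports it with
    # `export=True`. The model id and values below are illustrative; per the
    # warning above, the OpenVINO device is passed through `model_kwargs`.
    #
    #     ov_llm = HuggingFacePipeline.from_model_id(
    #         model_id="gpt2",
    #         task="text-generation",
    #         backend="openvino",
    #         model_kwargs={"device": "CPU"},
    #         pipeline_kwargs={"max_new_tokens": 10},
    #     )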
    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "model_id": self.model_id,
            "model_kwargs": self.model_kwargs,
            "pipeline_kwargs": self.pipeline_kwargs,
        }

    @property
    def _llm_type(self) -> str:
        return "huggingface_pipeline"

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        # List to hold all results
        text_generations: List[str] = []
        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
        skip_prompt = kwargs.get("skip_prompt", False)

        for i in range(0, len(prompts), self.batch_size):
            batch_prompts = prompts[i : i + self.batch_size]

            # Process batch of prompts
            responses = self.pipeline(
                batch_prompts,
                **pipeline_kwargs,
            )

            # Process each response in the batch
            for j, response in enumerate(responses):
                if isinstance(response, list):
                    # if model returns multiple generations, pick the top one
                    response = response[0]

                if self.pipeline.task == "text-generation":
                    text = response["generated_text"]
                elif self.pipeline.task == "text2text-generation":
                    text = response["generated_text"]
                elif self.pipeline.task == "summarization":
                    text = response["summary_text"]
                elif self.pipeline.task in "translation":
                    text = response["translation_text"]
                else:
                    raise ValueError(
                        f"Got invalid task {self.pipeline.task}, "
                        f"currently only {VALID_TASKS} are supported"
                    )
                if skip_prompt:
                    text = text[len(batch_prompts[j]) :]
                # Append the processed text to results
                text_generations.append(text)

        return LLMResult(
            generations=[[Generation(text=text)] for text in text_generations]
        )

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        from threading import Thread

        import torch
        from transformers import (
            StoppingCriteria,
            StoppingCriteriaList,
            TextIteratorStreamer,
        )

        pipeline_kwargs = kwargs.get("pipeline_kwargs", {})
        skip_prompt = kwargs.get("skip_prompt", True)

        # Convert stop tokens to ids so they can be matched against generated ids
        if stop is not None:
            stop = self.pipeline.tokenizer.convert_tokens_to_ids(stop)
        stopping_ids_list = stop or []

        # Stop generation as soon as the last generated token matches a stop id
        class StopOnTokens(StoppingCriteria):
            def __call__(
                self,
                input_ids: torch.LongTensor,
                scores: torch.FloatTensor,
                **kwargs: Any,
            ) -> bool:
                for stop_id in stopping_ids_list:
                    if input_ids[0][-1] == stop_id:
                        return True
                return False

        stopping_criteria = StoppingCriteriaList([StopOnTokens()])

        inputs = self.pipeline.tokenizer(prompt, return_tensors="pt")
        streamer = TextIteratorStreamer(
            self.pipeline.tokenizer,
            timeout=60.0,
            skip_prompt=skip_prompt,
            skip_special_tokens=True,
        )
        generation_kwargs = dict(
            inputs,
            streamer=streamer,
            stopping_criteria=stopping_criteria,
            **pipeline_kwargs,
        )
        # Run generation in a background thread; the streamer yields decoded text
        # as tokens are produced
        t1 = Thread(target=self.pipeline.model.generate, kwargs=generation_kwargs)
        t1.start()

        for char in streamer:
            chunk = GenerationChunk(text=char)
            if run_manager:
                run_manager.on_llm_new_token(chunk.text, chunk=chunk)

            yield chunk
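# Usage sketch for the generation paths above (model id and prompts are
# illustrative): `invoke` runs through `_generate`, while `stream` yields text
# chunks produced by `_stream` via the background generation thread.
#
#     llm = HuggingFacePipeline.from_model_id(
#         model_id="gpt2",
#         task="text-generation",
#         pipeline_kwargs={"max_new_tokens": 10},
#     )
#     print(llm.invoke("Once upon a time"))
#     for chunk in llm.stream("Once upon a time"):
#         print(chunk, end="", flush=True)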