class DeepSparse(LLM):
    """Neural Magic DeepSparse LLM interface.

    To use, you should have the ``deepsparse`` or ``deepsparse-nightly``
    python package installed. See https://github.com/neuralmagic/deepsparse

    This interface lets you deploy optimized LLMs straight from the
    [SparseZoo](https://sparsezoo.neuralmagic.com/?useCase=text_generation)

    Example:
        .. code-block:: python

            from langchain_community.llms import DeepSparse

            llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none")
    """  # noqa: E501

    pipeline: Any  #: :meta private:

    model: str
    """The path to a model file or directory or the name of a SparseZoo model stub."""

    model_configuration: Optional[Dict[str, Any]] = None
    """Keyword arguments passed to the pipeline construction.
    Common parameters are sequence_length, prompt_sequence_length."""

    generation_config: Union[None, str, Dict] = None
    """GenerationConfig dictionary consisting of parameters used to control
    sequences generated for each prompt. Common parameters are:
    max_length, max_new_tokens, num_return_sequences, output_scores,
    top_p, top_k, repetition_penalty."""

    streaming: bool = False
    """Whether to stream the results, token by token."""

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {
            "model": self.model,
            "model_config": self.model_configuration,
            "generation_config": self.generation_config,
            "streaming": self.streaming,
        }

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "deepsparse"
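    # A minimal construction sketch (illustrative only): the
    # ``model_configuration`` and ``generation_config`` values below are
    # assumed placeholders drawn from the attribute docstrings above, not
    # recommended settings.
    #
    #     llm = DeepSparse(
    #         model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none",
    #         model_configuration={"sequence_length": 512},
    #         generation_config={"max_new_tokens": 64, "top_p": 0.9},
    #     )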
    @pre_init
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the ``deepsparse`` package is installed."""
        try:
            from deepsparse import Pipeline
        except ImportError:
            raise ImportError(
                "Could not import `deepsparse` package. "
                "Please install it with `pip install deepsparse[llm]`"
            )

        model_config = values["model_configuration"] or {}

        values["pipeline"] = Pipeline.create(
            task="text_generation",
            model_path=values["model"],
            **model_config,
        )

        return values
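    # For reference, the pipeline built above can be exercised directly with
    # ``deepsparse`` (a sketch assuming the ``deepsparse[llm]``
    # text-generation API; ``sequence_length`` is one of the optional
    # ``model_configuration`` keys named above, and the ``.generations[0].text``
    # access mirrors ``_call`` below):
    #
    #     from deepsparse import Pipeline
    #
    #     pipeline = Pipeline.create(
    #         task="text_generation",
    #         model_path="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none",
    #         sequence_length=512,
    #     )
    #     print(pipeline(sequences="Tell me a joke.").generations[0].text)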
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text from a prompt.

        Args:
            prompt: The prompt to generate text from.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import DeepSparse

                llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none")
                llm.invoke("Tell me a joke.")
        """
        if self.streaming:
            combined_output = ""
            for chunk in self._stream(
                prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
            ):
                combined_output += chunk.text
            text = combined_output
        else:
            # Guard against ``generation_config`` being None (its default).
            text = (
                self.pipeline(sequences=prompt, **(self.generation_config or {}))
                .generations[0]
                .text
            )

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        return text

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text from a prompt.

        Args:
            prompt: The prompt to generate text from.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                from langchain_community.llms import DeepSparse

                llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none")
                await llm.ainvoke("Tell me a joke.")
        """
        if self.streaming:
            combined_output = ""
            async for chunk in self._astream(
                prompt=prompt, stop=stop, run_manager=run_manager, **kwargs
            ):
                combined_output += chunk.text
            text = combined_output
        else:
            text = (
                self.pipeline(sequences=prompt, **(self.generation_config or {}))
                .generations[0]
                .text
            )

        if stop is not None:
            text = enforce_stop_tokens(text, stop)

        return text

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yields result objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            A dictionary-like object containing a string token.

        Example:
            .. code-block:: python

                from langchain_community.llms import DeepSparse

                llm = DeepSparse(
                    model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none",
                    streaming=True,
                )
                for chunk in llm.stream("Tell me a joke", stop=["'", "\n"]):
                    print(chunk, end='', flush=True)  # noqa: T201
        """
        inference = self.pipeline(
            sequences=prompt, streaming=True, **(self.generation_config or {})
        )
        for token in inference:
            chunk = GenerationChunk(text=token.generations[0].text)
            if run_manager:
                run_manager.on_llm_new_token(token=chunk.text)
            yield chunk
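    # Async streaming sketch (assumes an ``llm`` constructed as in the
    # docstring above and an event loop driven via ``asyncio.run``;
    # illustrative only):
    #
    #     import asyncio
    #
    #     async def main() -> None:
    #         async for chunk in llm.astream("Tell me a joke"):
    #             print(chunk, end="", flush=True)
    #
    #     asyncio.run(main())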
    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        """Yields result objects as they are generated in real time.

        It also calls the callback manager's on_llm_new_token event with
        similar parameters to the OpenAI LLM class method of the same name.

        Args:
            prompt: The prompt to pass into the model.
            stop: Optional list of stop words to use when generating.

        Returns:
            A generator representing the stream of tokens being generated.

        Yields:
            A dictionary-like object containing a string token.

        Example:
            .. code-block:: python

                from langchain_community.llms import DeepSparse

                llm = DeepSparse(
                    model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none",
                    streaming=True,
                )
                async for chunk in llm.astream("Tell me a joke", stop=["'", "\n"]):
                    print(chunk, end='', flush=True)  # noqa: T201
        """
        inference = self.pipeline(
            sequences=prompt, streaming=True, **(self.generation_config or {})
        )
        for token in inference:
            chunk = GenerationChunk(text=token.generations[0].text)
            if run_manager:
                await run_manager.on_llm_new_token(token=chunk.text)
            yield chunk
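
# A runnable smoke-test sketch (not part of the library API): assumes
# ``deepsparse[llm]`` is installed and the SparseZoo stub below is reachable.
if __name__ == "__main__":
    llm = DeepSparse(
        model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base_quant-none",  # noqa: E501
        streaming=True,
    )
    # ``stream`` yields string chunks for an LLM, so each can be printed directly.
    for chunk in llm.stream("Tell me a joke"):
        print(chunk, end="", flush=True)  # noqa: T201
    print()  # noqa: T201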