"""Loading datasets and evaluators."""fromtypingimportAny,Dict,List,Optional,Sequence,Type,Unionfromlangchain_core.language_modelsimportBaseLanguageModelfromlangchain.chains.baseimportChainfromlangchain.evaluation.agents.trajectory_eval_chainimportTrajectoryEvalChainfromlangchain.evaluation.comparisonimportPairwiseStringEvalChainfromlangchain.evaluation.comparison.eval_chainimportLabeledPairwiseStringEvalChainfromlangchain.evaluation.criteria.eval_chainimport(CriteriaEvalChain,LabeledCriteriaEvalChain,)fromlangchain.evaluation.embedding_distance.baseimport(EmbeddingDistanceEvalChain,PairwiseEmbeddingDistanceEvalChain,)fromlangchain.evaluation.exact_match.baseimportExactMatchStringEvaluatorfromlangchain.evaluation.parsing.baseimport(JsonEqualityEvaluator,JsonValidityEvaluator,)fromlangchain.evaluation.parsing.json_distanceimportJsonEditDistanceEvaluatorfromlangchain.evaluation.parsing.json_schemaimportJsonSchemaEvaluatorfromlangchain.evaluation.qaimportContextQAEvalChain,CotQAEvalChain,QAEvalChainfromlangchain.evaluation.regex_match.baseimportRegexMatchStringEvaluatorfromlangchain.evaluation.schemaimportEvaluatorType,LLMEvalChain,StringEvaluatorfromlangchain.evaluation.scoring.eval_chainimport(LabeledScoreStringEvalChain,ScoreStringEvalChain,)fromlangchain.evaluation.string_distance.baseimport(PairwiseStringDistanceEvalChain,StringDistanceEvalChain,)


def load_dataset(uri: str) -> List[Dict]:
    """Load a dataset from the `LangChainDatasets on HuggingFace <https://huggingface.co/LangChainDatasets>`_.

    Args:
        uri: The uri of the dataset to load.

    Returns:
        A list of dictionaries, each representing a row in the dataset.

    **Prerequisites**

    .. code-block:: shell

        pip install datasets

    Examples
    --------
    .. code-block:: python

        from langchain.evaluation import load_dataset
        ds = load_dataset("llm-math")
    """
    try:
        from datasets import load_dataset
    except ImportError:
        raise ImportError(
            "load_dataset requires the `datasets` package."
            " Please install with `pip install datasets`"
        )

    dataset = load_dataset(f"LangChainDatasets/{uri}")
    return [d for d in dataset["train"]]
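

# --- Illustrative usage (not part of the original module) ---
# A minimal sketch of calling load_dataset, assuming the optional `datasets`
# package is installed and HuggingFace is reachable. The "llm-math" dataset
# name is taken from the docstring example above; rows come back as plain
# dicts, so downstream code can index columns by name.
def _example_load_dataset() -> None:
    ds = load_dataset("llm-math")
    print(f"{len(ds)} rows; columns: {sorted(ds[0].keys())}")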


def load_evaluator(
    evaluator: EvaluatorType,
    *,
    llm: Optional[BaseLanguageModel] = None,
    **kwargs: Any,
) -> Union[Chain, StringEvaluator]:
    """Load the requested evaluation chain specified by a string.

    Parameters
    ----------
    evaluator : EvaluatorType
        The type of evaluator to load.
    llm : BaseLanguageModel, optional
        The language model to use for evaluation, by default None
    **kwargs : Any
        Additional keyword arguments to pass to the evaluator.

    Returns
    -------
    Union[Chain, StringEvaluator]
        The loaded evaluation chain or string evaluator.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluator, EvaluatorType
    >>> evaluator = load_evaluator(EvaluatorType.QA)
    """
    if evaluator not in _EVALUATOR_MAP:
        raise ValueError(
            f"Unknown evaluator type: {evaluator}"
            f"\nValid types are: {list(_EVALUATOR_MAP.keys())}"
        )
    evaluator_cls = _EVALUATOR_MAP[evaluator]
    if issubclass(evaluator_cls, LLMEvalChain):
        try:
            try:
                from langchain_openai import ChatOpenAI
            except ImportError:
                try:
                    from langchain_community.chat_models.openai import ChatOpenAI
                except ImportError:
                    raise ImportError(
                        "Could not import langchain_openai or fall back to "
                        "langchain_community. Please install langchain_openai "
                        "or specify a language model explicitly. "
                        "It's recommended to install langchain_openai AND "
                        "specify a language model explicitly."
                    )
            llm = llm or ChatOpenAI(  # type: ignore[call-arg]
                model="gpt-4", seed=42, temperature=0
            )
        except Exception as e:
            raise ValueError(
                f"Evaluation with the {evaluator_cls} requires a "
                "language model to function."
                " Failed to create the default 'gpt-4' model."
                " Please manually provide an evaluation LLM"
                " or check your openai credentials."
            ) from e
        return evaluator_cls.from_llm(llm=llm, **kwargs)
    else:
        return evaluator_cls(**kwargs)
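

# --- Illustrative usage (not part of the original module) ---
# A minimal sketch showing that passing `llm=` explicitly bypasses the
# ChatOpenAI fallback above, so no OpenAI credentials are needed to load an
# LLM-backed evaluator. FakeListLLM is langchain_community's deterministic
# stub model; the extra `criteria` kwarg is forwarded to
# CriteriaEvalChain.from_llm via **kwargs.
def _example_load_evaluator() -> Union[Chain, StringEvaluator]:
    from langchain_community.llms.fake import FakeListLLM

    stub_llm = FakeListLLM(responses=["Reasoning...\nY"])
    return load_evaluator(EvaluatorType.CRITERIA, llm=stub_llm, criteria="conciseness")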


def load_evaluators(
    evaluators: Sequence[EvaluatorType],
    *,
    llm: Optional[BaseLanguageModel] = None,
    config: Optional[dict] = None,
    **kwargs: Any,
) -> List[Union[Chain, StringEvaluator]]:
    """Load evaluators specified by a list of evaluator types.

    Parameters
    ----------
    evaluators : Sequence[EvaluatorType]
        The list of evaluator types to load.
    llm : BaseLanguageModel, optional
        The language model to use for evaluation. If none is provided,
        a default ChatOpenAI gpt-4 model will be used.
    config : dict, optional
        A dictionary mapping evaluator types to additional keyword arguments,
        by default None
    **kwargs : Any
        Additional keyword arguments to pass to all evaluators.

    Returns
    -------
    List[Union[Chain, StringEvaluator]]
        The loaded evaluators.

    Examples
    --------
    >>> from langchain.evaluation import load_evaluators, EvaluatorType
    >>> evaluators = [EvaluatorType.QA, EvaluatorType.CRITERIA]
    >>> loaded_evaluators = load_evaluators(evaluators, criteria="helpfulness")
    """
    loaded = []
    for evaluator in evaluators:
        _kwargs = config.get(evaluator, {}) if config else {}
        loaded.append(load_evaluator(evaluator, llm=llm, **{**kwargs, **_kwargs}))
    return loaded
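

# --- Illustrative usage (not part of the original module) ---
# A minimal sketch of per-evaluator configuration, assuming
# RegexMatchStringEvaluator accepts a `flags` constructor argument. Entries
# in `config` are merged over the shared **kwargs, with the per-evaluator
# values winning on key conflicts; neither evaluator here needs an LLM.
def _example_load_evaluators() -> List[Union[Chain, StringEvaluator]]:
    import re

    return load_evaluators(
        [EvaluatorType.EXACT_MATCH, EvaluatorType.REGEX_MATCH],
        config={EvaluatorType.REGEX_MATCH: {"flags": re.IGNORECASE}},
    )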