Source code for langchain_community.callbacks.fiddler_callback
import time
from typing import Any, Dict, List, Optional
from uuid import UUID

from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.outputs import LLMResult
from langchain_core.utils import guard_import

from langchain_community.callbacks.utils import import_pandas

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Keys used with LLMResult data and as column names of published events.
TOKEN_USAGE = "token_usage"
TOTAL_TOKENS = "total_tokens"
PROMPT_TOKENS = "prompt_tokens"
COMPLETION_TOKENS = "completion_tokens"
RUN_ID = "run_id"
MODEL_NAME = "model_name"

# Feedback labels.
GOOD = "good"
BAD = "bad"
NEUTRAL = "neutral"

# LLM call status labels.
SUCCESS = "success"
FAILURE = "failure"

# Upper bounds used by the placeholder dataset below.
DEFAULT_MAX_TOKEN = 65536
DEFAULT_MAX_DURATION = 120000

# Fiddler column names.
PROMPT = "prompt"
RESPONSE = "response"
CONTEXT = "context"
DURATION = "duration"
FEEDBACK = "feedback"
LLM_STATUS = "llm_status"

FEEDBACK_POSSIBLE_VALUES = [GOOD, BAD, NEUTRAL]

# Ten-row placeholder dataset. Text-like columns repeat a fixed value; numeric
# columns alternate between their minimum and the default maximum. Key order is
# kept as-is because it becomes the column order of the resulting DataFrame.
_dataset_dict = {
    **{text_column: ["fiddler"] * 10 for text_column in (PROMPT, RESPONSE, CONTEXT)},
    FEEDBACK: [GOOD] * 10,
    LLM_STATUS: [SUCCESS] * 10,
    MODEL_NAME: ["fiddler"] * 10,
    RUN_ID: ["123e4567-e89b-12d3-a456-426614174000"] * 10,
    **{
        token_column: [0, DEFAULT_MAX_TOKEN] * 5
        for token_column in (TOTAL_TOKENS, PROMPT_TOKENS, COMPLETION_TOKENS)
    },
    DURATION: [1, DEFAULT_MAX_DURATION] * 5,
}
def import_fiddler() -> Any:
    """Import the fiddler python package, raising an error if it is missing.

    The import is delegated to ``guard_import`` with the module name
    ``"fiddler"`` and the pip distribution name ``"fiddler-client"``.

    Returns:
        Whatever ``guard_import`` returns for the ``fiddler`` package.
    """
    fiddler_module = guard_import("fiddler", pip_name="fiddler-client")
    return fiddler_module
def __init__(
    self,
    url: str,
    org: str,
    project: str,
    model: str,
    api_key: str,
) -> None:
    """
    Initialize Fiddler callback handler.

    Creates the Fiddler client and then, for anything not already present
    on the server, adds the project, uploads the placeholder dataset and
    registers the model (each of these only has to happen once).

    Args:
        url: Fiddler URL (e.g. https://demo.fiddler.ai).
            Make sure to include the protocol (http/https).
        org: Fiddler organization id
        project: Fiddler project name to publish events to
        model: Fiddler model name to publish events to
        api_key: Fiddler authentication token

    Raises:
        Exception: whatever ``add_project``, ``upload_dataset`` or
            ``add_model`` raised. The failure is reported on stdout and
            the original exception is re-raised unchanged.
    """
    super().__init__()
    # Initialize Fiddler client and other necessary properties
    self.fdl = import_fiddler()
    self.pd = import_pandas()

    self.url = url
    self.org = org
    self.project = project
    self.model = model
    self.api_key = api_key
    self._df = self.pd.DataFrame(_dataset_dict)

    # Per-run bookkeeping, keyed by run id.
    self.run_id_prompts: Dict[UUID, List[str]] = {}
    self.run_id_response: Dict[UUID, List[str]] = {}
    self.run_id_starttime: Dict[UUID, int] = {}

    # Initialize Fiddler client here
    self.fiddler_client = self.fdl.FiddlerApi(url, org_id=org, auth_token=api_key)

    if self.project not in self.fiddler_client.get_project_names():
        print(  # noqa: T201
            f"adding project {self.project}. This only has to be done once."
        )
        try:
            self.fiddler_client.add_project(self.project)
        except Exception as e:
            # The whole message must be an f-string, otherwise "{e}" is
            # printed literally instead of the actual error.
            print(  # noqa: T201
                f"Error adding project {self.project}: {e}. "
                "Fiddler integration will not work."
            )
            # Bare raise keeps the original traceback intact.
            raise

    dataset_info = self.fdl.DatasetInfo.from_dataframe(
        self._df, max_inferred_cardinality=0
    )

    # Set feedback and LLM status columns to categorical
    for column in dataset_info.columns:
        if column.name == FEEDBACK:
            column.data_type = self.fdl.DataType.CATEGORY
            column.possible_values = FEEDBACK_POSSIBLE_VALUES
        elif column.name == LLM_STATUS:
            column.data_type = self.fdl.DataType.CATEGORY
            column.possible_values = [SUCCESS, FAILURE]

    if self.model not in self.fiddler_client.get_model_names(self.project):
        # The dataset is stored under the same id as the model.
        if self.model not in self.fiddler_client.get_dataset_names(self.project):
            print(  # noqa: T201
                f"adding dataset {self.model} to project {self.project}. "
                "This only has to be done once."
            )
            try:
                self.fiddler_client.upload_dataset(
                    project_id=self.project,
                    dataset_id=self.model,
                    dataset={"train": self._df},
                    info=dataset_info,
                )
            except Exception as e:
                print(  # noqa: T201
                    f"Error adding dataset {self.model}: {e}. "
                    "Fiddler integration will not work."
                )
                raise

        # NOTE(review): dataset_id is "train" here while the dataset itself
        # is uploaded and referenced as self.model elsewhere - confirm this
        # is what the Fiddler client expects.
        model_info = self.fdl.ModelInfo.from_dataset_info(
            dataset_info=dataset_info,
            dataset_id="train",
            model_task=self.fdl.ModelTask.LLM,
            features=[PROMPT, CONTEXT, RESPONSE],
            target=FEEDBACK,
            metadata_cols=[
                RUN_ID,
                TOTAL_TOKENS,
                PROMPT_TOKENS,
                COMPLETION_TOKENS,
                MODEL_NAME,
                DURATION,
            ],
            custom_features=self.custom_features,
        )
        print(  # noqa: T201
            f"adding model {self.model} to project {self.project}. "
            "This only has to be done once."
        )
        try:
            self.fiddler_client.add_model(
                project_id=self.project,
                dataset_id=self.model,
                model_id=self.model,
                model_info=model_info,
            )
        except Exception as e:
            print(  # noqa: T201
                f"Error adding model {self.model}: {e}. "
                "Fiddler integration will not work."
            )
            raise
@property
def custom_features(self) -> list:
    """
    Define custom features for the model to automatically enrich the data with.

    Here, we enable the following enrichments:
    - Automatic Embedding generation for prompt and response
    - Text Statistics such as:
        - Automated Readability Index
        - Coleman Liau Index
        - Dale Chall Readability Score
        - Difficult Words
        - Flesch Reading Ease
        - Flesch Kincaid Grade
        - Gunning Fog
        - Linsear Write Formula
    - PII - Personal Identifiable Information
    - Sentiment Analysis

    Returns:
        A new list of Fiddler enrichment / text-embedding objects, built
        from ``self.fdl`` on every access.
    """
    fdl = self.fdl

    # Embeddings: each enrichment is paired with the custom feature using it.
    prompt_embedding = fdl.Enrichment(
        name="Prompt Embedding",
        enrichment="embedding",
        columns=[PROMPT],
    )
    prompt_cf = fdl.TextEmbedding(
        name="Prompt CF",
        source_column=PROMPT,
        column="Prompt Embedding",
    )
    response_embedding = fdl.Enrichment(
        name="Response Embedding",
        enrichment="embedding",
        columns=[RESPONSE],
    )
    response_cf = fdl.TextEmbedding(
        name="Response CF",
        source_column=RESPONSE,
        column="Response Embedding",
    )

    # Enrichments computed over both the prompt and the response.
    readability_statistics = [
        "automated_readability_index",
        "coleman_liau_index",
        "dale_chall_readability_score",
        "difficult_words",
        "flesch_reading_ease",
        "flesch_kincaid_grade",
        "gunning_fog",
        "linsear_write_formula",
    ]
    text_statistics = fdl.Enrichment(
        name="Text Statistics",
        enrichment="textstat",
        columns=[PROMPT, RESPONSE],
        config={"statistics": readability_statistics},
    )
    pii = fdl.Enrichment(
        name="PII",
        enrichment="pii",
        columns=[PROMPT, RESPONSE],
    )
    sentiment = fdl.Enrichment(
        name="Sentiment",
        enrichment="sentiment",
        columns=[PROMPT, RESPONSE],
    )

    return [
        prompt_embedding,
        prompt_cf,
        response_embedding,
        response_cf,
        text_statistics,
        pii,
        sentiment,
    ]


def _publish_events(
    self,
    run_id: UUID,
    prompt_responses: List[str],
    duration: int,
    llm_status: str,
    model_name: Optional[str] = "",
    token_usage_dict: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Publish events to fiddler

    One row is built per prompt recorded under ``run_id`` in
    ``self.run_id_prompts``. More than one row is sent as a batch,
    otherwise the first row is sent as a single event.

    Args:
        run_id: run whose stored prompts are published.
        prompt_responses: responses placed next to the stored prompts.
        duration: value repeated on every row of the duration column.
        llm_status: value repeated on every row of the status column.
        model_name: value repeated on every row of the model-name column.
        token_usage_dict: optional extra columns; ``int`` values are
            repeated on every row, any other value is assigned as given.

    Errors raised while publishing are printed and not re-raised. Building
    the DataFrame happens before that guard.
    """
    prompts = self.run_id_prompts[run_id]
    row_count = len(prompts)

    columns = {
        PROMPT: prompts,
        RESPONSE: prompt_responses,
        RUN_ID: [str(run_id)] * row_count,
        DURATION: [duration] * row_count,
        LLM_STATUS: [llm_status] * row_count,
        MODEL_NAME: [model_name] * row_count,
    }
    events = self.pd.DataFrame(columns)

    for usage_key, usage_value in (token_usage_dict or {}).items():
        if isinstance(usage_value, int):
            events[usage_key] = [usage_value] * row_count
        else:
            events[usage_key] = usage_value

    try:
        has_several_rows = events.shape[0] > 1
        if has_several_rows:
            self.fiddler_client.publish_events_batch(
                self.project, self.model, events
            )
        else:
            records = events.to_dict(orient="records")
            first_record = records[0]
            self.fiddler_client.publish_event(
                self.project, self.model, event=first_record
            )
    except Exception as err:
        print(  # noqa: T201
            f"Error publishing events to fiddler: {err}. continuing..."
        )