def stringify_embedding(embedding: List) -> str:
    """Render an embedding as space-separated ``index:value`` pairs.

    Args:
        embedding: The embedding components, in order.

    Returns:
        One ``"{position}:{component}"`` token per component, joined by
        single spaces (for example ``"0:0.1 1:0.2"``). An empty embedding
        yields an empty string.
    """
    tokens = (f"{position}:{component}" for position, component in enumerate(embedding))
    return " ".join(tokens)
def is_stringtype_instance(item: Any) -> bool:
    """Tell whether ``item`` is a plain string or an ``_Embed`` wrapping one.

    Args:
        item: Any object.

    Returns:
        True if ``item`` is a ``str``, or if it is an ``_Embed`` whose
        ``value`` attribute is a ``str``; False otherwise.
    """
    if isinstance(item, str):
        return True
    return isinstance(item, _Embed) and isinstance(item.value, str)
def embed_string_type(
    item: Union[str, _Embed], model: Any, namespace: Optional[str] = None
) -> Dict[str, Union[str, List[str]]]:
    """Embed a string or an _Embed object under a single namespace.

    A plain string is not sent through the model: its spaces are replaced
    with underscores and it is returned as-is. For an ``_Embed``, ``item.value``
    is passed to ``model.encode`` and the result is rendered with
    ``stringify_embedding``; when ``item.keep`` is truthy, the original text
    (spaces replaced with underscores) followed by a space is prepended.

    Args:
        item: The string or ``_Embed`` to process.
        model: Object exposing ``encode``; only used for ``_Embed`` items.
        namespace: Key under which the result is stored. Required, despite
            the ``None`` default.

    Returns:
        A one-entry dictionary ``{namespace: text}``.

    Raises:
        ValueError: If ``item`` is neither a ``str`` nor an ``_Embed``, or if
            ``namespace`` is None.
    """
    is_embed = isinstance(item, _Embed)
    if not is_embed and not isinstance(item, str):
        raise ValueError(f"Unsupported type {type(item)} for embedding")

    # Validate the namespace before touching the model, so that a call which
    # is bound to fail does not pay for an encode first. The type check stays
    # ahead of this one so the same error wins as before when both apply.
    if namespace is None:
        raise ValueError(
            "The default namespace must be provided when embedding a string or _Embed object."  # noqa: E501
        )

    if not is_embed:
        return {namespace: item.replace(" ", "_")}

    encoded = stringify_embedding(model.encode(item.value))
    keep_str = (item.value.replace(" ", "_") + " ") if item.keep else ""
    return {namespace: keep_str + encoded}
def embed_dict_type(item: Dict, model: Any) -> Dict[str, Any]:
    """Embed every value of a dictionary, using each key as the namespace.

    Args:
        item: Mapping of namespace to either a single embeddable value or a
            list of embeddable values. Each value is handed to
            ``embed_string_type``.
        model: Forwarded unchanged to ``embed_string_type``.

    Returns:
        A dictionary with the same keys. A list value becomes a list holding
        the embedded text of each member; any other value is merged in as
        whatever ``embed_string_type`` returns for it.
    """
    result: Dict = {}
    for key, value in item.items():
        if not isinstance(value, list):
            result.update(embed_string_type(value, model, key))
            continue
        # Each member is embedded under the same key; only the text is kept.
        result[key] = [embed_string_type(member, model, key)[key] for member in value]
    return result
def embed_list_type(
    item: list, model: Any, namespace: Optional[str] = None
) -> List[Dict[str, Union[str, List[str]]]]:
    """Embed each element of a list, dispatching on the element's type.

    Args:
        item: Elements to embed. A dict goes to ``embed_dict_type``; a nested
            list is embedded recursively and then collapsed into one
            dictionary; anything else goes to ``embed_string_type``.
        model: Forwarded unchanged to the helpers.
        namespace: Namespace forwarded to ``embed_string_type`` and to the
            recursive call; not used for dict elements.

    Returns:
        One dictionary per element of ``item``, in order.

    Raises:
        IndexError: If a nested list is empty (there is no first result to
            take the grouping key from).
    """

    def _embed_element(element: Any) -> Dict:
        if isinstance(element, dict):
            return embed_dict_type(element, model)
        if isinstance(element, list):
            nested = embed_list_type(element, model, namespace)
            # The first key of the first nested result names the group; every
            # nested result contributes its value stored under that key.
            group_key = next(iter(nested[0]))
            return {group_key: [entry[group_key] for entry in nested]}
        return embed_string_type(element, model, namespace)

    return [_embed_element(element) for element in item]
def embed(
    to_embed: Union[Union[str, _Embed], Dict, List[Union[str, _Embed]], List[Dict]],
    model: Any,
    namespace: Optional[str] = None,
) -> List[Dict[str, Union[str, List[str]]]]:
    """
    Embed the actions or context using the SentenceTransformer model
    (or a model that has an `encode` function).

    Args:
        to_embed: The text to be embedded: a string, an ``_Embed`` wrapping a
            string, a dictionary, or a list of these.
        model: The model to use for embedding; forwarded to the
            type-specific helper.
        namespace: The default namespace, forwarded for string, ``_Embed``
            and list inputs. Not used when ``to_embed`` is a dictionary.

    Returns:
        A list of dictionaries keyed by namespace. String, ``_Embed`` and
        dictionary inputs produce a one-element list; a list input produces
        whatever ``embed_list_type`` returns.

    Raises:
        ValueError: If ``to_embed`` is none of the supported kinds, including
            an ``_Embed`` whose ``value`` is not a string.
    """
    if isinstance(to_embed, _Embed) and isinstance(to_embed.value, str):
        return [embed_string_type(to_embed, model, namespace)]
    if isinstance(to_embed, str):
        return [embed_string_type(to_embed, model, namespace)]
    if isinstance(to_embed, dict):
        return [embed_dict_type(to_embed, model)]
    if isinstance(to_embed, list):
        return embed_list_type(to_embed, model, namespace)
    raise ValueError("Invalid input format for embedding")