class SpacyEmbeddings(BaseModel, Embeddings):
    """Embeddings by spaCy models.

    Each text is passed through the loaded spaCy pipeline and the resulting
    document's ``vector`` is converted to a plain list with ``.tolist()``.

    Attributes:
        model_name (str): Name of a spaCy model. Defaults to
            ``"en_core_web_sm"``.
        nlp (Any): The spaCy model loaded into memory. It is always set by
            ``validate_environment`` from ``model_name``; any value passed
            to the constructor is overwritten.

    Methods:
        embed_documents(texts: List[str]) -> List[List[float]]:
            Generates embeddings for a list of documents.
        embed_query(text: str) -> List[float]:
            Generates an embedding for a single piece of text.
    """

    model_name: str = "en_core_web_sm"
    nlp: Optional[Any] = None

    class Config:
        # Reject constructor arguments that are not declared fields.
        extra = "forbid"

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """
        Validates that the spaCy package and the model are installed.

        Runs before field validation, fills in the default model name when
        none (or ``None``) was given, and loads the model into ``values["nlp"]``.

        Args:
            values (Dict): The values provided to the class constructor.

        Returns:
            The validated values, with ``model_name`` and ``nlp`` populated.

        Raises:
            ValueError: If the spaCy package or the model are not installed.
        """
        model_name = values.get("model_name")
        if model_name is None:
            model_name = "en_core_web_sm"
            values["model_name"] = model_name

        # Check if the spaCy package is installed
        if importlib.util.find_spec("spacy") is None:
            raise ValueError(
                "SpaCy package not found. "
                "Please install it with `pip install spacy`."
            )

        try:
            # Try to load the spaCy model
            import spacy

            values["nlp"] = spacy.load(model_name)
        except OSError as err:
            # If the model is not found, raise a ValueError. The original
            # OSError is chained so the underlying cause stays visible.
            raise ValueError(
                f"SpaCy model '{model_name}' not found. "
                f"Please install it with"
                f" `python -m spacy download {model_name}` "
                "or provide a valid spaCy model name."
            ) from err

        return values  # Return the validated values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Generates embeddings for a list of documents.

        Args:
            texts (List[str]): The documents to generate embeddings for.

        Returns:
            A list of embeddings, one for each document, in input order.
            An empty input list yields an empty list.
        """
        return [self.nlp(text).vector.tolist() for text in texts]  # type: ignore[misc]

    def embed_query(self, text: str) -> List[float]:
        """
        Generates an embedding for a single piece of text.

        Args:
            text (str): The text to generate an embedding for.

        Returns:
            The embedding for the text.
        """
        return self.nlp(text).vector.tolist()  # type: ignore[misc]

    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Asynchronously generates embeddings for a list of documents.
        This method is not implemented and raises a NotImplementedError.

        Args:
            texts (List[str]): The documents to generate embeddings for.

        Raises:
            NotImplementedError: This method is not implemented.
        """
        raise NotImplementedError("Asynchronous embedding generation is not supported.")

    async def aembed_query(self, text: str) -> List[float]:
        """
        Asynchronously generates an embedding for a single piece of text.
        This method is not implemented and raises a NotImplementedError.

        Args:
            text (str): The text to generate an embedding for.

        Raises:
            NotImplementedError: This method is not implemented.
        """
        raise NotImplementedError("Asynchronous embedding generation is not supported.")