import os
import sys
from typing import Any, List

from langchain_core.embeddings import Embeddings
from pydantic import BaseModel, ConfigDict


class JohnSnowLabsEmbeddings(BaseModel, Embeddings):
    """JohnSnowLabs embedding models.

    To use, you should have the ``johnsnowlabs`` python package installed.

    Example:
        .. code-block:: python

            from langchain_community.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings

            embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert')
            output = embedding.embed_query("foo bar")
    """  # noqa: E501

    model: Any = "embed_sentence.bert"

    def __init__(
        self,
        model: Any = "embed_sentence.bert",
        hardware_target: str = "cpu",
        **kwargs: Any,
    ):
        """Initialize the johnsnowlabs model."""
        super().__init__(**kwargs)
        # 1) Check imports
        try:
            from johnsnowlabs import nlp
            from nlu.pipe.pipeline import NLUPipeline
        except ImportError as exc:
            raise ImportError(
                "Could not import johnsnowlabs python package. "
                "Please install it with `pip install johnsnowlabs`."
            ) from exc

        # 2) Start a Spark Session
        try:
            os.environ["PYSPARK_PYTHON"] = sys.executable
            os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable
            nlp.start(hardware_target=hardware_target)
        except Exception as exc:
            raise Exception("Failure starting Spark Session") from exc

        # 3) Load the model
        try:
            if isinstance(model, str):
                self.model = nlp.load(model)
            elif isinstance(model, NLUPipeline):
                self.model = model
            else:
                self.model = nlp.to_nlu_pipe(model)
        except Exception as exc:
            raise Exception("Failure loading model") from exc

    model_config = ConfigDict(
        extra="forbid",
    )
    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Compute doc embeddings using a JohnSnowLabs transformer model.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        # The NLU pipeline returns a DataFrame; the name of the embedding column
        # depends on the loaded model, so pick the column containing "embedding".
        df = self.model.predict(texts, output_level="document")
        emb_col = None
        for c in df.columns:
            if "embedding" in c:
                emb_col = c
        return [vec.tolist() for vec in df[emb_col].tolist()]
    def embed_query(self, text: str) -> List[float]:
        """Compute query embeddings using a JohnSnowLabs transformer model.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]
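

# A minimal usage sketch, assuming the ``johnsnowlabs`` package is installed and
# a local Spark session can be started (the first run downloads the model). The
# sample sentences are illustrative; the model name is the class default shown
# in the docstring above.
if __name__ == "__main__":
    embedding = JohnSnowLabsEmbeddings(model="embed_sentence.bert")

    # Embed several documents at once; one vector (list of floats) per text.
    doc_vectors = embedding.embed_documents(["foo bar", "hello world"])

    # Embed a single query; equivalent to embed_documents([text])[0].
    query_vector = embedding.embed_query("foo bar")

    # All vectors share the embedding dimension of the underlying model.
    assert len(doc_vectors) == 2
    assert len(doc_vectors[0]) == len(query_vector)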