"""written under MIT Licence, Michael Feil 2023."""
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Dict, List, Optional, Tuple
import aiohttp
import numpy as np
import requests
from langchain_core.embeddings import Embeddings
from langchain_core.utils import get_from_dict_or_env
from pydantic import BaseModel, ConfigDict, model_validator
__all__ = ["InfinityEmbeddings"]
[docs]
class InfinityEmbeddings(BaseModel, Embeddings):
"""Self-hosted embedding models for `infinity` package.
See https://github.com/michaelfeil/infinity
This also works for text-embeddings-inference and other
self-hosted openai-compatible servers.
Infinity is a package to interact with Embedding Models on https://github.com/michaelfeil/infinity
Example:
.. code-block:: python
from langchain_community.embeddings import InfinityEmbeddings
InfinityEmbeddings(
model="BAAI/bge-small",
infinity_api_url="http://localhost:7997",
)
"""
model: str
"Underlying Infinity model id."
infinity_api_url: str = "http://localhost:7997"
"""Endpoint URL to use."""
client: Any = None #: :meta private:
"""Infinity client."""
# LLM call kwargs
model_config = ConfigDict(
extra="forbid",
)
@model_validator(mode="before")
@classmethod
def validate_environment(cls, values: Dict) -> Any:
"""Validate that api key and python package exists in environment."""
values["infinity_api_url"] = get_from_dict_or_env(
values, "infinity_api_url", "INFINITY_API_URL"
)
values["client"] = TinyAsyncOpenAIInfinityEmbeddingClient(
host=values["infinity_api_url"],
)
return values
[docs]
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Call out to Infinity's embedding endpoint.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
embeddings = self.client.embed(
model=self.model,
texts=texts,
)
return embeddings
[docs]
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
"""Async call out to Infinity's embedding endpoint.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
embeddings = await self.client.aembed(
model=self.model,
texts=texts,
)
return embeddings
[docs]
def embed_query(self, text: str) -> List[float]:
"""Call out to Infinity's embedding endpoint.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self.embed_documents([text])[0]
[docs]
async def aembed_query(self, text: str) -> List[float]:
"""Async call out to Infinity's embedding endpoint.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
embeddings = await self.aembed_documents([text])
return embeddings[0]
[docs]
class TinyAsyncOpenAIInfinityEmbeddingClient: #: :meta private:
"""Helper tool to embed Infinity.
It is not a part of Langchain's stable API,
direct use discouraged.
Example:
.. code-block:: python
mini_client = TinyAsyncInfinityEmbeddingClient(
)
embeds = mini_client.embed(
model="BAAI/bge-small",
text=["doc1", "doc2"]
)
# or
embeds = await mini_client.aembed(
model="BAAI/bge-small",
text=["doc1", "doc2"]
)
"""
[docs]
def __init__(
self,
host: str = "http://localhost:7797/v1",
aiosession: Optional[aiohttp.ClientSession] = None,
) -> None:
self.host = host
self.aiosession = aiosession
if self.host is None or len(self.host) < 3:
raise ValueError(" param `host` must be set to a valid url")
self._batch_size = 128
@staticmethod
def _permute(
texts: List[str], sorter: Callable = len
) -> Tuple[List[str], Callable]:
"""Sort texts in ascending order, and
delivers a lambda expr, which can sort a same length list
https://github.com/UKPLab/sentence-transformers/blob/
c5f93f70eca933c78695c5bc686ceda59651ae3b/sentence_transformers/SentenceTransformer.py#L156
Args:
texts (List[str]): _description_
sorter (Callable, optional): _description_. Defaults to len.
Returns:
Tuple[List[str], Callable]: _description_
Example:
```
texts = ["one","three","four"]
perm_texts, undo = self._permute(texts)
texts == undo(perm_texts)
```
"""
if len(texts) == 1:
# special case query
return texts, lambda t: t
length_sorted_idx = np.argsort([-sorter(sen) for sen in texts])
texts_sorted = [texts[idx] for idx in length_sorted_idx]
return texts_sorted, lambda unsorted_embeddings: [ # E731
unsorted_embeddings[idx] for idx in np.argsort(length_sorted_idx)
]
def _batch(self, texts: List[str]) -> List[List[str]]:
"""
splits Lists of text parts into batches of size max `self._batch_size`
When encoding vector database,
Args:
texts (List[str]): List of sentences
self._batch_size (int, optional): max batch size of one request.
Returns:
List[List[str]]: Batches of List of sentences
"""
if len(texts) == 1:
# special case query
return [texts]
batches = []
for start_index in range(0, len(texts), self._batch_size):
batches.append(texts[start_index : start_index + self._batch_size])
return batches
@staticmethod
def _unbatch(batch_of_texts: List[List[Any]]) -> List[Any]:
if len(batch_of_texts) == 1 and len(batch_of_texts[0]) == 1:
# special case query
return batch_of_texts[0]
texts = []
for sublist in batch_of_texts:
texts.extend(sublist)
return texts
def _kwargs_post_request(self, model: str, texts: List[str]) -> Dict[str, Any]:
"""Build the kwargs for the Post request, used by sync
Args:
model (str): _description_
texts (List[str]): _description_
Returns:
Dict[str, Collection[str]]: _description_
"""
return dict(
url=f"{self.host}/embeddings",
headers={
# "accept": "application/json",
"content-type": "application/json",
},
json=dict(
input=texts,
model=model,
),
)
def _sync_request_embed(
self, model: str, batch_texts: List[str]
) -> List[List[float]]:
response = requests.post(
**self._kwargs_post_request(model=model, texts=batch_texts)
)
if response.status_code != 200:
raise Exception(
f"Infinity returned an unexpected response with status "
f"{response.status_code}: {response.text}"
)
return [e["embedding"] for e in response.json()["data"]]
[docs]
def embed(self, model: str, texts: List[str]) -> List[List[float]]:
"""call the embedding of model
Args:
model (str): to embedding model
texts (List[str]): List of sentences to embed.
Returns:
List[List[float]]: List of vectors for each sentence
"""
perm_texts, unpermute_func = self._permute(texts)
perm_texts_batched = self._batch(perm_texts)
# Request
map_args = (
self._sync_request_embed,
[model] * len(perm_texts_batched),
perm_texts_batched,
)
if len(perm_texts_batched) == 1:
embeddings_batch_perm = list(map(*map_args))
else:
with ThreadPoolExecutor(32) as p:
embeddings_batch_perm = list(p.map(*map_args))
embeddings_perm = self._unbatch(embeddings_batch_perm)
embeddings = unpermute_func(embeddings_perm)
return embeddings
async def _async_request(
self, session: aiohttp.ClientSession, kwargs: Dict[str, Any]
) -> List[List[float]]:
async with session.post(**kwargs) as response:
if response.status != 200:
raise Exception(
f"Infinity returned an unexpected response with status "
f"{response.status}: {response.text}"
)
embedding = (await response.json())["data"]
return [e["embedding"] for e in embedding]
[docs]
async def aembed(self, model: str, texts: List[str]) -> List[List[float]]:
"""call the embedding of model, async method
Args:
model (str): to embedding model
texts (List[str]): List of sentences to embed.
Returns:
List[List[float]]: List of vectors for each sentence
"""
perm_texts, unpermute_func = self._permute(texts)
perm_texts_batched = self._batch(perm_texts)
# Request
async with aiohttp.ClientSession(
trust_env=True, connector=aiohttp.TCPConnector(limit=32)
) as session:
embeddings_batch_perm = await asyncio.gather(
*[
self._async_request(
session=session,
kwargs=self._kwargs_post_request(model=model, texts=t),
)
for t in perm_texts_batched
]
)
embeddings_perm = self._unbatch(embeddings_batch_perm)
embeddings = unpermute_func(embeddings_perm)
return embeddings