def create_index(contexts: List[str], embeddings: Embeddings) -> np.ndarray:
    """Embed every context and stack the resulting vectors into one array.

    Each context is passed individually to ``embeddings.embed_query``; the
    calls are dispatched through a thread pool, and the results are collected
    in the same order as ``contexts``.

    Args:
        contexts: Texts to embed.
        embeddings: Embeddings model whose ``embed_query`` method is called
            once per context.

    Returns:
        A NumPy array built from the list of per-context embeddings, with one
        entry per context in input order.
    """
    embed_one = embeddings.embed_query
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # ``map`` yields results in submission order, so position ``i`` in the
        # output always corresponds to ``contexts[i]``.
        vectors = list(pool.map(embed_one, contexts))
    return np.array(vectors)
class NanoPQRetriever(BaseRetriever):
    """NanoPQ retriever.

    Ranks the stored ``texts`` against a query by product-quantizing the
    embedding ``index`` with ``nanopq`` and sorting the texts by their
    approximate distance to the query embedding.
    """

    embeddings: Embeddings
    """Embeddings model to use."""
    index: Any = None
    """Index of embeddings."""
    texts: List[str]
    """List of texts to index."""
    metadatas: Optional[List[dict]] = None
    """List of metadatas corresponding with each text."""
    k: int = 4
    """Number of results to return."""
    relevancy_threshold: Optional[float] = None
    """Threshold for relevancy."""
    subspace: int = 4
    """Number of subspaces to create; the embedding dimension must be
    divisible by this value."""
    clusters: int = 128
    """Number of clusters to create."""

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
    )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return the ``k`` stored texts closest to ``query``.

        The quantizer is fitted on ``self.index`` on every call, the index is
        encoded with it, and the texts are ordered by ascending approximate
        distance to the query embedding.

        Args:
            query: Text to search for.
            run_manager: Callback manager for this retriever run (not used by
                this implementation).

        Returns:
            Up to ``k`` documents, nearest first. Each document carries the
            matching entry of ``metadatas`` when ``metadatas`` is set and
            non-empty, otherwise an empty dict.

        Raises:
            ImportError: If ``nanopq`` is not installed.
            RuntimeError: If fitting the quantizer fails an assertion for the
                given combination of index shape, ``subspace`` and
                ``clusters``.
        """
        try:
            from nanopq import PQ
        except ImportError as e:
            # Chain the cause so the underlying import failure stays visible.
            raise ImportError(
                "Could not import nanopq, please install with `pip install nanopq`."
            ) from e

        query_embeds = np.array(self.embeddings.embed_query(query))

        # Convert once and reuse for both fitting and encoding.
        index_vectors = self.index.astype("float32")
        try:
            pq = PQ(M=self.subspace, Ks=self.clusters, verbose=True).fit(
                index_vectors
            )
        except AssertionError as e:
            error_message = (
                "Received params: training_sample={training_sample}, "
                "n_cluster={n_clusters}, subspace={subspace}, "
                "embedding_shape={embedding_shape}. Issue with the combination. "
                "Please retrace back to find the exact error"
            ).format(
                training_sample=self.index.shape[0],
                n_clusters=self.clusters,
                subspace=self.subspace,
                embedding_shape=self.index.shape[1],
            )
            # Keep the failed assertion attached as the cause for debugging.
            raise RuntimeError(error_message) from e

        index_code = pq.encode(vecs=index_vectors)
        dt = pq.dtable(query=query_embeds.astype("float32"))
        dists = dt.adist(codes=index_code)

        # Ascending sort: the smallest approximate distance comes first.
        sorted_ix = np.argsort(dists)

        top_k_results = [
            Document(
                page_content=self.texts[row],
                metadata=self.metadatas[row] if self.metadatas else {},
            )
            for row in sorted_ix[0 : self.k]
        ]
        return top_k_results