Source code for langchain_community.retrievers.elastic_search_bm25
"""Wrapper around Elasticsearch vector database."""from__future__importannotationsimportuuidfromtypingimportAny,Iterable,Listfromlangchain_core.callbacksimportCallbackManagerForRetrieverRunfromlangchain_core.documentsimportDocumentfromlangchain_core.retrieversimportBaseRetriever
[docs]classElasticSearchBM25Retriever(BaseRetriever):"""`Elasticsearch` retriever that uses `BM25`. To connect to an Elasticsearch instance that requires login credentials, including Elastic Cloud, use the Elasticsearch URL format https://username:password@es_host:9243. For example, to connect to Elastic Cloud, create the Elasticsearch URL with the required authentication details and pass it to the ElasticVectorSearch constructor as the named parameter elasticsearch_url. You can obtain your Elastic Cloud URL and login credentials by logging in to the Elastic Cloud console at https://cloud.elastic.co, selecting your deployment, and navigating to the "Deployments" page. To obtain your Elastic Cloud password for the default "elastic" user: 1. Log in to the Elastic Cloud console at https://cloud.elastic.co 2. Go to "Security" > "Users" 3. Locate the "elastic" user and click "Edit" 4. Click "Reset password" 5. Follow the prompts to reset the password The format for Elastic Cloud URLs is https://username:password@cluster_id.region_id.gcp.cloud.es.io:9243. """client:Any"""Elasticsearch client."""index_name:str"""Name of the index to use in Elasticsearch."""
[docs]@classmethoddefcreate(cls,elasticsearch_url:str,index_name:str,k1:float=2.0,b:float=0.75)->ElasticSearchBM25Retriever:""" Create a ElasticSearchBM25Retriever from a list of texts. Args: elasticsearch_url: URL of the Elasticsearch instance to connect to. index_name: Name of the index to use in Elasticsearch. k1: BM25 parameter k1. b: BM25 parameter b. Returns: """fromelasticsearchimportElasticsearch# Create an Elasticsearch client instancees=Elasticsearch(elasticsearch_url)# Define the index settings and mappingssettings={"analysis":{"analyzer":{"default":{"type":"standard"}}},"similarity":{"custom_bm25":{"type":"BM25","k1":k1,"b":b,}},}mappings={"properties":{"content":{"type":"text","similarity":"custom_bm25",# Use the custom BM25 similarity}}}# Create the index with the specified settings and mappingses.indices.create(index=index_name,mappings=mappings,settings=settings)returncls(client=es,index_name=index_name)
[docs]defadd_texts(self,texts:Iterable[str],refresh_indices:bool=True,)->List[str]:"""Run more texts through the embeddings and add to the retriever. Args: texts: Iterable of strings to add to the retriever. refresh_indices: bool to refresh ElasticSearch indices Returns: List of ids from adding the texts into the retriever. """try:fromelasticsearch.helpersimportbulkexceptImportError:raiseImportError("Could not import elasticsearch python package. ""Please install it with `pip install elasticsearch`.")requests=[]ids=[]fori,textinenumerate(texts):_id=str(uuid.uuid4())request={"_op_type":"index","_index":self.index_name,"content":text,"_id":_id,}ids.append(_id)requests.append(request)bulk(self.client,requests)ifrefresh_indices:self.client.indices.refresh(index=self.index_name)returnids