Skip to main content

China Mobile ECloud ElasticSearch VectorSearch

China Mobile ECloud VectorSearch is a fully managed, enterprise-level distributed search and analysis service. It provides a low-cost, high-performance, and reliable platform-level retrieval and analysis service for structured and unstructured data. As a vector database, it supports multiple index types and similarity distance methods.

This notebook shows how to use functionality related to the ECloud ElasticSearch VectorStore. To run it, you should have a China Mobile ECloud VectorSearch instance up and running.

Read the help document to quickly familiarize and configure China Mobile ECloud ElasticSearch instance.

After the instance is up and running, follow these steps to split documents, get embeddings, connect to the China Mobile ECloud ElasticSearch instance, index documents, and perform vector retrieval.

#!pip install elasticsearch==7.10.1

First, we want to use OpenAIEmbeddings so we have to get the OpenAI API Key.

import getpass
import os

# Prompt for the OpenAI API key (getpass does not echo the input) and store it
# in the OPENAI_API_KEY environment variable for OpenAIEmbeddings to use.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

Secondly, split documents and get embeddings.

from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import EcloudESVectorStore
# Load the sample text file as documents.
loader = TextLoader("../../../state_of_the_union.txt")
documents = loader.load()
# Split the loaded documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Embedding model handed to the vector store when indexing.
embeddings = OpenAIEmbeddings()

# Connection settings -- replace the placeholder values with those of your instance.
ES_URL = "http://localhost:9200"
USER = "your user name"
PASSWORD = "your password"
indexname = "your index name"

Then, index the documents.

# Build the vector store from the split documents and the embedding model,
# using the connection settings and index name defined earlier.
docsearch = EcloudESVectorStore.from_documents(
docs,
embeddings,
es_url=ES_URL,
user=USER,
password=PASSWORD,
index_name=indexname,
refresh_indices=True,
)

Finally, query and retrieve data.

query = "What did the president say about Ketanji Brown Jackson"
# Keep the hits under their own name: rebinding `docs` here would replace the
# split documents that the later examples pass to `from_documents`.
results = docsearch.similarity_search(query, k=10)
# Show the text of the top-ranked hit.
print(results[0].page_content)

A commonly used case

def test_dense_float_vectore_lsh_cosine() -> None:
    """
    Test indexing with vector type knn_dense_float_vector and model/similarity
    of lsh/cosine.

    This mapping is compatible with:
      * model "exact" and similarity "l2" or "cosine"
      * model "lsh" and similarity "cosine"

    Relies on the module-level ``docs``, ``embeddings``, ``query`` and the
    connection settings (``ES_URL``, ``USER``, ``PASSWORD``, ``indexname``).
    """
    docsearch = EcloudESVectorStore.from_documents(
        docs,
        embeddings,
        es_url=ES_URL,
        user=USER,
        password=PASSWORD,
        index_name=indexname,
        refresh_indices=True,
        text_field="my_text",
        vector_field="my_vec",
        vector_type="knn_dense_float_vector",
        vector_params={"model": "lsh", "similarity": "cosine", "L": 99, "k": 1},
    )

    # NOTE: search hits are stored in `results`, not `docs`. Assigning to
    # `docs` anywhere in this function would make it a local name and the
    # `from_documents(docs, ...)` call above would raise UnboundLocalError.

    # Exact model, similarity left unspecified.
    results = docsearch.similarity_search(
        query,
        k=10,
        search_params={
            "model": "exact",
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

    # Exact model with l2 similarity.
    results = docsearch.similarity_search(
        query,
        k=10,
        search_params={
            "model": "exact",
            "similarity": "l2",
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

    # Exact model with cosine similarity.
    results = docsearch.similarity_search(
        query,
        k=10,
        search_params={
            "model": "exact",
            "similarity": "cosine",
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

    # LSH model with cosine similarity and an explicit "candidates" setting.
    results = docsearch.similarity_search(
        query,
        k=10,
        search_params={
            "model": "lsh",
            "similarity": "cosine",
            "candidates": 10,
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

A case with a filter

def test_dense_float_vectore_exact_with_filter() -> None:
    """
    Test indexing with vector type knn_dense_float_vector and the default
    model/similarity, then searching with different filters.

    This mapping is compatible with model "exact" and similarity "l2" or
    "cosine".

    Relies on the module-level ``docs``, ``embeddings``, ``query`` and the
    connection settings (``ES_URL``, ``USER``, ``PASSWORD``, ``indexname``).
    """
    docsearch = EcloudESVectorStore.from_documents(
        docs,
        embeddings,
        es_url=ES_URL,
        user=USER,
        password=PASSWORD,
        index_name=indexname,
        refresh_indices=True,
        text_field="my_text",
        vector_field="my_vec",
        vector_type="knn_dense_float_vector",
    )

    # NOTE: search hits are stored in `results`, not `docs`. Assigning to
    # `docs` anywhere in this function would make it a local name and the
    # `from_documents(docs, ...)` call above would raise UnboundLocalError.

    # filter={"match_all": {}} -- noted as the default in the original example.
    results = docsearch.similarity_search(
        query,
        k=10,
        filter={"match_all": {}},
        search_params={
            "model": "exact",
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

    # Term filter on the text field: "Jackson".
    results = docsearch.similarity_search(
        query,
        k=10,
        filter={"term": {"my_text": "Jackson"}},
        search_params={
            "model": "exact",
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

    # Term filter on the text field: "president", with l2 similarity.
    results = docsearch.similarity_search(
        query,
        k=10,
        filter={"term": {"my_text": "president"}},
        search_params={
            "model": "exact",
            "similarity": "l2",
            "vector_field": "my_vec",
            "text_field": "my_text",
        },
    )
    print(results[0].page_content)

Help us out by providing feedback on this documentation page: