VectorStores#

Wrappers on top of vector stores.

class langchain.vectorstores.AtlasDB(name: str, embedding_function: Optional[langchain.embeddings.base.Embeddings] = None, api_key: Optional[str] = None, description: str = 'A description for your project', is_public: bool = True, reset_project_if_exists: bool = False)[source]#

Wrapper around Atlas: Nomic’s neural database and rhizomatic instrument.

To use, you should have the nomic python package installed.

Example

from langchain.vectorstores import AtlasDB
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
vectorstore = AtlasDB("my_project", embeddings)
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, refresh: bool = True, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts (Iterable[str]) – Texts to add to the vectorstore.

  • metadatas (Optional[List[dict]], optional) – Optional list of metadatas.

  • ids (Optional[List[str]]) – An optional list of ids.

  • refresh (bool) – Whether or not to refresh indices with the updated data. Default True.

Returns

List of IDs of the added texts.

Return type

List[str]
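
For example, adding a couple of texts with metadata to the vectorstore created above (a minimal sketch; the texts, metadata, and ids are illustrative):

ids = vectorstore.add_texts(
    texts=["first document", "second document"],
    metadatas=[{"source": "a"}, {"source": "b"}],
    ids=["doc-1", "doc-2"],
    refresh=True,  # rebuild indices with the newly added data
)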

create_index(**kwargs: Any) Any[source]#

Creates an index in your project.

See https://docs.nomic.ai/atlas_api.html#nomic.project.AtlasProject.create_index for full detail.

classmethod from_documents(documents: List[langchain.schema.Document], embedding: Optional[langchain.embeddings.base.Embeddings] = None, ids: Optional[List[str]] = None, name: Optional[str] = None, api_key: Optional[str] = None, persist_directory: Optional[str] = None, description: str = 'A description for your project', is_public: bool = True, reset_project_if_exists: bool = False, index_kwargs: Optional[dict] = None, **kwargs: Any) langchain.vectorstores.atlas.AtlasDB[source]#

Create an AtlasDB vectorstore from a list of documents.

Parameters
  • name (str) – Name of the collection to create.

  • api_key (str) – Your nomic API key.

  • documents (List[Document]) – List of documents to add to the vectorstore.

  • embedding (Optional[Embeddings]) – Embedding function. Defaults to None.

  • ids (Optional[List[str]]) – Optional list of document IDs. If None, ids will be auto created

  • description (str) – A description for your project.

  • is_public (bool) – Whether your project is publicly accessible. True by default.

  • reset_project_if_exists (bool) – Whether to reset this project if it already exists. Default False. Generally useful during development and testing.

  • index_kwargs (Optional[dict]) – Dict of kwargs for index creation. See https://docs.nomic.ai/atlas_api.html

Returns

Nomic’s neural database and finest rhizomatic instrument

Return type

AtlasDB

classmethod from_texts(texts: List[str], embedding: Optional[langchain.embeddings.base.Embeddings] = None, metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, name: Optional[str] = None, api_key: Optional[str] = None, description: str = 'A description for your project', is_public: bool = True, reset_project_if_exists: bool = False, index_kwargs: Optional[dict] = None, **kwargs: Any) langchain.vectorstores.atlas.AtlasDB[source]#

Create an AtlasDB vectorstore from raw documents.

Parameters
  • texts (List[str]) – The list of texts to ingest.

  • name (str) – Name of the project to create.

  • api_key (str) – Your nomic API key.

  • embedding (Optional[Embeddings]) – Embedding function. Defaults to None.

  • metadatas (Optional[List[dict]]) – List of metadatas. Defaults to None.

  • ids (Optional[List[str]]) – Optional list of document IDs. If None, ids will be auto created

  • description (str) – A description for your project.

  • is_public (bool) – Whether your project is publicly accessible. True by default.

  • reset_project_if_exists (bool) – Whether to reset this project if it already exists. Default False. Generally useful during development and testing.

  • index_kwargs (Optional[dict]) – Dict of kwargs for index creation. See https://docs.nomic.ai/atlas_api.html

Returns

Nomic’s neural database and finest rhizomatic instrument

Return type

AtlasDB
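
A minimal sketch of building a project from raw texts, reusing the OpenAIEmbeddings import from the class example above (the project name and API key are placeholders):

vectorstore = AtlasDB.from_texts(
    texts=["hello atlas", "hello nomic"],
    embedding=OpenAIEmbeddings(),
    name="my_project",
    api_key="NOMIC_API_KEY",  # placeholder value
    description="Example project",
)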

Run similarity search with AtlasDB.

Parameters
  • query (str) – Query text to search for.

  • k (int) – Number of results to return. Defaults to 4.

Returns

List of documents most similar to the query text.

Return type

List[Document]

class langchain.vectorstores.Chroma(collection_name: str = 'langchain', embedding_function: Optional[Embeddings] = None, persist_directory: Optional[str] = None, client_settings: Optional[chromadb.config.Settings] = None)[source]#

Wrapper around ChromaDB embeddings platform.

To use, you should have the chromadb python package installed.

Example

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
vectorstore = Chroma("langchain_store", embeddings)
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts (Iterable[str]) – Texts to add to the vectorstore.

  • metadatas (Optional[List[dict]], optional) – Optional list of metadatas.

  • ids (Optional[List[str]], optional) – Optional list of IDs.

Returns

List of IDs of the added texts.

Return type

List[str]

delete_collection() None[source]#

Delete the collection.

classmethod from_documents(documents: List[Document], embedding: Optional[Embeddings] = None, ids: Optional[List[str]] = None, collection_name: str = 'langchain', persist_directory: Optional[str] = None, client_settings: Optional[chromadb.config.Settings] = None, **kwargs: Any) Chroma[source]#

Create a Chroma vectorstore from a list of documents.

If a persist_directory is specified, the collection will be persisted there. Otherwise, the data will be ephemeral in-memory.

Parameters
  • collection_name (str) – Name of the collection to create.

  • persist_directory (Optional[str]) – Directory to persist the collection.

  • ids (Optional[List[str]]) – List of document IDs. Defaults to None.

  • documents (List[Document]) – List of documents to add to the vectorstore.

  • embedding (Optional[Embeddings]) – Embedding function. Defaults to None.

  • client_settings (Optional[chromadb.config.Settings]) – Chroma client settings

Returns

Chroma vectorstore.

Return type

Chroma
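
A minimal sketch of creating a persisted collection from documents, reusing the OpenAIEmbeddings import from the class example above (the directory and names are illustrative):

from langchain.schema import Document

docs = [Document(page_content="hello chroma", metadata={"source": "example"})]
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
    collection_name="langchain_store",
    persist_directory="./chroma_db",  # omit for an ephemeral in-memory collection
)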

classmethod from_texts(texts: List[str], embedding: Optional[Embeddings] = None, metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, collection_name: str = 'langchain', persist_directory: Optional[str] = None, client_settings: Optional[chromadb.config.Settings] = None, **kwargs: Any) Chroma[source]#

Create a Chroma vectorstore from raw documents.

If a persist_directory is specified, the collection will be persisted there. Otherwise, the data will be ephemeral in-memory.

Parameters
  • texts (List[str]) – List of texts to add to the collection.

  • collection_name (str) – Name of the collection to create.

  • persist_directory (Optional[str]) – Directory to persist the collection.

  • embedding (Optional[Embeddings]) – Embedding function. Defaults to None.

  • metadatas (Optional[List[dict]]) – List of metadatas. Defaults to None.

  • ids (Optional[List[str]]) – List of document IDs. Defaults to None.

  • client_settings (Optional[chromadb.config.Settings]) – Chroma client settings

Returns

Chroma vectorstore.

Return type

Chroma

persist() None[source]#

Persist the collection.

This can be used to explicitly persist the data to disk. It will also be called automatically when the object is destroyed.
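
For example, assuming the vectorstore was created with a persist_directory:

vectorstore.add_texts(["persist me"])
vectorstore.persist()  # explicitly flush the collection to persist_directory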

Run similarity search with Chroma.

Parameters
  • query (str) – Query text to search for.

  • k (int) – Number of results to return. Defaults to 4.

  • filter (Optional[Dict[str, str]]) – Filter by metadata. Defaults to None.

Returns

List of documents most similar to the query text.

Return type

List[Document]

similarity_search_by_vector(embedding: List[float], k: int = 4, filter: Optional[Dict[str, str]] = None, **kwargs: Any) List[langchain.schema.Document][source]#

Return docs most similar to embedding vector.

Parameters
  • embedding – Embedding to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query vector.

similarity_search_with_score(query: str, k: int = 4, filter: Optional[Dict[str, str]] = None, **kwargs: Any) List[Tuple[langchain.schema.Document, float]][source]#

Run similarity search with Chroma with distance.

Parameters
  • query (str) – Query text to search for.

  • k (int) – Number of results to return. Defaults to 4.

  • filter (Optional[Dict[str, str]]) – Filter by metadata. Defaults to None.

Returns

List of documents most similar to the query text, with distance in float.

Return type

List[Tuple[Document, float]]
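
A sketch of a scored search with a metadata filter against the vectorstore from the class example above (the query and filter key are illustrative):

docs_and_scores = vectorstore.similarity_search_with_score(
    "what is chroma?",
    k=2,
    filter={"source": "example"},  # optional metadata filter
)
for doc, score in docs_and_scores:
    print(score, doc.page_content)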

class langchain.vectorstores.DeepLake(dataset_path: str = 'mem://langchain', token: Optional[str] = None, embedding_function: Optional[langchain.embeddings.base.Embeddings] = None)[source]#

Wrapper around Deep Lake, a data lake for deep learning applications.

It not only stores embeddings, but also the original data and queries with version control automatically enabled.

It is more than just a vector store. You can use the dataset to fine-tune your own LLM models or use it for other downstream tasks.

Naive similarity search is implemented by default, but it can be extended with Tensor Query Language (TQL) for production use cases over billions of rows.

To use, you should have the deeplake python package installed.

Example

from langchain.vectorstores import DeepLake
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
vectorstore = DeepLake(dataset_path="langchain_store", embedding_function=embeddings)
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts (Iterable[str]) – Texts to add to the vectorstore.

  • metadatas (Optional[List[dict]], optional) – Optional list of metadatas.

  • ids (Optional[List[str]], optional) – Optional list of IDs.

Returns

List of IDs of the added texts.

Return type

List[str]

delete_dataset() None[source]#

Delete the dataset.

classmethod from_texts(texts: List[str], embedding: Optional[langchain.embeddings.base.Embeddings] = None, metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, dataset_path: str = 'mem://langchain', **kwargs: Any) langchain.vectorstores.deeplake.DeepLake[source]#

Create a Deep Lake dataset from raw documents.

If a dataset_path is specified, the dataset will be persisted there. Otherwise (the default mem:// path), the data will be ephemeral in-memory.

Parameters
  • dataset_path (str) – The full path to the dataset. Can be:

    • a Deep Lake cloud path of the form hub://username/datasetname. To write to Deep Lake cloud datasets, ensure that you are logged in to Deep Lake (use 'activeloop login' from the command line).

    • an s3 path of the form s3://bucketname/path/to/dataset. Credentials are required in either the environment or passed to the creds argument.

    • a local file system path of the form ./path/to/dataset, ~/path/to/dataset, or path/to/dataset.

    • a memory path of the form mem://path/to/dataset, which keeps the dataset in memory instead of saving it. Should be used only for testing as it does not persist.

  • texts (List[str]) – List of texts to add.

  • embedding (Optional[Embeddings]) – Embedding function. Defaults to None.

  • metadatas (Optional[List[dict]]) – List of metadatas. Defaults to None.

  • ids (Optional[List[str]]) – List of document IDs. Defaults to None.

Returns

Deep Lake dataset.

Return type

DeepLake
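
A minimal sketch using a local dataset path (the path is illustrative; hub:// or s3:// paths work as described above):

vectorstore = DeepLake.from_texts(
    texts=["hello deep lake"],
    embedding=OpenAIEmbeddings(),
    dataset_path="./my_deeplake_dataset",
)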

persist() None[source]#

Persist the dataset.

Return docs most similar to query.

class langchain.vectorstores.ElasticVectorSearch(elasticsearch_url: str, index_name: str, embedding: langchain.embeddings.base.Embeddings)[source]#

Wrapper around Elasticsearch as a vector database.

Example

from langchain import ElasticVectorSearch
elastic_vector_search = ElasticVectorSearch(
    "http://localhost:9200",
    "embeddings",
    embedding
)
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts – Iterable of strings to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

Returns

List of ids from adding the texts into the vectorstore.

classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any) langchain.vectorstores.elastic_vector_search.ElasticVectorSearch[source]#

Construct ElasticVectorSearch wrapper from raw documents.

This is a user-friendly interface that:
  1. Embeds documents.

  2. Creates a new index for the embeddings in the Elasticsearch instance.

  3. Adds the documents to the newly created Elasticsearch index.

This is intended to be a quick way to get started.

Example

from langchain import ElasticVectorSearch
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
elastic_vector_search = ElasticVectorSearch.from_texts(
    texts,
    embeddings,
    elasticsearch_url="http://localhost:9200"
)

Return docs most similar to query.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query.

class langchain.vectorstores.FAISS(embedding_function: Callable, index: Any, docstore: langchain.docstore.base.Docstore, index_to_docstore_id: Dict[int, str])[source]#

Wrapper around FAISS vector database.

To use, you should have the faiss python package installed.

Example

from langchain import FAISS
faiss = FAISS(embedding_function, index, docstore, index_to_docstore_id)
add_embeddings(text_embeddings: Iterable[Tuple[str, List[float]]], metadatas: Optional[List[dict]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • text_embeddings – Iterable pairs of string and embedding to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

Returns

List of ids from adding the texts into the vectorstore.

add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts – Iterable of strings to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

Returns

List of ids from adding the texts into the vectorstore.

classmethod from_embeddings(text_embeddings: List[Tuple[str, List[float]]], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any) langchain.vectorstores.faiss.FAISS[source]#

Construct FAISS wrapper from raw documents.

This is a user friendly interface that:
  1. Embeds documents.

  2. Creates an in memory docstore

  3. Initializes the FAISS database

This is intended to be a quick way to get started.

Example

from langchain import FAISS
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
faiss = FAISS.from_texts(texts, embeddings)
classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any) langchain.vectorstores.faiss.FAISS[source]#

Construct FAISS wrapper from raw documents.

This is a user friendly interface that:
  1. Embeds documents.

  2. Creates an in memory docstore

  3. Initializes the FAISS database

This is intended to be a quick way to get started.

Example

from langchain import FAISS
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
faiss = FAISS.from_texts(texts, embeddings)
classmethod load_local(folder_path: str, embeddings: langchain.embeddings.base.Embeddings) langchain.vectorstores.faiss.FAISS[source]#

Load FAISS index, docstore, and index_to_docstore_id from disk.

Parameters
  • folder_path – folder path to load index, docstore, and index_to_docstore_id from.

  • embeddings – Embeddings to use when generating queries

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • fetch_k – Number of Documents to fetch to pass to MMR algorithm.

Returns

List of Documents selected by maximal marginal relevance.

max_marginal_relevance_search_by_vector(embedding: List[float], k: int = 4, fetch_k: int = 20) List[langchain.schema.Document][source]#

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

Parameters
  • embedding – Embedding to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • fetch_k – Number of Documents to fetch to pass to MMR algorithm.

Returns

List of Documents selected by maximal marginal relevance.

merge_from(target: langchain.vectorstores.faiss.FAISS) None[source]#

Merge another FAISS object with the current one.

Add the target FAISS to the current one.

Parameters

target – FAISS object you wish to merge into the current one

Returns

None.
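
For example, merging two independently built indexes (a sketch assuming both were built with the same embeddings object):

db1 = FAISS.from_texts(["foo"], embeddings)
db2 = FAISS.from_texts(["bar"], embeddings)
db1.merge_from(db2)  # db1 now contains documents from both stores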

save_local(folder_path: str) None[source]#

Save FAISS index, docstore, and index_to_docstore_id to disk.

Parameters

folder_path – folder path to save index, docstore, and index_to_docstore_id to.
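
Together with load_local this gives a save/restore round trip, reusing texts and embeddings from the from_texts example above (the folder name is illustrative):

faiss_store = FAISS.from_texts(texts, embeddings)
faiss_store.save_local("faiss_index")          # write index and docstore to disk
restored = FAISS.load_local("faiss_index", embeddings)  # reload them later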

Return docs most similar to query.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query.

similarity_search_by_vector(embedding: List[float], k: int = 4, **kwargs: Any) List[langchain.schema.Document][source]#

Return docs most similar to embedding vector.

Parameters
  • embedding – Embedding to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the embedding.

similarity_search_with_score(query: str, k: int = 4) List[Tuple[langchain.schema.Document, float]][source]#

Return docs most similar to query.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query and score for each

similarity_search_with_score_by_vector(embedding: List[float], k: int = 4) List[Tuple[langchain.schema.Document, float]][source]#

Return docs most similar to the embedding vector.

Parameters
  • embedding – Embedding to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query and score for each

class langchain.vectorstores.Milvus(embedding_function: langchain.embeddings.base.Embeddings, connection_args: dict, collection_name: str, text_field: str)[source]#

Wrapper around the Milvus vector database.

add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, partition_name: Optional[str] = None, timeout: Optional[int] = None, **kwargs: Any) List[str][source]#

Insert text data into Milvus.

When using add_texts() it is assumed that a collection has already been made and indexed. If metadata is included, it is assumed that it is ordered correctly to match the schema provided to the Collection and that the embedding vector is the first schema field.

Parameters
  • texts (Iterable[str]) – The text being embedded and inserted.

  • metadatas (Optional[List[dict]], optional) – The metadata that corresponds to each insert. Defaults to None.

  • partition_name (str, optional) – The partition of the collection to insert data into. Defaults to None.

  • timeout – specified timeout.

Returns

The resulting keys for each inserted element.

Return type

List[str]

classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any) langchain.vectorstores.milvus.Milvus[source]#

Create a Milvus collection, index it with HNSW, and insert the data.

Parameters
  • texts (List[str]) – Text to insert.

  • embedding (Embeddings) – Embedding function to use.

  • metadatas (Optional[List[dict]], optional) – Metadata dicts for each text. Defaults to None.

Returns

The Milvus vector store.

Return type

VectorStore
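
A sketch of creating a collection from texts, reusing the OpenAIEmbeddings import from earlier examples; the connection_args dict for a local Milvus instance is an assumption and is passed through **kwargs:

vector_db = Milvus.from_texts(
    texts=["hello milvus"],
    embedding=OpenAIEmbeddings(),
    connection_args={"host": "localhost", "port": "19530"},  # assumed local instance
)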

Perform a search and return results that are reordered by MMR.

Parameters
  • query (str) – The text being searched.

  • k (int, optional) – How many results to give. Defaults to 4.

  • fetch_k (int, optional) – Total results to select k from. Defaults to 20.

  • param (dict, optional) – The search params for the specified index. Defaults to None.

  • expr (str, optional) – Filtering expression. Defaults to None.

  • partition_names (List[str], optional) – What partitions to search. Defaults to None.

  • round_decimal (int, optional) – Round the resulting distance. Defaults to -1.

  • timeout (int, optional) – Amount to wait before timeout error. Defaults to None.

Returns

Document results for search.

Return type

List[Document]

Perform a similarity search against the query string.

Parameters
  • query (str) – The text to search.

  • k (int, optional) – How many results to return. Defaults to 4.

  • param (dict, optional) – The search params for the index type. Defaults to None.

  • expr (str, optional) – Filtering expression. Defaults to None.

  • partition_names (List[str], optional) – What partitions to search. Defaults to None.

  • round_decimal (int, optional) – What decimal point to round to. Defaults to -1.

  • timeout (int, optional) – How long to wait before timeout error. Defaults to None.

Returns

Document results for search.

Return type

List[Document]

similarity_search_with_score(query: str, k: int = 4, param: Optional[dict] = None, expr: Optional[str] = None, partition_names: Optional[List[str]] = None, round_decimal: int = - 1, timeout: Optional[int] = None, **kwargs: Any) List[Tuple[langchain.schema.Document, float]][source]#

Perform a search on a query string and return results.

Parameters
  • query (str) – The text being searched.

  • k (int, optional) – The number of results to return. Defaults to 4.

  • param (dict, optional) – The search params for the specified index. Defaults to None.

  • expr (str, optional) – Filtering expression. Defaults to None.

  • partition_names (List[str], optional) – Partitions to search through. Defaults to None.

  • round_decimal (int, optional) – Round the resulting distance. Defaults to -1.

  • timeout (int, optional) – Amount to wait before timeout error. Defaults to None.

  • kwargs – Collection.search() keyword arguments.

Returns

List of (Document, distance) results for the search.

Return type

List[Tuple[Document, float]]

class langchain.vectorstores.OpenSearchVectorSearch(opensearch_url: str, index_name: str, embedding_function: langchain.embeddings.base.Embeddings, **kwargs: Any)[source]#

Wrapper around OpenSearch as a vector database.

Example

from langchain import OpenSearchVectorSearch
opensearch_vector_search = OpenSearchVectorSearch(
    "http://localhost:9200",
    "embeddings",
    embedding_function
)
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, bulk_size: int = 500, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts – Iterable of strings to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

  • bulk_size – Bulk API request count; Default: 500

Returns

List of ids from adding the texts into the vectorstore.

classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, bulk_size: int = 500, **kwargs: Any) langchain.vectorstores.opensearch_vector_search.OpenSearchVectorSearch[source]#

Construct OpenSearchVectorSearch wrapper from raw documents.

Example

from langchain import OpenSearchVectorSearch
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
opensearch_vector_search = OpenSearchVectorSearch.from_texts(
    texts,
    embeddings,
    opensearch_url="http://localhost:9200"
)

By default, OpenSearch supports Approximate Search powered by the nmslib, faiss, and lucene engines, which is recommended for large datasets. It also supports brute force search through Script Scoring and Painless Scripting.

Optional Keyword Args for Approximate Search:

engine: "nmslib", "faiss", "hnsw"; default: "nmslib"

space_type: "l2", "l1", "cosinesimil", "linf", "innerproduct"; default: "l2"

ef_search: Size of the dynamic list used during k-NN searches. Higher values lead to more accurate but slower searches; default: 512

ef_construction: Size of the dynamic list used during k-NN graph creation. Higher values lead to more accurate graph but slower indexing speed; default: 512

m: Number of bidirectional links created for each new element. Large impact on memory consumption. Between 2 and 100; default: 16

Keyword Args for Script Scoring or Painless Scripting:

is_appx_search: False

Return docs most similar to query.

By default supports Approximate Search. Also supports Script Scoring and Painless Scripting.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query.

Optional Args for Approximate Search:

search_type: "approximate_search"; default: "approximate_search"

size: number of results the query actually returns; default: 4

Optional Args for Script Scoring Search:

search_type: "script_scoring"; default: "approximate_search"

space_type: "l2", "l1", "linf", "cosinesimil", "innerproduct", "hammingbit"; default: "l2"

pre_filter: script_score query to pre-filter documents before identifying nearest neighbors; default: {"match_all": {}}

Optional Args for Painless Scripting Search:

search_type: "painless_scripting"; default: "approximate_search"

space_type: "l2Squared", "l1Norm", "cosineSimilarity"; default: "l2Squared"

pre_filter: script_score query to pre-filter documents before identifying nearest neighbors; default: {"match_all": {}}
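
For example, a brute-force Script Scoring search with a pre-filter against the wrapper from the class example above (a sketch; the query and filter are illustrative):

docs = opensearch_vector_search.similarity_search(
    "What did the president say?",
    k=4,
    search_type="script_scoring",
    space_type="cosinesimil",
    pre_filter={"bool": {"filter": {"term": {"text": "president"}}}},
)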

class langchain.vectorstores.Pinecone(index: Any, embedding_function: Callable, text_key: str, namespace: Optional[str] = None)[source]#

Wrapper around Pinecone vector database.

To use, you should have the pinecone-client python package installed.

Example

from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

pinecone.init(api_key="***", environment="us-west1-gcp")
index = pinecone.Index("langchain-demo")
embeddings = OpenAIEmbeddings()
vectorstore = Pinecone(index, embeddings.embed_query, "text")
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, namespace: Optional[str] = None, batch_size: int = 32, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts – Iterable of strings to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

  • ids – Optional list of ids to associate with the texts.

  • namespace – Optional pinecone namespace to add the texts to.

Returns

List of ids from adding the texts into the vectorstore.

classmethod from_existing_index(index_name: str, embedding: langchain.embeddings.base.Embeddings, text_key: str = 'text', namespace: Optional[str] = None) langchain.vectorstores.pinecone.Pinecone[source]#

Load pinecone vectorstore from index name.
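
A minimal sketch, assuming pinecone.init has already been called as in the class example above (the index name is illustrative):

vectorstore = Pinecone.from_existing_index(
    index_name="langchain-demo",
    embedding=OpenAIEmbeddings(),
    text_key="text",
)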

classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, batch_size: int = 32, text_key: str = 'text', index_name: Optional[str] = None, namespace: Optional[str] = None, **kwargs: Any) langchain.vectorstores.pinecone.Pinecone[source]#

Construct Pinecone wrapper from raw documents.

This is a user friendly interface that:
  1. Embeds documents.

  2. Adds the documents to a provided Pinecone index

This is intended to be a quick way to get started.

Example

from langchain import Pinecone
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
pinecone = Pinecone.from_texts(
    texts,
    embeddings,
    index_name="langchain-demo"
)

Return pinecone documents most similar to query.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • filter – Dictionary of argument(s) to filter on metadata

  • namespace – Namespace to search in. Default will search in '' namespace.

Returns

List of Documents most similar to the query and score for each

similarity_search_with_score(query: str, k: int = 5, filter: Optional[dict] = None, namespace: Optional[str] = None) List[Tuple[langchain.schema.Document, float]][source]#

Return pinecone documents most similar to query, along with scores.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 5.

  • filter – Dictionary of argument(s) to filter on metadata

  • namespace – Namespace to search in. Default will search in '' namespace.

Returns

List of Documents most similar to the query and score for each

class langchain.vectorstores.Qdrant(client: Any, collection_name: str, embedding_function: Callable, content_payload_key: str = 'page_content', metadata_payload_key: str = 'metadata')[source]#

Wrapper around Qdrant vector database.

To use, you should have the qdrant-client python package installed.

Example

from qdrant_client import QdrantClient
from langchain import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()
client = QdrantClient()
collection_name = "MyCollection"
qdrant = Qdrant(client, collection_name, embeddings.embed_query)
CONTENT_KEY = 'page_content'#
METADATA_KEY = 'metadata'#
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts – Iterable of strings to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

Returns

List of ids from adding the texts into the vectorstore.

classmethod from_documents(documents: List[langchain.schema.Document], embedding: langchain.embeddings.base.Embeddings, url: Optional[str] = None, port: Optional[int] = 6333, grpc_port: int = 6334, prefer_grpc: bool = False, https: Optional[bool] = None, api_key: Optional[str] = None, prefix: Optional[str] = None, timeout: Optional[float] = None, host: Optional[str] = None, collection_name: Optional[str] = None, distance_func: str = 'Cosine', content_payload_key: str = 'page_content', metadata_payload_key: str = 'metadata', **kwargs: Any) langchain.vectorstores.qdrant.Qdrant[source]#

Return VectorStore initialized from documents and embeddings.

classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, url: Optional[str] = None, port: Optional[int] = 6333, grpc_port: int = 6334, prefer_grpc: bool = False, https: Optional[bool] = None, api_key: Optional[str] = None, prefix: Optional[str] = None, timeout: Optional[float] = None, host: Optional[str] = None, collection_name: Optional[str] = None, distance_func: str = 'Cosine', content_payload_key: str = 'page_content', metadata_payload_key: str = 'metadata', **kwargs: Any) langchain.vectorstores.qdrant.Qdrant[source]#

Construct Qdrant wrapper from raw documents.

Parameters
  • texts – A list of texts to be indexed in Qdrant.

  • embedding – A subclass of Embeddings, responsible for text vectorization.

  • metadatas – An optional list of metadata. If provided it has to be of the same length as a list of texts.

  • url – either host or str of "Optional[scheme], host, Optional[port], Optional[prefix]". Default: None

  • port – Port of the REST API interface. Default: 6333

  • grpc_port – Port of the gRPC interface. Default: 6334

  • prefer_grpc – If true - use gRPC interface whenever possible in custom methods.

  • https – If true - use HTTPS(SSL) protocol. Default: None

  • api_key – API key for authentication in Qdrant Cloud. Default: None

  • prefix – If not None, add prefix to the REST URL path. Example: service/v1 will result in http://localhost:6333/service/v1/{qdrant-endpoint} for REST API. Default: None

  • timeout – Timeout for REST and gRPC API requests. Default: 5.0 seconds for REST and unlimited for gRPC

  • host – Host name of Qdrant service. If url and host are None, set to 'localhost'. Default: None

  • collection_name – Name of the Qdrant collection to be used. If not provided, will be created randomly.

  • distance_func – Distance function. One of: "Cosine" / "Euclid" / "Dot".

  • content_payload_key – A payload key used to store the content of the document.

  • metadata_payload_key – A payload key used to store the metadata of the document.

  • **kwargs – Additional arguments passed directly into REST client initialization

This is a user friendly interface that:
  1. Embeds documents.

  2. Creates an in memory docstore

  3. Initializes the Qdrant database

This is intended to be a quick way to get started.

Example

from langchain import Qdrant
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
qdrant = Qdrant.from_texts(texts, embeddings, "localhost")

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • fetch_k – Number of Documents to fetch to pass to MMR algorithm.

Returns

List of Documents selected by maximal marginal relevance.

Return docs most similar to query.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • filter – Filter by metadata. Defaults to None.

Returns

List of Documents most similar to the query.

similarity_search_with_score(query: str, k: int = 4, filter: Optional[Dict[str, Union[str, int, bool]]] = None) List[Tuple[langchain.schema.Document, float]][source]#

Return docs most similar to query.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • filter – Filter by metadata. Defaults to None.

Returns

List of Documents most similar to the query and score for each
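
A sketch of a scored search with a metadata filter against the qdrant instance from the class example above (the query and filter key are illustrative):

results = qdrant.similarity_search_with_score("What is Qdrant?", k=4, filter={"source": "docs"})
for doc, score in results:
    print(score, doc.metadata)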

class langchain.vectorstores.VectorStore[source]#

Interface for vector stores.

add_documents(documents: List[langchain.schema.Document], **kwargs: Any) List[str][source]#

Run more documents through the embeddings and add to the vectorstore.

Parameters

documents (List[Document]) – Documents to add to the vectorstore.

Returns

List of IDs of the added texts.

Return type

List[str]

abstract add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any) List[str][source]#

Run more texts through the embeddings and add to the vectorstore.

Parameters
  • texts – Iterable of strings to add to the vectorstore.

  • metadatas – Optional list of metadatas associated with the texts.

  • kwargs – vectorstore specific parameters

Returns

List of ids from adding the texts into the vectorstore.

as_retriever(**kwargs: Any) langchain.schema.BaseRetriever[source]#
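
Wraps the vector store in a BaseRetriever so it can be used wherever a retriever is expected. A minimal sketch, assuming vectorstore is any concrete VectorStore instance:

retriever = vectorstore.as_retriever()
docs = retriever.get_relevant_documents("my query")
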
classmethod from_documents(documents: List[langchain.schema.Document], embedding: langchain.embeddings.base.Embeddings, **kwargs: Any) langchain.vectorstores.base.VectorStore[source]#

Return VectorStore initialized from documents and embeddings.

abstract classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any) langchain.vectorstores.base.VectorStore[source]#

Return VectorStore initialized from texts and embeddings.

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

Parameters
  • query – Text to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • fetch_k – Number of Documents to fetch to pass to MMR algorithm.

Returns

List of Documents selected by maximal marginal relevance.

max_marginal_relevance_search_by_vector(embedding: List[float], k: int = 4, fetch_k: int = 20) List[langchain.schema.Document][source]#

Return docs selected using the maximal marginal relevance.

Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents.

Parameters
  • embedding – Embedding to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

  • fetch_k – Number of Documents to fetch to pass to MMR algorithm.

Returns

List of Documents selected by maximal marginal relevance.

Return docs most similar to query.

similarity_search_by_vector(embedding: List[float], k: int = 4, **kwargs: Any) List[langchain.schema.Document][source]#

Return docs most similar to embedding vector.

Parameters
  • embedding – Embedding to look up documents similar to.

  • k – Number of Documents to return. Defaults to 4.

Returns

List of Documents most similar to the query vector.

class langchain.vectorstores.Weaviate(client: Any, index_name: str, text_key: str, attributes: Optional[List[str]] = None)[source]#

Wrapper around Weaviate vector database.

To use, you should have the weaviate-client python package installed.

Example

import os
import weaviate
from langchain.vectorstores import Weaviate

client = weaviate.Client(url=os.environ["WEAVIATE_URL"], ...)
weaviate = Weaviate(client, index_name, text_key)
add_texts(texts: Iterable[str], metadatas: Optional[List[dict]] = None, **kwargs: Any) List[str][source]#

Upload texts with metadata (properties) to Weaviate.

classmethod from_texts(texts: List[str], embedding: langchain.embeddings.base.Embeddings, metadatas: Optional[List[dict]] = None, **kwargs: Any) langchain.vectorstores.base.VectorStore[source]#

Not implemented for Weaviate yet.

Look up similar documents in Weaviate.
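
A minimal sketch of a search against the weaviate wrapper created in the class example above (the query is illustrative):

docs = weaviate.similarity_search("What is Weaviate?", k=4)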