VertexAIEmbeddings#

class langchain_google_vertexai.embeddings.VertexAIEmbeddings[source]#

Bases: _VertexAICommon, Embeddings

Google Cloud VertexAI embedding models.

Initialize the Vertex AI embeddings model.

param additional_headers: Dict[str, str] | None = None#: A key-value dictionary representing additional headers for the model call

param api_endpoint: str | None = None (alias 'base_url')#: Desired API endpoint, e.g., us-central1-aiplatform.googleapis.com

param api_transport: str | None = None#: The desired API transport method, can be either ‘grpc’ or ‘rest’. Uses the default parameter in vertexai.init if defined.

param audio_timestamp: bool | None = None#: Enable timestamp understanding of audio-only files

param client_cert_source: Callable[[], Tuple[bytes, bytes]] | None = None#: A callback which returns client certificate bytes and private key bytes, both in PEM format.

param credentials: Any = None#: The default custom credentials (google.auth.credentials.Credentials) to use

param endpoint_version: Literal['v1', 'v1beta1'] = 'v1beta1'#

Whether to use v1 or v1beta1 endpoint.

v1 is more performant, but v1beta1 might have some new features.

param frequency_penalty: float | None = None#: Positive values penalize tokens that repeatedly appear in the generated text, decreasing the probability of repeating content.

param full_model_name: str | None = None#: The full name of the model’s endpoint.

param include_thoughts: bool | None = None#: Indicates whether to include thoughts in the response.

param location: str = 'us-central1'#: The default location to use when making API calls.

param max_output_tokens: int | None = None (alias 'max_tokens')#: Token limit determines the maximum amount of text output from one prompt.

param max_retries: int = 6#: The maximum number of retries to make when generating.

param model_name: str | None = None (alias 'model')#: Underlying model name.

param n: int = 1#: How many completions to generate for each prompt.

param presence_penalty: float | None = None#: Positive values penalize tokens that already appear in the generated text, increasing the probability of generating more diverse content.

param project: str | None = None#: The default GCP project to use when making Vertex API calls.

param request_parallelism: int = 5#: The amount of parallelism allowed for requests issued to VertexAI models.

param response_modalities: List[Modality] | None = None#: A list of modalities of the response

param safety_settings: SafetySettingsType | None = None#

The default safety settings to use for all generations.

For example:

from langchain_google_vertexai import HarmBlockThreshold, HarmCategory

safety_settings = {
    HarmCategory.HARM_CATEGORY_UNSPECIFIED: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
}

param seed: int | None = None#: Random seed for the generation.

param stop: List[str] | None = None (alias 'stop_sequences')#: Optional list of stop words to use when generating.

param streaming: bool = False#: Whether to stream the results or not.

param temperature: float | None = None#: Sampling temperature, it controls the degree of randomness in token selection.

param thinking_budget: int | None = None#: Indicates the thinking budget in tokens.

param top_k: int | None = None#: How the model selects tokens for output: the next token is selected from among the top-k most probable tokens.

param top_p: float | None = None#: Tokens are selected from most probable to least until the sum of their probabilities equals the top-p value.

param tuned_model_name: str | None = None#: The name of a tuned model.

async aembed_documents( texts: list[str], ) → list[list[float]]#

Asynchronously embed search docs.

Parameters: texts (list[str]) – List of texts to embed.
Returns: List of embeddings.
Return type: list[list[float]]

async aembed_query(text: str) → list[float]#

Asynchronously embed query text.

Parameters: text (str) – Text to embed.
Returns: Embedding.
Return type: list[float]

embed( texts: List[str], batch_size: int = 0, embeddings_task_type: Literal['RETRIEVAL_QUERY', 'RETRIEVAL_DOCUMENT', 'SEMANTIC_SIMILARITY', 'CLASSIFICATION', 'CLUSTERING', 'QUESTION_ANSWERING', 'FACT_VERIFICATION', 'CODE_RETRIEVAL_QUERY'] | None = None, dimensions: int | None = None, ) → List[List[float]][source]#

Embed a list of strings.

Parameters:

texts (List[str]) – The list of strings to embed.
batch_size (int) – The batch size of embeddings to send to the model. If zero, then the largest batch size will be detected dynamically at the first request, starting from 250, down to 5.
embeddings_task_type (Literal['RETRIEVAL_QUERY', 'RETRIEVAL_DOCUMENT', 'SEMANTIC_SIMILARITY', 'CLASSIFICATION', 'CLUSTERING', 'QUESTION_ANSWERING', 'FACT_VERIFICATION', 'CODE_RETRIEVAL_QUERY'] | None) – Optional embeddings task type, one of the following:

RETRIEVAL_QUERY - Text is a query in a search/retrieval setting.
RETRIEVAL_DOCUMENT - Text is a document in a search/retrieval setting.
SEMANTIC_SIMILARITY - Embeddings will be used for Semantic Textual Similarity (STS).
CLASSIFICATION - Embeddings will be used for classification.
CLUSTERING - Embeddings will be used for clustering.
CODE_RETRIEVAL_QUERY - Embeddings will be used for code retrieval for Java and Python.

The following are only supported on preview models: QUESTION_ANSWERING, FACT_VERIFICATION.
dimensions (int | None) – Optional output embeddings dimensions. Only supported on preview models.

Returns:

List of embeddings, one for each text.

Return type:

List[List[float]]

embed_documents( texts: List[str], batch_size: int = 0, *, embeddings_task_type: Literal['RETRIEVAL_QUERY', 'RETRIEVAL_DOCUMENT', 'SEMANTIC_SIMILARITY', 'CLASSIFICATION', 'CLUSTERING', 'QUESTION_ANSWERING', 'FACT_VERIFICATION', 'CODE_RETRIEVAL_QUERY'] = 'RETRIEVAL_DOCUMENT', ) → List[List[float]][source]#

Embed a list of documents.

Parameters:

texts (List[str]) – The list of texts to embed.
batch_size (int) – The batch size of embeddings to send to the model. If zero, then the largest batch size will be detected dynamically at the first request, starting from 250, down to 5.
embeddings_task_type (Literal['RETRIEVAL_QUERY', 'RETRIEVAL_DOCUMENT', 'SEMANTIC_SIMILARITY', 'CLASSIFICATION', 'CLUSTERING', 'QUESTION_ANSWERING', 'FACT_VERIFICATION', 'CODE_RETRIEVAL_QUERY'])

Returns:

List of embeddings, one for each text.

Return type:

List[List[float]]

embed_image( image_path: str, contextual_text: str | None = None, dimensions: int | None = None, ) → List[float][source]#

Deprecated since version 2.0.1: Use embed_images() instead. It will be removed in langchain-google-vertexai==3.0.0.

Embed an image.

Parameters:

image_path (str) – Path to image (Google Cloud Storage or web) to generate embeddings for.
contextual_text (str | None) – Text to generate embeddings for.
dimensions (int | None)

Returns:

Embedding for the image.

Return type:

List[float]

embed_images( uris: List[str], contextual_text: str | None = None, dimensions: int | None = None, ) → List[List[float]][source]#

Embed a list of images.

Parameters:

uris (List[str]) – Paths to images (local, Google Cloud Storage or web) to generate embeddings for.
contextual_text (str | None) – Text to generate embeddings for.
dimensions (int | None)

Returns:

List of embeddings, one for each image.

Return type:

List[List[float]]

embed_query( text: str, *, embeddings_task_type: Literal['RETRIEVAL_QUERY', 'RETRIEVAL_DOCUMENT', 'SEMANTIC_SIMILARITY', 'CLASSIFICATION', 'CLUSTERING', 'QUESTION_ANSWERING', 'FACT_VERIFICATION', 'CODE_RETRIEVAL_QUERY'] = 'RETRIEVAL_QUERY', ) → List[float][source]#

Embed a text.

Parameters:

text (str) – The text to embed.
embeddings_task_type (Literal['RETRIEVAL_QUERY', 'RETRIEVAL_DOCUMENT', 'SEMANTIC_SIMILARITY', 'CLASSIFICATION', 'CLUSTERING', 'QUESTION_ANSWERING', 'FACT_VERIFICATION', 'CODE_RETRIEVAL_QUERY'])

Returns:

Embedding for the text.

Return type:

List[float]

property async_prediction_client: PredictionServiceAsyncClient | PredictionServiceAsyncClient#: Returns PredictionServiceAsyncClient.

property max_tokens: int | None#

property model_type: str#

property model_version: GoogleEmbeddingModelVersion#

property prediction_client: PredictionServiceClient | PredictionServiceClient#: Returns PredictionServiceClient.

task_executor: ClassVar[Executor | None] = FieldInfo(annotation=NoneType, required=False, default=None, exclude=True)#