Document Compressors#

pydantic model langchain.retrievers.document_compressors.CohereRerank[source]#
field client: Client [Required]#
field model: str = 'rerank-english-v2.0'#
field top_n: int = 3#
async acompress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Compress retrieved documents given the query context.

compress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Compress retrieved documents given the query context.

pydantic model langchain.retrievers.document_compressors.DocumentCompressorPipeline[source]#

Document compressor that uses a pipeline of transformers.

field transformers: List[Union[langchain.schema.BaseDocumentTransformer, langchain.retrievers.document_compressors.base.BaseDocumentCompressor]] [Required]#

List of document filters that are chained together and run in sequence.

async acompress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Compress retrieved documents given the query context.

compress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Transform a list of documents by running them through the configured sequence of transformers and compressors.

pydantic model langchain.retrievers.document_compressors.EmbeddingsFilter[source]#
field embeddings: langchain.embeddings.base.Embeddings [Required]#

Embeddings to use for embedding document contents and queries.

field k: Optional[int] = 20#

The number of relevant documents to return. Can be set to None, in which case similarity_threshold must be specified. Defaults to 20.

field similarity_fn: Callable = <function cosine_similarity>#

Similarity function for comparing documents. Function expected to take as input two matrices (List[List[float]]) and return a matrix of scores where higher values indicate greater similarity.

field similarity_threshold: Optional[float] = None#

Threshold for determining when two documents are similar enough to be considered redundant. Defaults to None, must be specified if k is set to None.

async acompress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Filter down documents.

compress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Filter documents based on similarity of their embeddings to the query.

pydantic model langchain.retrievers.document_compressors.LLMChainExtractor[source]#
field get_input: Callable[[str, langchain.schema.Document], dict] = <function default_get_input>#

Callable for constructing the chain input from the query and a Document.

field llm_chain: langchain.chains.llm.LLMChain [Required]#

LLM wrapper to use for compressing documents.

async acompress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Compress page content of raw documents asynchronously.

compress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Compress page content of raw documents.

classmethod from_llm(llm: langchain.base_language.BaseLanguageModel, prompt: Optional[langchain.prompts.prompt.PromptTemplate] = None, get_input: Optional[Callable[[str, langchain.schema.Document], str]] = None, llm_chain_kwargs: Optional[dict] = None) → langchain.retrievers.document_compressors.chain_extract.LLMChainExtractor[source]#

Initialize from LLM.

pydantic model langchain.retrievers.document_compressors.LLMChainFilter[source]#

Filter that drops documents that aren’t relevant to the query.

field get_input: Callable[[str, langchain.schema.Document], dict] = <function default_get_input>#

Callable for constructing the chain input from the query and a Document.

field llm_chain: langchain.chains.llm.LLMChain [Required]#

LLM wrapper to use for filtering documents. The chain prompt is expected to have a BooleanOutputParser.

async acompress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Filter down documents.

compress_documents(documents: Sequence[langchain.schema.Document], query: str) → Sequence[langchain.schema.Document][source]#

Filter down documents based on their relevance to the query.

classmethod from_llm(llm: langchain.base_language.BaseLanguageModel, prompt: Optional[langchain.prompts.base.BasePromptTemplate] = None, **kwargs: Any) → langchain.retrievers.document_compressors.chain_filter.LLMChainFilter[source]#