# Source code for langchain_community.retrievers.tavily_search_api
import os
from enum import Enum
from typing import Any, Dict, List, Optional
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
class SearchDepth(Enum):
    """Search depth as enumerator.

    The member's ``value`` is the string that ``TavilySearchAPIRetriever``
    forwards as the ``search_depth`` argument of the Tavily client.
    """

    BASIC = "basic"
    ADVANCED = "advanced"
class TavilySearchAPIRetriever(BaseRetriever):
    """Tavily Search API retriever.

    Setup:
        Install ``langchain-community`` and set environment variable ``TAVILY_API_KEY``.

        .. code-block:: bash

            pip install -U langchain-community
            export TAVILY_API_KEY="your-api-key"

    Key init args:
        k: int
            Number of results to include. When ``include_generated_answer``
            is set, the generated answer counts as one of the ``k`` documents.
        include_generated_answer: bool
            Include a generated answer with results
        include_raw_content: bool
            Include raw content with results.
        include_images: bool
            Return images in addition to text.
        search_depth: SearchDepth
            Search depth forwarded to the Tavily client.
        include_domains: Optional[List[str]]
            Forwarded to the Tavily client as ``include_domains``.
        exclude_domains: Optional[List[str]]
            Forwarded to the Tavily client as ``exclude_domains``.
        kwargs: Optional[Dict[str, Any]]
            Extra keyword arguments forwarded verbatim to the client's
            ``search`` call.
        api_key: Optional[str]
            Tavily API key; the ``TAVILY_API_KEY`` environment variable is
            used when this is not set.

    Instantiate:
        .. code-block:: python

            from langchain_community.retrievers import TavilySearchAPIRetriever

            retriever = TavilySearchAPIRetriever(k=3)

    Usage:
        .. code-block:: python

            query = "what year was breath of the wild released?"

            retriever.invoke(query)

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import ChatOpenAI

            prompt = ChatPromptTemplate.from_template(
                \"\"\"Answer the question based only on the context provided.

            Context: {context}

            Question: {question}\"\"\"
            )

            llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("how many units did breath of the wild sell in 2020")

    """  # noqa: E501

    k: int = 10
    include_generated_answer: bool = False
    include_raw_content: bool = False
    include_images: bool = False
    search_depth: SearchDepth = SearchDepth.BASIC
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    # ``None`` is an accepted value here, so the search call must not unpack
    # this field directly.
    kwargs: Optional[Dict[str, Any]] = {}
    api_key: Optional[str] = None

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Run a Tavily search for ``query`` and wrap the hits as documents.

        Args:
            query: Search string sent to the Tavily client.
            run_manager: Callback manager for the retriever run (not used by
                this implementation).

        Returns:
            One ``Document`` per search result. ``page_content`` is the
            result's ``raw_content`` when ``include_raw_content`` is set and
            its ``content`` otherwise; the remaining result fields go into
            ``metadata`` together with the response-level ``images``. When
            ``include_generated_answer`` is set, a "Suggested Answer"
            document is placed first.

        Raises:
            ImportError: If the ``tavily`` package cannot be imported.
            KeyError: If ``api_key`` is unset and ``TAVILY_API_KEY`` is not
                present in the environment.
        """
        try:
            try:
                from tavily import TavilyClient
            except ImportError:
                # Older versions of tavily exposed the client as ``Client``.
                from tavily import Client as TavilyClient
        except ImportError as exc:
            raise ImportError(
                "Tavily python package not found. "
                "Please install it with `pip install tavily-python`."
            ) from exc

        tavily = TavilyClient(api_key=self.api_key or os.environ["TAVILY_API_KEY"])

        # The generated answer takes one of the ``k`` slots; clamp so a
        # negative count is never sent to the API.
        if self.include_generated_answer:
            max_results = max(self.k - 1, 0)
        else:
            max_results = self.k

        response = tavily.search(
            query=query,
            max_results=max_results,
            search_depth=self.search_depth.value,
            include_answer=self.include_generated_answer,
            include_domains=self.include_domains,
            exclude_domains=self.exclude_domains,
            include_raw_content=self.include_raw_content,
            include_images=self.include_images,
            # ``kwargs`` may legitimately be None; unpacking None would raise.
            **(self.kwargs or {}),
        )

        content_key = "raw_content" if self.include_raw_content else "content"
        # Fields that are mapped explicitly and must not be duplicated in
        # the pass-through metadata.
        mapped_keys = ("content", "title", "url", "raw_content")
        images = response.get("images")

        docs = [
            Document(
                # ``or ""`` also covers a key that is present but null.
                page_content=result.get(content_key) or "",
                metadata={
                    "title": result.get("title", ""),
                    "source": result.get("url", ""),
                    **{
                        key: value
                        for key, value in result.items()
                        if key not in mapped_keys
                    },
                    "images": images,
                },
            )
            # A missing or null "results" entry yields no documents.
            for result in (response.get("results") or [])
        ]

        if self.include_generated_answer:
            docs = [
                Document(
                    page_content=response.get("answer") or "",
                    metadata={
                        "title": "Suggested Answer",
                        "source": "https://tavily.com/",
                    },
                ),
                *docs,
            ]

        return docs