from datetime import datetime
from io import IOBase
from typing import List, Optional, Union
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.prompts import (
ChatPromptTemplate,
MessagesPlaceholder,
)
from langchain_core.prompts.chat import (
BaseMessagePromptTemplate,
HumanMessagePromptTemplate,
)
from langchain_core.tools import BaseTool
from langchain_cohere.chat_models import ChatCohere
from langchain_cohere.csv_agent.prompts import (
CSV_PREAMBLE,
)
# Let's define the set of tools available to the agent.
from langchain_cohere.csv_agent.tools import (
get_file_peek_tool,
get_file_read_tool,
get_python_tool,
)
[docs]
def create_prompt(
    system_message: Optional[BaseMessage] = SystemMessage(
        content="You are a helpful AI assistant."
    ),
    extra_prompt_messages: Optional[List[BaseMessagePromptTemplate]] = None,
) -> ChatPromptTemplate:
    """Build the chat prompt template used by this agent.

    The resulting message order is: the optional system message, any extra
    prompt messages, the human ``{input}`` template and finally the
    ``agent_scratchpad`` placeholder.

    Args:
        system_message: Message placed first in the prompt. A falsy value
            (such as ``None``) leaves it out.
        extra_prompt_messages: Prompt messages inserted between the system
            message and the new human input.

    Returns:
        A prompt template to pass into this agent.
    """
    head: List[Union[BaseMessagePromptTemplate, BaseMessage]] = (
        [system_message] if system_message else []
    )
    middle = extra_prompt_messages or []
    tail = [
        HumanMessagePromptTemplate.from_template("{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
    return ChatPromptTemplate(messages=[*head, *middle, *tail])
def _get_csv_head_str(path: str, number_of_head_rows: int) -> str:
    """Return the first lines of a file joined into one space-separated string.

    Args:
        path: Path of the file to preview; it is opened in text mode.
        number_of_head_rows: Maximum number of leading lines to include.
            Zero or a negative value yields an empty string.

    Returns:
        The leading lines, each with its newline removed, joined by single
        spaces. A file shorter than ``number_of_head_rows`` contributes only
        the lines it actually has.
    """
    with open(path, "r") as file:
        # zip() stops at whichever runs out first: the row budget or the file.
        # This avoids padding the preview with empty strings past end-of-file.
        lines = [
            line.strip("\n") for _, line in zip(range(number_of_head_rows), file)
        ]
    # NOTE: the head contents are not validated as well-formed CSV here.
    return " ".join(lines)
def _get_prompt(
    path: Union[str, List[str]], number_of_head_rows: int
) -> ChatPromptTemplate:
    """Build the default agent prompt describing the uploaded file(s).

    For every file a description is produced containing its filename, its
    word count and a preview of its first ``number_of_head_rows`` lines.
    Descriptions for several files are joined with a single space.

    Args:
        path: A single file path, or a list of file paths.
        number_of_head_rows: Number of leading lines of each file to include
            in the preview.

    Returns:
        A prompt template whose first message is the attachment description.

    Raises:
        ValueError: If ``path`` is neither a string nor a list.
    """
    if isinstance(path, str):
        file_paths = [path]
    elif isinstance(path, list):
        file_paths = path
    else:
        # Fail explicitly; otherwise the message below would never be bound
        # and the function would die with an UnboundLocalError.
        raise ValueError(f"Expected str or list, got {type(path)}")

    prompt_messages = []
    for file_path in file_paths:
        lines = _get_csv_head_str(file_path, number_of_head_rows)
        prompt_messages.append(
            f"The user uploaded the following attachments:\nFilename: {file_path}\nWord Count: {count_words_in_file(file_path)}\nPreview: {lines}"  # noqa: E501
        )
    prompt_message = " ".join(prompt_messages)

    # The attachment description goes into create_prompt's ``system_message``
    # slot (as a HumanMessage) so that it is the first message of the prompt.
    return create_prompt(system_message=HumanMessage(prompt_message))
[docs]
def count_words_in_file(file_path: str) -> int:
    """Count the whitespace-separated words in a text file.

    This is a best-effort helper: failures are reported on stdout and turned
    into a count of 0 rather than raised.

    Args:
        file_path: Path of the file to read; it is opened in text mode.

    Returns:
        The total number of whitespace-separated tokens over all lines, or 0
        if the file is missing or cannot be read.
    """
    try:
        with open(file_path, "r") as file:
            # Stream line by line instead of loading the whole file at once.
            total = sum(len(line.split()) for line in file)
    except FileNotFoundError:
        print("File not found.")
        return 0
    except Exception as e:
        # Deliberately broad: any read/decoding problem yields a count of 0.
        print("An error occurred:", str(e))
        return 0
    return total
[docs]
def create_csv_agent(
    llm: BaseLanguageModel,
    path: Union[str, List[str]],
    extra_tools: List[BaseTool] = [],
    pandas_kwargs: Optional[dict] = None,
    prompt: Optional[ChatPromptTemplate] = None,
    number_of_head_rows: int = 5,
    verbose: bool = True,
    return_intermediate_steps: bool = True,
    temp_path_dir: Optional[str] = None,
    temp_path_prefix: Optional[str] = "langchain",
    temp_path_suffix: Optional[str] = "csv_agent",
) -> AgentExecutor:
    """Create a CSV agent with the specified language model.

    Args:
        llm: Language model to use for the agent.
        path: A string path, or a list of string paths, that can be read with
            ``pd.read_csv()``. Every input is parsed once up front so that
            unreadable files fail immediately.
        extra_tools: Additional tools appended after the built-in file-read,
            file-peek and Python tools. The list is not modified.
        pandas_kwargs: Named arguments to pass to ``pd.read_csv()``.
        prompt: Prompt to use for the agent. When omitted (or falsy), a prompt
            describing the file(s) is generated from ``path``.
        number_of_head_rows: Number of leading lines of each file shown in the
            generated prompt.
        verbose: Passed through to the ``AgentExecutor``.
        return_intermediate_steps: Passed through to the ``AgentExecutor``.
        temp_path_dir: Currently not used by this function.
        temp_path_prefix: Currently not used by this function.
        temp_path_suffix: Currently not used by this function.

    Returns:
        An AgentExecutor wrapping a tool-calling agent with access to the
        file-read, file-peek and Python tools plus any ``extra_tools``.

    Raises:
        ImportError: If pandas is not installed.
        ValueError: If ``path`` is neither a string nor a list, or a list
            item is neither a string nor a file-like object.

    Example:
        .. code-block:: python

            from langchain_cohere import ChatCohere, create_csv_agent

            llm = ChatCohere(model="command-r-plus", temperature=0)
            agent_executor = create_csv_agent(
                llm,
                "titanic.csv"
            )
            resp = agent_executor.invoke({"input":"How many people were on the titanic?"})
            print(resp.get("output"))
    """  # noqa: E501
    try:
        import pandas as pd
    except ImportError as e:
        raise ImportError(
            "pandas package not found, please install with `pip install pandas`."
        ) from e

    _kwargs = pandas_kwargs or {}
    # The parsed DataFrames are discarded: reading each input is only a
    # fail-fast check that pandas can parse it.
    if isinstance(path, str):
        pd.read_csv(path, **_kwargs)
    elif isinstance(path, list):
        for item in path:
            if not isinstance(item, (str, IOBase)):
                # Report the offending item's type, not the enclosing list's.
                raise ValueError(f"Expected str or file-like object, got {type(item)}")
            pd.read_csv(item, **_kwargs)
    else:
        raise ValueError(f"Expected str, list, or file-like object, got {type(path)}")

    if not prompt:
        # NOTE(review): the generated prompt opens every entry of ``path`` by
        # name, so file-like list items presumably need an explicit ``prompt``
        # - confirm.
        prompt = _get_prompt(path, number_of_head_rows)

    final_tools = [
        get_file_read_tool(),
        get_file_peek_tool(),
        get_python_tool(),
    ] + extra_tools

    # Only models that expose an empty ``preamble`` attribute get the CSV
    # preamble; a preamble already set on the model is left alone. The model
    # is rebuilt from its attributes so the caller's instance is not changed.
    if "preamble" in llm.__dict__ and not llm.__dict__.get("preamble"):
        llm = ChatCohere(**llm.__dict__)
        llm.preamble = CSV_PREAMBLE.format(
            current_date=datetime.now().strftime("%A, %B %d, %Y %H:%M:%S")
        )

    agent = create_tool_calling_agent(llm=llm, tools=final_tools, prompt=prompt)
    return AgentExecutor(
        agent=agent,
        tools=final_tools,
        verbose=verbose,
        return_intermediate_steps=return_intermediate_steps,
    )