Source code for langchain.memory.token_buffer
from typing import Any, Dict, List
from langchain_core._api import deprecated
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import BaseMessage, get_buffer_string
from langchain.memory.chat_memory import BaseChatMemory
[docs]
@deprecated(
since="0.3.1",
removal="1.0.0",
message=(
"Please see the migration guide at: "
"https://python.langchain.com/docs/versions/migrating_memory/"
),
)
class ConversationTokenBufferMemory(BaseChatMemory):
"""Conversation chat memory with token limit.
Keeps only the most recent messages in the conversation under the constraint
that the total number of tokens in the conversation does not exceed a certain limit.
"""
human_prefix: str = "Human"
ai_prefix: str = "AI"
llm: BaseLanguageModel
memory_key: str = "history"
max_token_limit: int = 2000
@property
def buffer(self) -> Any:
"""String buffer of memory."""
return self.buffer_as_messages if self.return_messages else self.buffer_as_str
@property
def buffer_as_str(self) -> str:
"""Exposes the buffer as a string in case return_messages is False."""
return get_buffer_string(
self.chat_memory.messages,
human_prefix=self.human_prefix,
ai_prefix=self.ai_prefix,
)
@property
def buffer_as_messages(self) -> List[BaseMessage]:
"""Exposes the buffer as a list of messages in case return_messages is True."""
return self.chat_memory.messages
@property
def memory_variables(self) -> List[str]:
"""Will always return list of memory variables.
:meta private:
"""
return [self.memory_key]
[docs]
def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
"""Return history buffer."""
return {self.memory_key: self.buffer}
[docs]
def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
"""Save context from this conversation to buffer. Pruned."""
super().save_context(inputs, outputs)
# Prune buffer if it exceeds max token limit
buffer = self.chat_memory.messages
curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)
if curr_buffer_length > self.max_token_limit:
pruned_memory = []
while curr_buffer_length > self.max_token_limit:
pruned_memory.append(buffer.pop(0))
curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)