Source code for langchain.memory.token_buffer

from typing import Any, Dict, List

from langchain_core._api import deprecated
from langchain_core.language_models import BaseLanguageModel
from langchain_core.messages import BaseMessage, get_buffer_string

from langchain.memory.chat_memory import BaseChatMemory


[docs] @deprecated( since="0.3.1", removal="1.0.0", message=( "Please see the migration guide at: " "https://python.langchain.com/docs/versions/migrating_memory/" ), ) class ConversationTokenBufferMemory(BaseChatMemory): """Conversation chat memory with token limit. Keeps only the most recent messages in the conversation under the constraint that the total number of tokens in the conversation does not exceed a certain limit. """ human_prefix: str = "Human" ai_prefix: str = "AI" llm: BaseLanguageModel memory_key: str = "history" max_token_limit: int = 2000 @property def buffer(self) -> Any: """String buffer of memory.""" return self.buffer_as_messages if self.return_messages else self.buffer_as_str @property def buffer_as_str(self) -> str: """Exposes the buffer as a string in case return_messages is False.""" return get_buffer_string( self.chat_memory.messages, human_prefix=self.human_prefix, ai_prefix=self.ai_prefix, ) @property def buffer_as_messages(self) -> List[BaseMessage]: """Exposes the buffer as a list of messages in case return_messages is True.""" return self.chat_memory.messages @property def memory_variables(self) -> List[str]: """Will always return list of memory variables. :meta private: """ return [self.memory_key]
[docs] def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: """Return history buffer.""" return {self.memory_key: self.buffer}
[docs] def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: """Save context from this conversation to buffer. Pruned.""" super().save_context(inputs, outputs) # Prune buffer if it exceeds max token limit buffer = self.chat_memory.messages curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) if curr_buffer_length > self.max_token_limit: pruned_memory = [] while curr_buffer_length > self.max_token_limit: pruned_memory.append(buffer.pop(0)) curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer)