17 if (client->is_remote())
20 std::cerr <<
"Remote clients can only use id_slot -1" << std::endl;
29 history = json::array();
36 json working_history = json::array();
37 std::string effective_system = system_prompt;
39 effective_system +=
"\n\n[Conversation summary]\n" + summary;
40 working_history.push_back(
ChatMessage(system_role, effective_system).to_json());
41 return working_history;
49 for (
const auto &m : history) working_history.push_back(m);
52 return working_history;
55void LLMAgent::set_n_keep()
63std::string
LLMAgent::chat(
const std::string &user_prompt,
bool add_to_history, CharArrayFn callback,
bool return_response_json,
bool debug_prompt)
65 if (
n_keep == -1) set_n_keep();
77 if (log_callback !=
nullptr) log_callback(query_prompt.c_str());
81 std::string response =
completion(query_prompt, callback, return_response_json);
82 std::string assistant_content = response;
83 if (return_response_json)
88 history.push_back(
ChatMessage(USER_ROLE, user_prompt).to_json());
89 history.push_back(
ChatMessage(ASSISTANT_ROLE, assistant_content).to_json());
98 history.push_back(msg.
to_json());
103 if (!history.empty())
105 history.erase(history.end() - 1);
113 std::ofstream file(filepath);
117 save_data[
"history"] = history;
118 save_data[
"summary"] = summary;
119 file << save_data.dump(4);
124 std::cerr <<
"Unable to open file for writing: " << filepath << std::endl;
127 catch (
const std::exception &e)
129 std::cerr <<
"Error saving history to file: " << e.what() << std::endl;
137 std::ifstream file(filepath);
145 if (data.is_object() && data.contains(
"history") && data[
"history"].is_array())
147 history = data[
"history"];
148 summary = data.value(
"summary",
"");
151 else if (data.is_array())
158 std::cerr <<
"Invalid history file format" << std::endl;
163 std::cerr <<
"Unable to open file for reading: " << filepath << std::endl;
166 catch (
const std::exception &e)
168 std::cerr <<
"Error loading history from file: " << e.what() << std::endl;
175 if (ctx <= 0)
return false;
178 if (prompt_tokens < ctx)
return false;
180 switch (overflow_strategy)
196 if (ctx <= 0 || history.empty())
return;
198 std::cout<<
"context size reached, truncating history"<<std::endl;
200 int target_tokens =
static_cast<int>(ctx * target_context_ratio);
202 auto measure = [&]() ->
int {
206 while (history.size() >= 2 && measure() > target_tokens)
207 history.erase(history.begin(), history.begin() + 2);
210 if (!history.empty() && measure() > target_tokens)
211 history.erase(history.begin());
216 if (history.empty())
return;
218 if (ctx <= 0)
return;
220 std::cout<<
"context size reached, summarizing history"<<std::endl;
222 auto build_summary_prompt = [&](
const std::string &transcript) -> std::string {
223 std::string query = summarize_prompt;
224 if (!summary.empty())
225 query +=
"Existing summary:\n" + summary +
"\n\n";
226 query +=
"Messages:\n" + transcript;
227 json msgs = json::array();
228 msgs.push_back(
ChatMessage(USER_ROLE, query).to_json());
233 json summary_prompt_msg = json::array({
242 std::string transcript;
243 for (
int i=0; i<history.size(); i+=2)
245 std::string line =
"";
246 for (
int j=0; j<2; j++)
248 if (i+j >= history.size()-1)
break;
249 json msg = history[i+j];
250 std::string role = msg.at(
"role").get<std::string>();
251 std::string content = msg.at(
"content").get<std::string>();
252 line += role +
": " + content +
"\n";
256 if (!transcript.empty() &&
static_cast<int>(
tokenize(build_summary_prompt(transcript + line)).size()) >= ctx*0.75)
258 summary =
completion(build_summary_prompt(transcript));
263 if (!transcript.empty())
264 summary =
completion(build_summary_prompt(transcript));
267 history = json::array();
272 int target_tokens =
static_cast<int>(ctx * target_context_ratio);
273 if (probe_tokens > target_tokens)
275 std::cerr <<
"LLMAgent: summary itself exceeds context budget — discarding summary" << std::endl;
279 catch (
const std::exception &e)
281 std::cerr <<
"LLMAgent: summarization failed (" << e.what() <<
"), falling back to truncation" << std::endl;
282 history = json::array();
293 std::string system_prompt = system_prompt_ ? system_prompt_ :
"";
294 return new LLMAgent(llm, system_prompt);
297const char *
LLMAgent_Chat(
LLMAgent *llm,
const char *user_prompt,
bool add_to_history, CharArrayFn callback,
bool return_response_json,
bool debug_prompt)
299 return stringToCharArray(llm->
chat(user_prompt, add_to_history, callback, return_response_json, debug_prompt));
310 std::string sys_prompt = system_prompt ? system_prompt :
"";
321 return stringToCharArray(llm->
get_history().dump());
338 json history = json::parse(history_json ? history_json :
"[]");
339 if (!history.is_array())
340 std::cerr <<
"Expected JSON array for history." << std::endl;
344 catch (
const std::exception &e)
346 std::cerr <<
"Error parsing history JSON: " << e.what() << std::endl;
367 std::string path = filepath ? filepath :
"";
376 std::string path = filepath ? filepath :
"";
392 if (strategy < strategy_min || strategy > strategy_max)
394 std::cerr <<
"LLMAgent_Set_Overflow_Strategy: invalid strategy " << strategy << std::endl;
399 std::string prompt = summarize_prompt ? summarize_prompt :
"";
High-level conversational agent interface for LLMs.
ContextOverflowStrategy
Strategy to apply when the chat history would exceed the model's context window.
@ Summarize
Summarize the full history (rolling chunks if needed), embed it in the system message,...
@ None
No automatic handling — may crash if context is exceeded.
@ Truncate
Remove oldest messages (in pairs) from the front until history fits within target_context_ratio.
High-level conversational agent for LLM interactions.
LLMAgent(LLMLocal *llm, const std::string &system_prompt="")
Constructor for LLM agent.
json build_working_history(const std::string &user_prompt, bool include_history=true) const
Build the full message list to send to the model.
void load_history(const std::string &filepath)
Load conversation history from file.
bool handle_overflow(const std::string &user_prompt)
Handle context overflow using the configured strategy before a chat call.
void set_history(const json &history_)
Set conversation history.
void truncate_history(const std::string &user_prompt)
Remove oldest message pairs from the front until history fits within target_context_ratio.
json get_history() const
Get conversation history.
void set_overflow_strategy(ContextOverflowStrategy strategy, float target_ratio=0.5f, const std::string &summarize_prompt=SUMMARY_PROMPT)
Configure how the agent handles context overflow.
int get_slot()
Get current processing slot ID.
void add_user_message(const std::string &content)
Add a user message to conversation history.
virtual void add_message(const std::string &role, const std::string &content)
Add a message to conversation history.
void summarize_history(const std::string &user_prompt)
Summarize the entire history (chunking if needed), embed summary in system message,...
void save_history(const std::string &filepath) const
Save conversation history to file.
void add_assistant_message(const std::string &content)
Add an assistant message to conversation history.
size_t get_history_size() const
Get number of messages in history.
void set_summary(const std::string &summary_)
Set the rolling summary directly (e.g. after loading from file)
std::string get_summary() const
Get the current rolling summary (empty if none has been generated yet)
void set_slot(int id_slot)
Set processing slot ID.
void set_system_prompt(const std::string &system_prompt_)
Set system prompt.
void remove_last_message()
Remove the last message from history.
json build_system_history() const
Build the history to send to the model, including only the prompts.
std::string get_system_prompt() const
Get current system prompt.
std::string chat(const std::string &user_prompt, bool add_to_history=true, CharArrayFn callback=nullptr, bool return_response_json=false, bool debug_prompt=false)
Conduct a chat interaction.
void clear_history()
Clear all conversation history.
int get_slot_context_size() override
Get slot context size (delegate to wrapped LLM)
virtual std::string completion(const std::string &prompt, CharArrayFn callback=nullptr, bool return_response_json=false)
Generate completion with agent's slot.
Client for accessing LLM functionality locally or remotely.
Abstract class for local LLM operations with slot management.
virtual int get_next_available_slot()=0
Get an available processing slot.
Registry for managing LLM provider instances.
const CharArrayFn get_log_callback()
Get current log callback.
static LLMProviderRegistry & instance()
Get the singleton registry instance.
int32_t n_keep
Number of tokens to keep from the beginning of the context.
virtual std::string apply_template(const json &messages)
Apply template to messages.
virtual std::string parse_completion_json(const json &result)
Parse completion result.
virtual std::vector< int > tokenize(const std::string &query)
Tokenize text.
void LLMAgent_Set_Overflow_Strategy(LLMAgent *llm, int strategy, float target_ratio, const char *summarize_prompt)
Configure the context overflow strategy (C API)
void LLMAgent_Save_History(LLMAgent *llm, const char *filepath)
Save conversation history to file (C API)
void LLMAgent_Load_History(LLMAgent *llm, const char *filepath)
Load conversation history from file (C API)
size_t LLMAgent_Get_History_Size(LLMAgent *llm)
Get conversation history size (C API)
void LLMAgent_Add_Assistant_Message(LLMAgent *llm, const char *content)
Add assistant message to history (C API)
LLMAgent * LLMAgent_Construct(LLMLocal *llm, const char *system_prompt_)
Construct LLMAgent (C API)
void LLMAgent_Clear_History(LLMAgent *llm)
Clear conversation history (C API)
void LLMAgent_Set_History(LLMAgent *llm, const char *history_json)
Set conversation history (C API)
const char * LLMAgent_Chat(LLMAgent *llm, const char *user_prompt, bool add_to_history=true, CharArrayFn callback=nullptr, bool return_response_json=false, bool debug_prompt=false)
Conduct chat interaction (C API)
const char * LLMAgent_Get_Summary(LLMAgent *llm)
Get the current rolling summary (C API)
int LLMAgent_Get_Slot(LLMAgent *llm)
Get processing slot (C API)
void LLMAgent_Set_Slot(LLMAgent *llm, int slot_id)
Set processing slot (C API)
const char * LLMAgent_Get_History(LLMAgent *llm)
Get conversation history (C API)
void LLMAgent_Add_User_Message(LLMAgent *llm, const char *content)
Add user message to history (C API)
void LLMAgent_Set_System_Prompt(LLMAgent *llm, const char *system_prompt)
Set system prompt (C API)
void LLMAgent_Set_Summary(LLMAgent *llm, const char *summary)
Set the rolling summary directly (C API)
const char * LLMAgent_Get_System_Prompt(LLMAgent *llm)
Get system prompt (C API)
void LLMAgent_Remove_Last_Message(LLMAgent *llm)
Remove last message from history (C API)
Structure representing a single chat message.
json to_json() const
Convert message to JSON representation.