LlamaLib  v2.0.5
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM_agent.cpp
1#include "LLM_agent.h"
2#include <fstream>
3#include <iostream>
4
// Construct an agent that wraps `llm_` and uses `system_prompt_` as its system
// prompt, then take the next available processing slot from the wrapped LLM.
// NOTE(review): source line 8 is absent from this listing, so the body shown
// here may be incomplete — confirm against the original file.
5LLMAgent::LLMAgent(LLMLocal *llm_, const std::string &system_prompt_) : llm(llm_), system_prompt(system_prompt_)
6{
7 id_slot = llm->get_next_available_slot();
9}
10
11void LLMAgent::set_slot(int id_slot_)
12{
13 if (id_slot != -1)
14 {
15 if (LLMClient *client = dynamic_cast<LLMClient *>(llm))
16 {
17 if (client->is_remote())
18 {
19 id_slot_ = -1;
20 std::cerr << "Remote clients can only use id_slot -1" << std::endl;
21 }
22 }
23 }
24 id_slot = id_slot_;
25}
26
28{
29 history = json::array();
30 summary = "";
31 n_keep = -1;
32}
33
35{
36 json working_history = json::array();
37 std::string effective_system = system_prompt;
38 if (!summary.empty())
39 effective_system += "\n\n[Conversation summary]\n" + summary;
40 working_history.push_back(ChatMessage(system_role, effective_system).to_json());
41 return working_history;
42}
43
44json LLMAgent::build_working_history(const std::string &user_prompt, bool include_history) const
45{
46 json working_history = build_system_history();
47 if (include_history)
48 {
49 for (const auto &m : history) working_history.push_back(m);
50 }
51 working_history.push_back(ChatMessage(USER_ROLE, user_prompt).to_json());
52 return working_history;
53}
54
// Called by chat() when n_keep is still -1.
// Any exception raised inside the try block is swallowed silently.
// NOTE(review): source line 59 (the statement inside the try block) is absent
// from this listing — presumably it computes n_keep; confirm against the
// original file.
55void LLMAgent::set_n_keep()
56{
57 try
58 {
60 } catch(...){ }
61}
62
63std::string LLMAgent::chat(const std::string &user_prompt, bool add_to_history, CharArrayFn callback, bool return_response_json, bool debug_prompt)
64{
65 if (n_keep == -1) set_n_keep();
66
67 // Handle context overflow before sending
68 if (overflow_strategy != ContextOverflowStrategy::None)
69 handle_overflow(user_prompt);
70
71 // Apply template to get the formatted prompt
72 std::string query_prompt = apply_template(build_working_history(user_prompt));
73 if (debug_prompt)
74 {
76 auto log_callback = registry.get_log_callback();
77 if (log_callback != nullptr) log_callback(query_prompt.c_str());
78 }
79
80 // Call completion with the formatted prompt
81 std::string response = completion(query_prompt, callback, return_response_json);
82 std::string assistant_content = response;
83 if (return_response_json)
84 assistant_content = parse_completion_json(response);
85
86 if (add_to_history)
87 {
88 history.push_back(ChatMessage(USER_ROLE, user_prompt).to_json());
89 history.push_back(ChatMessage(ASSISTANT_ROLE, assistant_content).to_json());
90 }
91
92 return response;
93}
94
95void LLMAgent::add_message(const std::string &role, const std::string &content)
96{
97 ChatMessage msg(role, content);
98 history.push_back(msg.to_json());
99}
100
102{
103 if (!history.empty())
104 {
105 history.erase(history.end() - 1);
106 }
107}
108
109void LLMAgent::save_history(const std::string &filepath) const
110{
111 try
112 {
113 std::ofstream file(filepath);
114 if (file.is_open())
115 {
116 json save_data;
117 save_data["history"] = history;
118 save_data["summary"] = summary;
119 file << save_data.dump(4);
120 file.close();
121 }
122 else
123 {
124 std::cerr << "Unable to open file for writing: " << filepath << std::endl;
125 }
126 }
127 catch (const std::exception &e)
128 {
129 std::cerr << "Error saving history to file: " << e.what() << std::endl;
130 }
131}
132
133void LLMAgent::load_history(const std::string &filepath)
134{
135 try
136 {
137 std::ifstream file(filepath);
138 if (file.is_open())
139 {
140 json data;
141 file >> data;
142 file.close();
143
144 // New format: {"history": [...], "summary": "..."}
145 if (data.is_object() && data.contains("history") && data["history"].is_array())
146 {
147 history = data["history"];
148 summary = data.value("summary", "");
149 }
150 // Legacy format: plain JSON array (no summary)
151 else if (data.is_array())
152 {
153 history = data;
154 summary = "";
155 }
156 else
157 {
158 std::cerr << "Invalid history file format" << std::endl;
159 }
160 }
161 else
162 {
163 std::cerr << "Unable to open file for reading: " << filepath << std::endl;
164 }
165 }
166 catch (const std::exception &e)
167 {
168 std::cerr << "Error loading history from file: " << e.what() << std::endl;
169 }
170}
171
172bool LLMAgent::handle_overflow(const std::string &user_prompt)
173{
174 int ctx = get_slot_context_size();
175 if (ctx <= 0) return false;
176
177 int prompt_tokens = static_cast<int>(tokenize(apply_template(build_working_history(user_prompt))).size());
178 if (prompt_tokens < ctx) return false;
179
180 switch (overflow_strategy)
181 {
183 truncate_history(user_prompt);
184 return true;
186 summarize_history(user_prompt);
187 return true;
188 default:
189 return false;
190 }
191}
192
193void LLMAgent::truncate_history(const std::string &user_prompt)
194{
195 int ctx = get_slot_context_size();
196 if (ctx <= 0 || history.empty()) return;
197
198 std::cout<<"context size reached, truncating history"<<std::endl;
199
200 int target_tokens = static_cast<int>(ctx * target_context_ratio);
201
202 auto measure = [&]() -> int {
203 return static_cast<int>(tokenize(apply_template(build_working_history(user_prompt))).size());
204 };
205
206 while (history.size() >= 2 && measure() > target_tokens)
207 history.erase(history.begin(), history.begin() + 2);
208
209 // Edge case: a single orphan message still overflows
210 if (!history.empty() && measure() > target_tokens)
211 history.erase(history.begin());
212}
213
214void LLMAgent::summarize_history(const std::string &user_prompt)
215{
216 if (history.empty()) return;
217 int ctx = get_slot_context_size();
218 if (ctx <= 0) return;
219
220 std::cout<<"context size reached, summarizing history"<<std::endl;
221 // Build the prompt for a summary request, incorporating any prior rolling summary.
222 auto build_summary_prompt = [&](const std::string &transcript) -> std::string {
223 std::string query = summarize_prompt;
224 if (!summary.empty())
225 query += "Existing summary:\n" + summary + "\n\n";
226 query += "Messages:\n" + transcript;
227 json msgs = json::array();
228 msgs.push_back(ChatMessage(USER_ROLE, query).to_json());
229 return apply_template(msgs);
230 };
231
232 int n_keep_prev = n_keep;
233 json summary_prompt_msg = json::array({
234 ChatMessage(USER_ROLE, summarize_prompt).to_json()
235 });
236 n_keep = tokenize(apply_template(summary_prompt_msg)).size();
237
238 try
239 {
240 // Walk history, flushing a summary call whenever the accumulating transcript
241 // would itself overflow the context.
242 std::string transcript;
243 for (int i=0; i<history.size(); i+=2)
244 {
245 std::string line = "";
246 for (int j=0; j<2; j++)
247 {
248 if (i+j >= history.size()-1) break;
249 json msg = history[i+j];
250 std::string role = msg.at("role").get<std::string>();
251 std::string content = msg.at("content").get<std::string>();
252 line += role + ": " + content + "\n";
253 }
254
255 // Flush before appending if this line would push the prompt over the limit
256 if (!transcript.empty() && static_cast<int>(tokenize(build_summary_prompt(transcript + line)).size()) >= ctx*0.75)
257 {
258 summary = completion(build_summary_prompt(transcript));
259 transcript = "";
260 }
261 transcript += line;
262 }
263 if (!transcript.empty())
264 summary = completion(build_summary_prompt(transcript));
265
266 // History is now condensed into summary — clear the raw messages
267 history = json::array();
268
269 // The summary lives in the system message (injected by build_working_history).
270 // If even system + summary + empty user message overflows the budget, discard it.
271 int probe_tokens = static_cast<int>(tokenize(apply_template(build_working_history(user_prompt))).size());
272 int target_tokens = static_cast<int>(ctx * target_context_ratio);
273 if (probe_tokens > target_tokens)
274 {
275 std::cerr << "LLMAgent: summary itself exceeds context budget — discarding summary" << std::endl;
276 summary = "";
277 }
278 }
279 catch (const std::exception &e)
280 {
281 std::cerr << "LLMAgent: summarization failed (" << e.what() << "), falling back to truncation" << std::endl;
282 history = json::array(); // clear history to avoid double-processing
283 truncate_history(user_prompt);
284 }
285
286 n_keep = n_keep_prev;
287}
288
289//================ C API ================//
290
291LLMAgent *LLMAgent_Construct(LLMLocal *llm, const char *system_prompt_)
292{
293 std::string system_prompt = system_prompt_ ? system_prompt_ : "";
294 return new LLMAgent(llm, system_prompt);
295}
296
297const char *LLMAgent_Chat(LLMAgent *llm, const char *user_prompt, bool add_to_history, CharArrayFn callback, bool return_response_json, bool debug_prompt)
298{
299 return stringToCharArray(llm->chat(user_prompt, add_to_history, callback, return_response_json, debug_prompt));
300}
301
302// History management C API implementations
304{
305 llm->clear_history();
306}
307
308void LLMAgent_Set_System_Prompt(LLMAgent *llm, const char *system_prompt)
309{
310 std::string sys_prompt = system_prompt ? system_prompt : "";
311 llm->set_system_prompt(sys_prompt);
312}
313
315{
316 return stringToCharArray(llm->get_system_prompt());
317}
318
320{
321 return stringToCharArray(llm->get_history().dump());
322}
323
324void LLMAgent_Set_Slot(LLMAgent *llm, int slot_id)
325{
326 llm->set_slot(slot_id);
327}
328
330{
331 return llm->get_slot();
332}
333
334void LLMAgent_Set_History(LLMAgent *llm, const char *history_json)
335{
336 try
337 {
338 json history = json::parse(history_json ? history_json : "[]");
339 if (!history.is_array())
340 std::cerr << "Expected JSON array for history." << std::endl;
341 else
342 llm->set_history(history);
343 }
344 catch (const std::exception &e)
345 {
346 std::cerr << "Error parsing history JSON: " << e.what() << std::endl;
347 }
348}
349
350void LLMAgent_Add_User_Message(LLMAgent *llm, const char *content)
351{
352 llm->add_user_message(content ? content : "");
353}
354
355void LLMAgent_Add_Assistant_Message(LLMAgent *llm, const char *content)
356{
357 llm->add_assistant_message(content ? content : "");
358}
359
364
365void LLMAgent_Save_History(LLMAgent *llm, const char *filepath)
366{
367 std::string path = filepath ? filepath : "";
368 if (!path.empty())
369 {
370 llm->save_history(path);
371 }
372}
373
374void LLMAgent_Load_History(LLMAgent *llm, const char *filepath)
375{
376 std::string path = filepath ? filepath : "";
377 if (!path.empty())
378 {
379 llm->load_history(path);
380 }
381}
382
384{
385 return llm->get_history_size();
386}
387
388void LLMAgent_Set_Overflow_Strategy(LLMAgent *llm, int strategy, float target_ratio, const char *summarize_prompt)
389{
390 constexpr int strategy_min = static_cast<int>(ContextOverflowStrategy::None);
391 constexpr int strategy_max = static_cast<int>(ContextOverflowStrategy::Summarize);
392 if (strategy < strategy_min || strategy > strategy_max)
393 {
394 std::cerr << "LLMAgent_Set_Overflow_Strategy: invalid strategy " << strategy << std::endl;
395 return;
396 }
397 ContextOverflowStrategy s = static_cast<ContextOverflowStrategy>(strategy);
398
399 std::string prompt = summarize_prompt ? summarize_prompt : "";
400 if (prompt.empty())
401 llm->set_overflow_strategy(s, target_ratio);
402 else
403 llm->set_overflow_strategy(s, target_ratio, prompt);
404}
405
407{
408 return stringToCharArray(llm->get_summary());
409}
410
411void LLMAgent_Set_Summary(LLMAgent *llm, const char *summary)
412{
413 llm->set_summary(summary ? summary : "");
414}
High-level conversational agent interface for LLMs.
ContextOverflowStrategy
Strategy to apply when the chat history would exceed the model's context window.
Definition LLM_agent.h:58
@ Summarize
Summarise the full history (rolling chunks if needed), embed it in the system message,...
@ None
No automatic handling — may crash if context is exceeded.
@ Truncate
Remove oldest messages (in pairs) from the front until history fits within target_context_ratio.
High-level conversational agent for LLM interactions.
Definition LLM_agent.h:83
LLMAgent(LLMLocal *llm, const std::string &system_prompt="")
Constructor for LLM agent.
Definition LLM_agent.cpp:5
json build_working_history(const std::string &user_prompt, bool include_history=true) const
Build the full message list to send to the model.
Definition LLM_agent.cpp:44
void load_history(const std::string &filepath)
Load conversation history from file.
bool handle_overflow(const std::string &user_prompt)
Handle context overflow using the configured strategy before a chat call.
void set_history(const json &history_)
Set conversation history.
Definition LLM_agent.h:211
void truncate_history(const std::string &user_prompt)
Remove oldest message pairs from the front until history fits within target_context_ratio.
json get_history() const
Get conversation history.
Definition LLM_agent.h:216
void set_overflow_strategy(ContextOverflowStrategy strategy, float target_ratio=0.5f, const std::string &summarize_prompt=SUMMARY_PROMPT)
Configure how the agent handles context overflow.
Definition LLM_agent.h:259
int get_slot()
Get current processing slot ID.
Definition LLM_agent.h:190
void add_user_message(const std::string &content)
Add a user message to conversation history.
Definition LLM_agent.h:223
virtual void add_message(const std::string &role, const std::string &content)
Add a message to conversation history.
Definition LLM_agent.cpp:95
void summarize_history(const std::string &user_prompt)
Summarise the entire history (chunking if needed), embed summary in system message,...
void save_history(const std::string &filepath) const
Save conversation history to file.
void add_assistant_message(const std::string &content)
Add an assistant message to conversation history.
Definition LLM_agent.h:228
size_t get_history_size() const
Get number of messages in history.
Definition LLM_agent.h:251
void set_summary(const std::string &summary_)
Set the rolling summary directly (e.g. after loading from file)
Definition LLM_agent.h:280
std::string get_summary() const
Get the current rolling summary (empty if none has been generated yet)
Definition LLM_agent.h:277
void set_slot(int id_slot)
Set processing slot ID.
Definition LLM_agent.cpp:11
void set_system_prompt(const std::string &system_prompt_)
Set system prompt.
Definition LLM_agent.h:202
void remove_last_message()
Remove the last message from history.
json build_system_history() const
Builds the message list containing only the system message (the system prompt plus the rolling summary, if any).
Definition LLM_agent.cpp:34
std::string get_system_prompt() const
Get current system prompt.
Definition LLM_agent.h:206
std::string chat(const std::string &user_prompt, bool add_to_history=true, CharArrayFn callback=nullptr, bool return_response_json=false, bool debug_prompt=false)
Conduct a chat interaction.
Definition LLM_agent.cpp:63
void clear_history()
Clear all conversation history.
Definition LLM_agent.cpp:27
int get_slot_context_size() override
Get slot context size (delegate to wrapped LLM)
Definition LLM_agent.h:141
virtual std::string completion(const std::string &prompt, CharArrayFn callback=nullptr, bool return_response_json=false)
Generate completion with agent's slot.
Definition LLM_agent.h:158
Client for accessing LLM functionality locally or remotely.
Definition LLM_client.h:32
Abstract class for local LLM operations with slot management.
Definition LLM.h:222
virtual int get_next_available_slot()=0
Get an available processing slot.
Registry for managing LLM provider instances.
Definition LLM.h:384
const CharArrayFn get_log_callback()
Get current log callback.
Definition LLM.h:459
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Definition LLM.h:399
int32_t n_keep
Number of tokens to keep from the beginning of the context.
Definition LLM.h:62
virtual std::string apply_template(const json &messages)
Apply template to messages.
Definition LLM.cpp:145
virtual std::string parse_completion_json(const json &result)
Parse completion result.
Definition LLM.cpp:265
virtual std::vector< int > tokenize(const std::string &query)
Tokenize text.
Definition LLM.cpp:171
void LLMAgent_Set_Overflow_Strategy(LLMAgent *llm, int strategy, float target_ratio, const char *summarize_prompt)
Configure the context overflow strategy (C API)
void LLMAgent_Save_History(LLMAgent *llm, const char *filepath)
Save conversation history to file (C API)
void LLMAgent_Load_History(LLMAgent *llm, const char *filepath)
Load conversation history from file (C API)
size_t LLMAgent_Get_History_Size(LLMAgent *llm)
Get conversation history size (C API)
void LLMAgent_Add_Assistant_Message(LLMAgent *llm, const char *content)
Add assistant message to history (C API)
LLMAgent * LLMAgent_Construct(LLMLocal *llm, const char *system_prompt_)
Construct LLMAgent (C API)
void LLMAgent_Clear_History(LLMAgent *llm)
Clear conversation history (C API)
void LLMAgent_Set_History(LLMAgent *llm, const char *history_json)
Set conversation history (C API)
const char * LLMAgent_Chat(LLMAgent *llm, const char *user_prompt, bool add_to_history=true, CharArrayFn callback=nullptr, bool return_response_json=false, bool debug_prompt=false)
Conduct chat interaction (C API)
const char * LLMAgent_Get_Summary(LLMAgent *llm)
Get the current rolling summary (C API)
int LLMAgent_Get_Slot(LLMAgent *llm)
Get processing slot (C API)
void LLMAgent_Set_Slot(LLMAgent *llm, int slot_id)
Set processing slot (C API)
const char * LLMAgent_Get_History(LLMAgent *llm)
Get conversation history (C API)
void LLMAgent_Add_User_Message(LLMAgent *llm, const char *content)
Add user message to history (C API)
void LLMAgent_Set_System_Prompt(LLMAgent *llm, const char *system_prompt)
Set system prompt (C API)
void LLMAgent_Set_Summary(LLMAgent *llm, const char *summary)
Set the rolling summary directly (C API)
const char * LLMAgent_Get_System_Prompt(LLMAgent *llm)
Get system prompt (C API)
void LLMAgent_Remove_Last_Message(LLMAgent *llm)
Remove last message from history (C API)
Structure representing a single chat message.
Definition LLM_agent.h:16
json to_json() const
Convert message to JSON representation.
Definition LLM_agent.h:32