LlamaLib  v2.0.5
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM_agent.h
Go to the documentation of this file.
1
6
7#pragma once
8
9#include "LLM.h"
10#include "LLM_client.h"
11
15struct UNDREAMAI_API ChatMessage
16{
17 std::string role;
18 std::string content;
19
21 ChatMessage() = default;
22
26 ChatMessage(const std::string &role_, const std::string &content_)
27 : role(role_), content(content_) {}
28
32 json to_json() const
33 {
34 return json{{"role", role}, {"content", content.empty() ? " " : content}};
35 }
36
42 static ChatMessage from_json(const json &j)
43 {
44 return ChatMessage(j.at("role").get<std::string>(), j.at("content").get<std::string>());
45 }
46
50 bool operator==(const ChatMessage &other) const
51 {
52 return role == other.role && content == other.content;
53 }
54};
55
/// @brief Strategy to apply when the chat history would exceed the model's
/// context window.
/// NOTE(review): reconstructed from the documentation index — the scraped
/// dump lost the declaration line and the Summarize enumerator; confirm the
/// scoped-enum (`enum class`) form matches the real header.
enum class ContextOverflowStrategy
{
    None,     ///< No automatic handling — may crash if context is exceeded.
    Truncate, ///< Remove oldest messages (in pairs) from the front until history fits within target_context_ratio.
    Summarize ///< Summarise the full history (rolling chunks if needed) and embed it in the system message.
};
63
/// @brief System prompt used by the Summarize overflow strategy to maintain
/// a rolling summary of the conversation.
/// Each literal ends with '\n': adjacent string literals concatenate with no
/// separator, so without the newlines the prompt fused into one run-on line
/// ("...conversation.If an existing...") and the blank lines disappeared.
const std::string SUMMARY_PROMPT =
    "You are maintaining a concise working memory of an ongoing conversation.\n"
    "\n"
    "If an existing summary is provided, merge it with the new messages into a single updated summary.\n"
    "If no existing summary is provided, create a new summary from the messages.\n"
    "\n"
    "Rules:\n"
    "- Preserve user goals, decisions made, constraints, preferences, open questions, and pending tasks.\n"
    "- Remove anything resolved, superseded, redundant, or purely conversational.\n"
    "- Keep only information relevant for future reasoning.\n"
    "- Avoid duplicating or rephrasing information unnecessarily.\n"
    "- Write in present tense where possible.\n"
    "- Keep under 200 words.\n"
    "- No bullet points. No preamble. Output only the summary text.";
78
82class UNDREAMAI_API LLMAgent : public LLMLocal
83{
84public:
85 const std::string USER_ROLE = "user";
86 const std::string ASSISTANT_ROLE = "assistant";
87
92 LLMAgent(LLMLocal *llm, const std::string &system_prompt = "");
93
94 //=================================== LLM METHOD DELEGATES ===================================//
98 std::string tokenize_json(const json &data) override { return llm->tokenize_json(data); }
99
104 std::string detokenize_json(const json &data) override { return llm->detokenize_json(data); }
105
110 std::string embeddings_json(const json &data) override { return llm->embeddings_json(data); }
111
117 std::string completion_json(const json &data, CharArrayFn callback = nullptr, bool callbackWithJSON = true) override { return llm->completion_json(data, callback, callbackWithJSON); }
118
123 std::string apply_template_json(const json &data) override { return llm->apply_template_json(data); }
124
129 std::string slot_json(const json &data) override { return llm->slot_json(data); }
130
133 void cancel(int id_slot) override { return llm->cancel(id_slot); }
134
137 int get_next_available_slot() override { return llm->get_next_available_slot(); }
138
141 int get_slot_context_size() override { return llm->get_slot_context_size(); }
142
143 //=================================== LLM METHOD DELEGATES ===================================//
144
145 //=================================== Slot-aware method overrides ===================================//
150 virtual json build_completion_json(const std::string &prompt) { return LLMLocal::build_completion_json(prompt, this->id_slot); }
151
158 virtual std::string completion(const std::string &prompt, CharArrayFn callback = nullptr, bool return_response_json = false)
159 {
160 return LLMLocal::completion(prompt, callback, this->id_slot, return_response_json);
161 }
162
168 virtual json build_slot_json(const std::string &action, const std::string &filepath) { return LLMLocal::build_slot_json(this->id_slot, action, filepath); }
169
174 virtual std::string save_slot(const std::string &filepath) { return LLMLocal::save_slot(this->id_slot, filepath); }
175
180 virtual std::string load_slot(const std::string &filepath) { return LLMLocal::load_slot(this->id_slot, filepath); }
181
184 virtual void cancel() { llm->cancel(this->id_slot); }
185 //=================================== Slot-aware method overrides ===================================//
186
190 inline int get_slot() { return id_slot; }
191
195 void set_slot(int id_slot);
196
197 // Prompt configuration methods
198
202 void set_system_prompt(const std::string &system_prompt_) { system_prompt = system_prompt_; }
203
206 std::string get_system_prompt() const { return system_prompt; }
207
211 void set_history(const json &history_) { history = history_; }
212
216 json get_history() const { return history; }
217
218 // History management methods
219
223 void add_user_message(const std::string &content) { add_message(USER_ROLE, content); }
224
228 void add_assistant_message(const std::string &content) { add_message(ASSISTANT_ROLE, content); }
229
232 void clear_history();
233
236 void remove_last_message();
237
241 void save_history(const std::string &filepath) const;
242
246 void load_history(const std::string &filepath);
247
251 size_t get_history_size() const { return history.size(); }
252
253 // Context overflow management
254
261 float target_ratio = 0.5f,
262 const std::string &summarize_prompt = SUMMARY_PROMPT
263 )
264 {
265 overflow_strategy = strategy;
266 target_context_ratio = target_ratio;
267 this->summarize_prompt = summarize_prompt;
268 }
269
271 ContextOverflowStrategy get_overflow_strategy() const { return overflow_strategy; }
272
274 std::string get_summarize_prompt() const { return summarize_prompt; }
275
277 std::string get_summary() const { return summary; }
278
280 void set_summary(const std::string &summary_) { summary = summary_; }
281
282 // Chat functionality
283
293 std::string chat(const std::string &user_prompt, bool add_to_history = true, CharArrayFn callback = nullptr, bool return_response_json = false, bool debug_prompt = false);
294
295protected:
296 void set_n_keep();
297
300 json build_system_history() const;
301
306 json build_working_history(const std::string &user_prompt, bool include_history=true) const;
307
311 bool handle_overflow(const std::string &user_prompt);
312
314 void truncate_history(const std::string &user_prompt);
315
317 void summarize_history(const std::string &user_prompt);
318
323 virtual void add_message(const std::string &role, const std::string &content);
324
325private:
326 LLMLocal *llm = nullptr;
327 int id_slot = -1;
328 std::string system_prompt = "";
329 std::string system_role = "system";
330 std::string summary = "";
331 json history;
332
333 // Context overflow
335 float target_context_ratio = 0.5f;
336 std::string summarize_prompt = SUMMARY_PROMPT;
337};
338
341
342extern "C"
343{
349 UNDREAMAI_API LLMAgent *LLMAgent_Construct(LLMLocal *llm, const char *system_prompt = "");
350
355 UNDREAMAI_API void LLMAgent_Set_System_Prompt(LLMAgent *llm, const char *system_prompt);
356
360 UNDREAMAI_API const char *LLMAgent_Get_System_Prompt(LLMAgent *llm);
361
365 UNDREAMAI_API void LLMAgent_Set_Slot(LLMAgent *llm, int slot_id);
366
370 UNDREAMAI_API int LLMAgent_Get_Slot(LLMAgent *llm);
371
381 UNDREAMAI_API const char *LLMAgent_Chat(LLMAgent *llm, const char *user_prompt, bool add_to_history = true, CharArrayFn callback = nullptr, bool return_response_json = false, bool debug_prompt = false);
382
386 UNDREAMAI_API void LLMAgent_Clear_History(LLMAgent *llm);
387
391 UNDREAMAI_API const char *LLMAgent_Get_History(LLMAgent *llm);
392
397 UNDREAMAI_API void LLMAgent_Set_History(LLMAgent *llm, const char *history_json);
398
403 UNDREAMAI_API void LLMAgent_Add_User_Message(LLMAgent *llm, const char *content);
404
409 UNDREAMAI_API void LLMAgent_Add_Assistant_Message(LLMAgent *llm, const char *content);
410
414 UNDREAMAI_API void LLMAgent_Remove_Last_Message(LLMAgent *llm);
415
420 UNDREAMAI_API void LLMAgent_Save_History(LLMAgent *llm, const char *filepath);
421
426 UNDREAMAI_API void LLMAgent_Load_History(LLMAgent *llm, const char *filepath);
427
431 UNDREAMAI_API size_t LLMAgent_Get_History_Size(LLMAgent *llm);
432
438 UNDREAMAI_API void LLMAgent_Set_Overflow_Strategy(LLMAgent *llm, int strategy, float target_ratio, const char *summarize_prompt);
439
443 UNDREAMAI_API const char *LLMAgent_Get_Summary(LLMAgent *llm);
444
448 UNDREAMAI_API void LLMAgent_Set_Summary(LLMAgent *llm, const char *summary);
449}
450
451
Core LLM functionality interface and base classes.
ContextOverflowStrategy
Strategy to apply when the chat history would exceed the model's context window.
Definition LLM_agent.h:58
@ Summarize
Summarise the full history (rolling chunks if needed), embed it in the system message,...
@ None
No automatic handling — may crash if context is exceeded.
@ Truncate
Remove oldest messages (in pairs) from the front until history fits within target_context_ratio.
Client interface for local and remote LLM access.
High-level conversational agent for LLM interactions.
Definition LLM_agent.h:83
std::string completion_json(const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
Generate completion (delegate to wrapped LLM)
Definition LLM_agent.h:117
virtual std::string save_slot(const std::string &filepath)
Save agent's slot state.
Definition LLM_agent.h:174
void set_history(const json &history_)
Set conversation history.
Definition LLM_agent.h:211
json get_history() const
Get conversation history.
Definition LLM_agent.h:216
void set_overflow_strategy(ContextOverflowStrategy strategy, float target_ratio=0.5f, const std::string &summarize_prompt=SUMMARY_PROMPT)
Configure how the agent handles context overflow.
Definition LLM_agent.h:259
int get_slot()
Get current processing slot ID.
Definition LLM_agent.h:190
void add_user_message(const std::string &content)
Add a user message to conversation history.
Definition LLM_agent.h:223
virtual std::string load_slot(const std::string &filepath)
Load agent's slot state.
Definition LLM_agent.h:180
virtual json build_completion_json(const std::string &prompt)
Build completion JSON with agent's slot.
Definition LLM_agent.h:150
std::string embeddings_json(const json &data) override
Generate embeddings with HTTP response support.
Definition LLM_agent.h:110
std::string apply_template_json(const json &data) override
Apply a chat template to message data.
Definition LLM_agent.h:123
ContextOverflowStrategy get_overflow_strategy() const
Get the current overflow strategy.
Definition LLM_agent.h:271
int get_next_available_slot() override
Get available slot (delegate to wrapped LLM)
Definition LLM_agent.h:137
void add_assistant_message(const std::string &content)
Add an assistant message to conversation history.
Definition LLM_agent.h:228
size_t get_history_size() const
Get number of messages in history.
Definition LLM_agent.h:251
void cancel(int id_slot) override
Cancel request (delegate to wrapped LLM)
Definition LLM_agent.h:133
void set_summary(const std::string &summary_)
Set the rolling summary directly (e.g. after loading from file)
Definition LLM_agent.h:280
std::string get_summary() const
Get the current rolling summary (empty if none has been generated yet)
Definition LLM_agent.h:277
virtual json build_slot_json(const std::string &action, const std::string &filepath)
Build slot operation JSON with agent's slot.
Definition LLM_agent.h:168
void set_system_prompt(const std::string &system_prompt_)
Set system prompt.
Definition LLM_agent.h:202
std::string detokenize_json(const json &data) override
Convert tokens back to text.
Definition LLM_agent.h:104
std::string get_system_prompt() const
Get current system prompt.
Definition LLM_agent.h:206
virtual void cancel()
Cancel agent's current request.
Definition LLM_agent.h:184
std::string get_summarize_prompt() const
Get the current summarize prompt.
Definition LLM_agent.h:274
std::string tokenize_json(const json &data) override
Tokenize input (override)
Definition LLM_agent.h:98
std::string slot_json(const json &data) override
Manage slots with HTTP response support.
Definition LLM_agent.h:129
int get_slot_context_size() override
Get slot context size (delegate to wrapped LLM)
Definition LLM_agent.h:141
virtual std::string completion(const std::string &prompt, CharArrayFn callback=nullptr, bool return_response_json=false)
Generate completion with agent's slot.
Definition LLM_agent.h:158
Abstract class for local LLM operations with slot management.
Definition LLM.h:222
virtual std::string slot_json(const json &data)=0
Manage slots with HTTP response support.
virtual int get_slot_context_size()=0
Get slot context size.
virtual std::string load_slot(int id_slot, const std::string &filepath)
Load slot state from file.
Definition LLM.h:242
virtual int get_next_available_slot()=0
Get an available processing slot.
virtual std::string save_slot(int id_slot, const std::string &filepath)
Save slot state to file.
Definition LLM.h:236
virtual void cancel(int id_slot)=0
Cancel request.
virtual json build_slot_json(int id_slot, const std::string &action, const std::string &filepath)
Build JSON for slot operations.
Definition LLM.cpp:297
virtual std::string embeddings_json(const json &data)=0
Generate embeddings with HTTP response support.
virtual std::string apply_template_json(const json &data)=0
Apply a chat template to message data.
virtual json build_completion_json(const std::string &prompt, int id_slot=-1)
Build JSON for completion generation.
Definition LLM.cpp:236
virtual std::string tokenize_json(const json &data)=0
Tokenize input (override)
virtual std::string completion(const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
Generate completion.
Definition LLM.cpp:284
virtual std::string completion_json(const json &data, CharArrayFn callback, bool callbackWithJSON)=0
Generate text completion.
virtual std::string detokenize_json(const json &data)=0
Convert tokens back to text.
void LLMAgent_Set_Overflow_Strategy(LLMAgent *llm, int strategy, float target_ratio, const char *summarize_prompt)
Configure the context overflow strategy (C API)
void LLMAgent_Save_History(LLMAgent *llm, const char *filepath)
Save conversation history to file (C API)
void LLMAgent_Load_History(LLMAgent *llm, const char *filepath)
Load conversation history from file (C API)
size_t LLMAgent_Get_History_Size(LLMAgent *llm)
Get conversation history size (C API)
void LLMAgent_Add_Assistant_Message(LLMAgent *llm, const char *content)
Add assistant message to history (C API)
LLMAgent * LLMAgent_Construct(LLMLocal *llm, const char *system_prompt="")
Construct LLMAgent (C API)
void LLMAgent_Clear_History(LLMAgent *llm)
Clear conversation history (C API)
void LLMAgent_Set_History(LLMAgent *llm, const char *history_json)
Set conversation history (C API)
const char * LLMAgent_Chat(LLMAgent *llm, const char *user_prompt, bool add_to_history=true, CharArrayFn callback=nullptr, bool return_response_json=false, bool debug_prompt=false)
Conduct chat interaction (C API)
const char * LLMAgent_Get_Summary(LLMAgent *llm)
Get the current rolling summary (C API)
int LLMAgent_Get_Slot(LLMAgent *llm)
Get processing slot (C API)
void LLMAgent_Set_Slot(LLMAgent *llm, int slot_id)
Set processing slot (C API)
const char * LLMAgent_Get_History(LLMAgent *llm)
Get conversation history (C API)
void LLMAgent_Add_User_Message(LLMAgent *llm, const char *content)
Add user message to history (C API)
void LLMAgent_Set_System_Prompt(LLMAgent *llm, const char *system_prompt)
Set system prompt (C API)
void LLMAgent_Set_Summary(LLMAgent *llm, const char *summary)
Set the rolling summary directly (C API)
const char * LLMAgent_Get_System_Prompt(LLMAgent *llm)
Get system prompt (C API)
void LLMAgent_Remove_Last_Message(LLMAgent *llm)
Remove last message from history (C API)
Structure representing a single chat message.
Definition LLM_agent.h:16
std::string role
Message role (e.g., "user", "assistant", "system")
Definition LLM_agent.h:17
ChatMessage(const std::string &role_, const std::string &content_)
Parameterized constructor.
Definition LLM_agent.h:26
std::string content
Message content text.
Definition LLM_agent.h:18
json to_json() const
Convert message to JSON representation.
Definition LLM_agent.h:32
bool operator==(const ChatMessage &other) const
Equality comparison operator.
Definition LLM_agent.h:50
ChatMessage()=default
Default constructor.
static ChatMessage from_json(const json &j)
Create message from JSON representation.
Definition LLM_agent.h:42