LlamaLib  v2.0.5
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM.h
Go to the documentation of this file.
1
6
7#pragma once
8
9#include "defs.h"
10#include "error_handling.h"
11#include <sstream>
12#if defined(__APPLE__)
13#include <TargetConditionals.h>
14#endif
15
16
/// @brief Structure representing a LoRA adapter with ID and scale.
/// NOTE(review): struct declaration line reconstructed from the generated docs
/// ("Structure representing a LoRA adapter with ID and scale — Definition LLM.h:20").
struct LoraIdScale
{
    int id;      ///< Unique identifier for the LoRA adapter.
    float scale; ///< Scale factor for the LoRA adapter (typically 0.0 to 1.0)

    /// @brief Equality comparison operator.
    /// NOTE(review): exact float comparison is intentional — two entries are
    /// equal only when the configured scale matches exactly.
    bool operator==(const LoraIdScale &other) const
    {
        return id == other.id && scale == other.scale;
    }
};
32
/// @brief Structure representing a LoRA adapter with ID, scale, and file path.
/// NOTE(review): struct declaration line reconstructed from the generated docs
/// ("Structure representing a LoRA adapter with ID, scale, and file path — Definition LLM.h:36").
struct LoraIdScalePath
{
    int id;           ///< Unique identifier for the LoRA adapter.
    float scale;      ///< Scale factor for the LoRA adapter.
    std::string path; ///< Filesystem path to the LoRA adapter file.

    /// @brief Equality comparison operator.
    /// NOTE(review): exact float comparison of scale is intentional (configured value).
    bool operator==(const LoraIdScalePath &other) const
    {
        return id == other.id && scale == other.scale && path == other.path;
    }
};
49
54
59class UNDREAMAI_API LLM
60{
61public:
62 int32_t n_keep = 0;
63 std::string grammar = "";
65
67 virtual ~LLM() = default;
68
72 virtual std::vector<int> tokenize(const std::string &query);
73
77 virtual std::string tokenize_json(const json &data) = 0;
78
82 virtual std::string detokenize(const std::vector<int32_t> &tokens);
83
88 virtual std::string detokenize_json(const json &data) = 0;
89
93 virtual std::vector<float> embeddings(const std::string &query);
94
99 virtual std::string embeddings_json(const json &data) = 0;
100
104 // See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#post-completion-given-a-prompt-it-returns-the-predicted-completion for the different parameters
105 virtual void set_completion_params(json completion_params_) { completion_params = completion_params_; }
106
109 virtual std::string get_completion_params() { return completion_params; }
110
117 virtual std::string completion(const std::string &prompt, CharArrayFn callback = nullptr, int id_slot = -1, bool return_response_json = false);
118
125 virtual std::string completion_json(const json &data, CharArrayFn callback, bool callbackWithJSON) = 0;
126
130 virtual void set_grammar(std::string grammar_) { grammar = grammar_; }
131
134 virtual std::string get_grammar() { return grammar; }
135
139 virtual std::string apply_template(const json &messages);
140
145 virtual std::string apply_template_json(const json &data) = 0;
146
150 static bool has_gpu_layers(const std::string &command);
151
163 static std::string LLM_args_to_command(const std::string &model_path, int num_slots = 1, int num_threads = -1, int num_GPU_layers = 0, bool flash_attention = false, int context_size = 4096, int batch_size = 2048, bool embedding_only = false, const std::vector<std::string> &lora_paths = {});
164
165protected:
169 virtual json build_apply_template_json(const json &messages);
170
174 virtual std::string parse_apply_template_json(const json &result);
175
179 virtual json build_tokenize_json(const std::string &query);
180
184 virtual std::vector<int> parse_tokenize_json(const json &result);
185
189 virtual json build_detokenize_json(const std::vector<int32_t> &tokens);
190
194 virtual std::string parse_detokenize_json(const json &result);
195
199 virtual json build_embeddings_json(const std::string &query);
200
204 virtual std::vector<float> parse_embeddings_json(const json &result);
205
210 virtual json build_completion_json(const std::string &prompt, int id_slot = -1);
211
215 virtual std::string parse_completion_json(const json &result);
216};
217
221class UNDREAMAI_API LLMLocal : public LLM
222{
223public:
226 virtual int get_next_available_slot() = 0;
227
230 virtual int get_slot_context_size() = 0;
231
236 virtual std::string save_slot(int id_slot, const std::string &filepath) { return slot(id_slot, "save", filepath); }
237
242 virtual std::string load_slot(int id_slot, const std::string &filepath) { return slot(id_slot, "restore", filepath); }
243
246 virtual void cancel(int id_slot) = 0;
247
252 virtual std::string slot_json(const json &data) = 0;
253
254protected:
260 virtual std::string slot(int id_slot, const std::string &action, const std::string &filepath);
261
267 virtual json build_slot_json(int id_slot, const std::string &action, const std::string &filepath);
268
272 virtual std::string parse_slot_json(const json &result);
273};
274
278class UNDREAMAI_API LLMProvider : public LLMLocal
279{
280public:
282 virtual ~LLMProvider();
283
287 virtual bool lora_weight(const std::vector<LoraIdScale> &loras);
288
293 virtual std::string lora_weight_json(const json &data) = 0;
294
297 virtual std::vector<LoraIdScalePath> lora_list();
298
301 virtual std::string lora_list_json() = 0;
302
305 virtual void enable_reasoning(bool reasoning) { reasoning_enabled = reasoning; }
306
309 virtual void debug(int debug_level) = 0;
310
313 virtual void logging_callback(CharArrayFn callback) = 0;
314
316 virtual void logging_stop();
317
319 virtual void start() = 0;
320
323 virtual bool started() = 0;
324
326 virtual void stop() = 0;
327
332 virtual void start_server(const std::string &host = "0.0.0.0", int port = -1, const std::string &API_key = "") = 0;
333
335 virtual void stop_server() = 0;
336
338 virtual void join_service() = 0;
339
341 virtual void join_server() = 0;
342
346 virtual void set_SSL(const std::string &SSL_cert, const std::string &SSL_key) = 0;
347
350 virtual int embedding_size() = 0;
351
354 virtual std::string debug_implementation() = 0;
355
356protected:
357 bool reasoning_enabled = false;
358
362 virtual bool parse_lora_weight_json(const json &result);
363
367 virtual json build_lora_weight_json(const std::vector<LoraIdScale> &loras);
368
372 virtual std::vector<LoraIdScalePath> parse_lora_list_json(const json &result);
373
377 virtual json build_lora_list_json(const std::vector<LoraIdScalePath> &loras);
378};
379
384{
385public:
386 static bool initialised;
387
392 {
393 custom_instance_ = instance;
394 initialised = true;
395 }
396
400 {
401 if (custom_instance_)
402 return *custom_instance_;
403
404 static LLMProviderRegistry registry;
405 initialised = true;
406 return registry;
407 }
408
413 {
414 std::lock_guard<std::mutex> lock(mutex_);
415 instances_.push_back(service);
416 }
417
422 {
423 std::lock_guard<std::mutex> lock(mutex_);
424 instances_.erase(std::remove(instances_.begin(), instances_.end(), service), instances_.end());
425 }
426
430 std::vector<LLMProvider *> get_instances()
431 {
432 std::lock_guard<std::mutex> lock(mutex_);
433 return instances_;
434 }
435
438 void set_debug_level(int level)
439 {
440 debug_level_ = level;
441 }
442
445 const int get_debug_level()
446 {
447 return debug_level_;
448 }
449
452 void set_log_callback(CharArrayFn callback)
453 {
454 log_callback_ = callback;
455 }
456
459 const CharArrayFn get_log_callback()
460 {
461 return log_callback_;
462 }
463
464private:
465 static LLMProviderRegistry *custom_instance_;
466
467 std::mutex mutex_;
468 std::vector<LLMProvider *> instances_;
469 int debug_level_ = 0;
470 CharArrayFn log_callback_ = nullptr;
471
473 LLMProviderRegistry() = default;
475 ~LLMProviderRegistry() = default;
479 LLMProviderRegistry &operator=(const LLMProviderRegistry &) = delete;
480};
481
484
485extern "C"
486{
490 UNDREAMAI_API bool Has_GPU_Layers(const char *command);
491
494 UNDREAMAI_API void LLM_Debug(int debug_level);
495
498 UNDREAMAI_API void LLM_Logging_Callback(CharArrayFn callback);
499
501 UNDREAMAI_API void LLM_Logging_Stop();
502
503#ifdef _DEBUG
506 UNDREAMAI_API const bool IsDebuggerAttached(void);
507#endif
508
512 UNDREAMAI_API void LLM_Set_Completion_Parameters(LLM *llm, const char *params_json = "{}");
513
517 UNDREAMAI_API const char *LLM_Get_Completion_Parameters(LLM *llm);
518
522 UNDREAMAI_API void LLM_Set_Grammar(LLM *llm, const char *grammar = "");
523
527 UNDREAMAI_API const char *LLM_Get_Grammar(LLM *llm);
528
533 UNDREAMAI_API const char *LLM_Apply_Template(LLM *llm, const char *messages_as_json);
534
539 UNDREAMAI_API const char *LLM_Tokenize(LLM *llm, const char *query);
540
545 UNDREAMAI_API const char *LLM_Detokenize(LLM *llm, const char *tokens_as_json);
546
551 UNDREAMAI_API const char *LLM_Embeddings(LLM *llm, const char *query);
552
560 UNDREAMAI_API const char *LLM_Completion(LLM *llm, const char *prompt, CharArrayFn callback = nullptr, int id_slot = -1, bool return_response_json = false);
561
567 UNDREAMAI_API const char *LLM_Save_Slot(LLMLocal *llm, int id_slot, const char *filepath);
568
574 UNDREAMAI_API const char *LLM_Load_Slot(LLMLocal *llm, int id_slot, const char *filepath);
575
579 UNDREAMAI_API void LLM_Cancel(LLMLocal *llm, int id_slot);
580
585 UNDREAMAI_API bool LLM_Lora_Weight(LLMProvider *llm, const char *loras_as_json);
586
590 UNDREAMAI_API void LLM_Enable_Reasoning(LLMProvider *llm, bool enable_reasoning);
591
595 UNDREAMAI_API const char *LLM_Lora_List(LLMProvider *llm);
596
599 UNDREAMAI_API void LLM_Delete(LLMProvider *llm);
600
603 UNDREAMAI_API void LLM_Start(LLMProvider *llm);
604
608 UNDREAMAI_API const bool LLM_Started(LLMProvider *llm);
609
612 UNDREAMAI_API void LLM_Stop(LLMProvider *llm);
613
619 UNDREAMAI_API void LLM_Start_Server(LLMProvider *llm, const char *host = "0.0.0.0", int port = -1, const char *API_key = "");
620
623 UNDREAMAI_API void LLM_Stop_Server(LLMProvider *llm);
624
627 UNDREAMAI_API void LLM_Join_Service(LLMProvider *llm);
628
631 UNDREAMAI_API void LLM_Join_Server(LLMProvider *llm);
632
637 UNDREAMAI_API void LLM_Set_SSL(LLMProvider *llm, const char *SSL_cert, const char *SSL_key);
638
641 UNDREAMAI_API const int LLM_Status_Code();
642
645 UNDREAMAI_API const char *LLM_Status_Message();
646
650 UNDREAMAI_API const int LLM_Embedding_Size(LLMProvider *llm);
651}
652
653
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Definition LLM.cpp:25
Abstract class for local LLM operations with slot management.
Definition LLM.h:222
virtual std::string slot_json(const json &data)=0
Manage slots with HTTP response support.
virtual int get_slot_context_size()=0
Get slot context size.
virtual std::string load_slot(int id_slot, const std::string &filepath)
Load slot state from file.
Definition LLM.h:242
virtual int get_next_available_slot()=0
Get an available processing slot.
virtual std::string save_slot(int id_slot, const std::string &filepath)
Save slot state to file.
Definition LLM.h:236
virtual void cancel(int id_slot)=0
Cancel request.
Registry for managing LLM provider instances.
Definition LLM.h:384
void unregister_instance(LLMProvider *service)
Unregister an LLM provider instance.
Definition LLM.h:421
std::vector< LLMProvider * > get_instances()
Get all registered provider instances.
Definition LLM.h:430
void set_debug_level(int level)
Set global debug level.
Definition LLM.h:438
const int get_debug_level()
Get current debug level.
Definition LLM.h:445
static bool initialised
Whether the registry has been initialized.
Definition LLM.h:386
void set_log_callback(CharArrayFn callback)
Set global log callback.
Definition LLM.h:452
const CharArrayFn get_log_callback()
Get current log callback.
Definition LLM.h:459
void register_instance(LLMProvider *service)
Register an LLM provider instance.
Definition LLM.h:412
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Definition LLM.h:399
static void inject_registry(LLMProviderRegistry *instance)
Inject a custom registry instance.
Definition LLM.h:391
Abstract class for LLM service providers.
Definition LLM.h:279
virtual void logging_callback(CharArrayFn callback)=0
Set logging callback function.
virtual bool started()=0
Check if service is started.
virtual void start_server(const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="")=0
Start HTTP server.
virtual std::string debug_implementation()=0
Implementation debugging.
virtual void join_service()=0
Wait for service thread to complete.
virtual void stop_server()=0
Stop HTTP server.
virtual void debug(int debug_level)=0
Set debug level.
virtual void join_server()=0
Wait for server thread to complete.
virtual void enable_reasoning(bool reasoning)
Enable reasoning.
Definition LLM.h:305
virtual void stop()=0
Stop the LLM service.
virtual void set_SSL(const std::string &SSL_cert, const std::string &SSL_key)=0
Configure SSL certificates.
virtual std::string lora_list_json()=0
List available LoRA adapters.
virtual std::string lora_weight_json(const json &data)=0
Configure LoRA weights with HTTP response support.
virtual void start()=0
Start the LLM service.
virtual int embedding_size()=0
Get embedding vector size.
Abstract base class for Large Language Model operations.
Definition LLM.h:60
virtual std::string embeddings_json(const json &data)=0
Generate embeddings with HTTP response support.
virtual std::string get_completion_params()
Get current completion parameters.
Definition LLM.h:109
virtual std::string apply_template_json(const json &data)=0
Apply a chat template to message data.
virtual std::string tokenize_json(const json &data)=0
Tokenize input (override)
virtual void set_completion_params(json completion_params_)
Set completion parameters.
Definition LLM.h:105
json completion_params
JSON object containing completion parameters.
Definition LLM.h:64
virtual void set_grammar(std::string grammar_)
Set grammar for constrained generation.
Definition LLM.h:130
virtual ~LLM()=default
Virtual destructor.
virtual std::string completion_json(const json &data, CharArrayFn callback, bool callbackWithJSON)=0
Generate text completion.
virtual std::string detokenize_json(const json &data)=0
Convert tokens back to text.
virtual std::string get_grammar()
Get current grammar specification.
Definition LLM.h:134
File with basic definitions.
const char * LLM_Lora_List(LLMProvider *llm)
List LoRA adapters (C API)
Definition LLM.cpp:531
const char * LLM_Get_Grammar(LLM *llm)
Get grammar (C API)
Definition LLM.cpp:482
void LLM_Stop(LLMProvider *llm)
Stop LLM service (C API)
Definition LLM.cpp:581
void LLM_Enable_Reasoning(LLMProvider *llm, bool enable_reasoning)
Enable reasoning (C API)
Definition LLM.cpp:492
const char * LLM_Save_Slot(LLMLocal *llm, int id_slot, const char *filepath)
Save slot state (C API)
Definition LLM.cpp:497
void LLM_Logging_Callback(CharArrayFn callback)
Set global logging callback (C API)
Definition LLM.cpp:414
const char * LLM_Load_Slot(LLMLocal *llm, int id_slot, const char *filepath)
Load slot state (C API)
Definition LLM.cpp:502
void LLM_Join_Service(LLMProvider *llm)
Wait for service to complete (C API)
Definition LLM.cpp:561
void LLM_Set_SSL(LLMProvider *llm, const char *SSL_cert, const char *SSL_key)
Set SSL configuration (C API)
Definition LLM.cpp:586
bool Has_GPU_Layers(const char *command)
Check if command has GPU layers (C API)
Definition LLM.cpp:399
const char * LLM_Status_Message()
Get last operation status message (C API)
Definition LLM.cpp:596
void LLM_Set_Grammar(LLM *llm, const char *grammar="")
Set grammar (C API)
Definition LLM.cpp:477
const char * LLM_Apply_Template(LLM *llm, const char *messages_as_json)
Apply chat template (C API)
Definition LLM.cpp:487
void LLM_Cancel(LLMLocal *llm, int id_slot)
Cancel request (C API)
Definition LLM.cpp:507
void LLM_Logging_Stop()
Stop global logging (C API)
Definition LLM.cpp:424
void LLM_Start(LLMProvider *llm)
Start LLM service (C API)
Definition LLM.cpp:571
void LLM_Set_Completion_Parameters(LLM *llm, const char *params_json="{}")
Set completion parameters (C API)
Definition LLM.cpp:466
const int LLM_Embedding_Size(LLMProvider *llm)
Get embedding vector size (C API)
Definition LLM.cpp:602
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
Definition LLM.cpp:543
void LLM_Debug(int debug_level)
Set global debug level (C API)
Definition LLM.cpp:404
bool LLM_Lora_Weight(LLMProvider *llm, const char *loras_as_json)
Configure LoRA weights (C API)
Definition LLM.cpp:512
const char * LLM_Completion(LLM *llm, const char *prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
Generate completion (C API)
Definition LLM.cpp:461
const char * LLM_Tokenize(LLM *llm, const char *query)
Tokenize text (C API)
Definition LLM.cpp:444
void LLM_Join_Server(LLMProvider *llm)
Wait for server to complete (C API)
Definition LLM.cpp:566
const bool LLM_Started(LLMProvider *llm)
Check if service is started (C API)
Definition LLM.cpp:576
const char * LLM_Get_Completion_Parameters(LLM *llm)
Get completion parameters (C API)
Definition LLM.cpp:472
const char * LLM_Embeddings(LLM *llm, const char *query)
Generate embeddings (C API)
Definition LLM.cpp:455
const int LLM_Status_Code()
Get last operation status code (C API)
Definition LLM.cpp:591
void LLM_Stop_Server(LLMProvider *llm)
Stop HTTP server (C API)
Definition LLM.cpp:556
const char * LLM_Detokenize(LLM *llm, const char *tokens_as_json)
Detokenize tokens (C API)
Definition LLM.cpp:450
void LLM_Start_Server(LLMProvider *llm, const char *host="0.0.0.0", int port=-1, const char *API_key="")
Start HTTP server (C API)
Definition LLM.cpp:551
Structure representing a LoRA adapter with ID, scale, and file path.
Definition LLM.h:36
bool operator==(const LoraIdScalePath &other) const
Equality comparison operator.
Definition LLM.h:44
std::string path
Filesystem path to the LoRA adapter file.
Definition LLM.h:39
int id
Unique identifier for the LoRA adapter.
Definition LLM.h:37
float scale
Scale factor for the LoRA adapter.
Definition LLM.h:38
Structure representing a LoRA adapter with ID and scale.
Definition LLM.h:20
bool operator==(const LoraIdScale &other) const
Equality comparison operator.
Definition LLM.h:27
float scale
Scale factor for the LoRA adapter (typically 0.0 to 1.0)
Definition LLM.h:22
int id
Unique identifier for the LoRA adapter.
Definition LLM.h:21