LlamaLib v2.0.2
Cross-platform library for local LLMs
LLM.cpp
1#include "LLM.h"
2
3#if !(TARGET_OS_IOS || TARGET_OS_VISION)
4std::atomic_flag sigint_terminating = ATOMIC_FLAG_INIT;
5
6void llm_sigint_signal_handler(int sig)
7{
8 if (sigint_terminating.test_and_set())
9 {
10 // In case shutdown hangs, hitting Ctrl+C a second time force-terminates the server.
11 // This improves the developer experience; it can be removed once the server is stable enough.
12 fprintf(stderr, "Received second interrupt, terminating immediately.\n");
13 exit(1);
14 }
15
16 for (auto *inst : LLMProviderRegistry::instance().get_instances())
17 {
18 inst->stop();
19 inst->stop_server();
20 }
21}
22#endif
23
24// Use a function to ensure the setup only happens once across all libraries
25void ensure_error_handlers_initialized()
26{
28 {
29 static std::once_flag initialized;
30 std::call_once(initialized, [](){
31 set_error_handlers();
32#if !(TARGET_OS_IOS || TARGET_OS_VISION)
33 register_sigint_hook(llm_sigint_signal_handler);
34#endif
35 });
36 }
37}
38
39LLMProviderRegistry *LLMProviderRegistry::custom_instance_ = nullptr;
41
43
44//=========================== Helpers ===========================//
45
46std::string LLM::LLM_args_to_command(const std::string &model_path, int num_slots, int num_threads, int num_GPU_layers, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string> &lora_paths)
47{
48 std::string command = "-m \"" + model_path + "\"" +
49 " -t " + std::to_string(num_threads) +
50 " -np " + std::to_string(num_slots) +
51 " -c " + std::to_string(context_size) +
52 " -b " + std::to_string(batch_size);
53
54 if (num_GPU_layers > 0)
55 command += " -ngl " + std::to_string(num_GPU_layers);
56 command += " -fa ";
57 command += flash_attention ? "on" : "off";
58 if (embedding_only)
59 command += " --embedding";
60 for (const auto &lora_path : lora_paths)
61 command += " --lora \"" + lora_path + "\"";
62 return command;
63}
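// Usage sketch (illustrative model path and values): with the documented defaults for
// context_size (4096) and batch_size (2048), a call such as
//   LLM::LLM_args_to_command("/models/llama.gguf", /*num_slots=*/2, /*num_threads=*/8,
//                            /*num_GPU_layers=*/35, /*flash_attention=*/true)
// builds the argument string
//   -m "/models/llama.gguf" -t 8 -np 2 -c 4096 -b 2048 -ngl 35 -fa on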
64
65bool LLM::has_gpu_layers(const std::string &command)
66{
67 std::istringstream iss(command);
68 std::vector<std::string> args;
69 std::string token;
70
71 // Simple splitting (does not handle quoted args)
72 while (iss >> token)
73 {
74 args.push_back(token);
75 }
76
77 for (size_t i = 0; i < args.size(); ++i)
78 {
79 const std::string &arg = args[i];
80
81 // Match separate argument + value
82 if (arg == "-ngl" || arg == "--gpu-layers" || arg == "--n-gpu-layers")
83 {
84 if (i + 1 < args.size())
85 {
86 try
87 {
88 int val = std::stoi(args[i + 1]);
89 return val > 0;
90 }
91 catch (...)
92 {
93 continue;
94 }
95 }
96 }
97
98 // Match inline --flag=value
99 size_t eqPos = arg.find('=');
100 if (eqPos != std::string::npos)
101 {
102 std::string key = arg.substr(0, eqPos);
103 std::string value = arg.substr(eqPos + 1);
104
105 if (key == "-ngl" || key == "--gpu-layers" || key == "--n-gpu-layers")
106 {
107 try
108 {
109 int val = std::stoi(value);
110 return val > 0;
111 }
112 catch (...)
113 {
114 continue;
115 }
116 }
117 }
118 }
119
120 return false;
121}
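// Illustrative checks, following the parsing above:
//   LLM::has_gpu_layers("-m model.gguf -ngl 35")         -> true
//   LLM::has_gpu_layers("-m model.gguf --gpu-layers=0")  -> false   (value must be > 0)
//   LLM::has_gpu_layers("-m model.gguf")                 -> false   (no GPU-layer flag present)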
122
123//=========================== Apply Template ===========================//
124
125json LLM::build_apply_template_json(const json &messages)
126{
127 json j;
128 j["messages"] = messages;
129 return j;
130}
131
132std::string LLM::parse_apply_template_json(const json &result)
133{
134 try
135 {
136 return result.at("prompt").get<std::string>();
137 }
138 catch (const std::exception &)
139 {
140 }
141 return "";
142}
143
144std::string LLM::apply_template(const json &messages)
145{
146 return parse_apply_template_json(json::parse(apply_template_json(build_apply_template_json(messages))));
147}
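// Usage sketch (assumes `llm` points to a concrete implementation; the role/content entries are illustrative):
//   json messages = json::parse(R"([{"role":"user","content":"Hello"}])");
//   std::string prompt = llm->apply_template(messages);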
148
149//=========================== Tokenize ===========================//
150
151json LLM::build_tokenize_json(const std::string &query)
152{
153 json j;
154 j["content"] = query;
155 return j;
156}
157
158std::vector<int> LLM::parse_tokenize_json(const json &result)
159{
160 try
161 {
162 return result.at("tokens").get<std::vector<int>>();
163 }
164 catch (const std::exception &)
165 {
166 }
167 return {};
168}
169
170std::vector<int> LLM::tokenize(const std::string &input)
171{
172 return parse_tokenize_json(json::parse(tokenize_json(build_tokenize_json(input))));
173}
174
175
176//=========================== Detokenize ===========================//
177
178json LLM::build_detokenize_json(const std::vector<int32_t> &tokens)
179{
180 json j;
181 j["tokens"] = tokens;
182 return j;
183}
184
185std::string LLM::parse_detokenize_json(const json &result)
186{
187 try
188 {
189 return result.at("content").get<std::string>();
190 }
191 catch (const std::exception &)
192 {
193 }
194 return "";
195}
196
197std::string LLM::detokenize(const std::vector<int32_t> &tokens)
198{
199 return parse_detokenize_json(json::parse(detokenize_json(build_detokenize_json(tokens))));
200}
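// Usage sketch (assumes `llm` points to a started, concrete LLM instance):
//   std::vector<int> tokens = llm->tokenize("Hello world");
//   std::string text = llm->detokenize(tokens);   // normally round-trips back to "Hello world"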
201
202//=========================== Embeddings ===========================//
203
204json LLM::build_embeddings_json(const std::string &query)
205{
206 json j;
207 j["content"] = query;
208 return j;
209}
210
211std::vector<float> LLM::parse_embeddings_json(const json &result)
212{
213 try
214 {
215 const json& emb = result.at(0).at("embedding");
216 if (emb.is_array() && !emb.empty())
217 {
218 if (emb[0].is_number()) return emb.get<std::vector<float>>();
219 if (emb[0].is_array()) return emb.at(0).get<std::vector<float>>();
220 }
221 }
222 catch (const std::exception &)
223 {
224 }
225 return {};
226}
227
228std::vector<float> LLM::embeddings(const std::string &query)
229{
230 return parse_embeddings_json(json::parse(embeddings_json(build_embeddings_json(query))));
231}
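// Usage sketch (assumes an embedding-capable instance, e.g. one launched with the --embedding flag above):
//   std::vector<float> vec = llm->embeddings("How are you?");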
232
233//=========================== Completion ===========================//
234
235json LLM::build_completion_json(const std::string &prompt, int id_slot)
236{
237 json j;
238 j["prompt"] = prompt;
239 j["id_slot"] = id_slot;
240 j["n_keep"] = n_keep;
241
242 if (!grammar.empty())
243 {
244 try
245 {
246 j["json_schema"] = json::parse(grammar);
247 }
248 catch (const json::parse_error &)
249 {
250 j["grammar"] = grammar;
251 }
252 }
253
254 if (completion_params.is_object())
255 {
256 for (json::const_iterator it = completion_params.begin(); it != completion_params.end(); ++it)
257 {
258 j[it.key()] = it.value();
259 }
260 }
261 return j;
262}
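// To illustrate the grammar branch above (example strings are made up): a grammar that parses as JSON
// is forwarded as a schema, anything else is passed through verbatim.
//   llm->set_grammar(R"({"type":"object"})");        // request carries "json_schema": {"type":"object"}
//   llm->set_grammar("root ::= \"yes\" | \"no\"");   // not valid JSON -> request carries "grammar": "root ::= ..."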
263
264std::string LLM::parse_completion_json(const json &result)
265{
266 try
267 {
268 if (result.contains("error")) {
269 json error = result.at("error");
270 int code = error.at("code").get<int>();
271 std::string message = error.at("message").get<std::string>();
272 fail(message, code);
273 return "";
274 }
275 return result.at("content").get<std::string>();
276 }
277 catch (const std::exception &)
278 {
279 }
280 return "";
281}
282
283std::string LLM::completion(const std::string &prompt, CharArrayFn callback, int id_slot, bool return_response_json)
284{
285 std::string response = completion_json(
286 build_completion_json(prompt, id_slot),
287 callback,
288 false);
289 if (return_response_json)
290 return response;
291 return parse_completion_json(json::parse(response));
292}
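// Usage sketch (assumes `llm` is concrete and that CharArrayFn is a plain `const char*` callback;
// the streaming callback below is hypothetical):
//   void print_chunk(const char *chunk) { fputs(chunk, stdout); }
//   std::string reply = llm->completion("Write a haiku about autumn.", print_chunk);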
293
294//=========================== Slot Action ===========================//
295
296json LLMLocal::build_slot_json(int id_slot, const std::string &action, const std::string &filepath)
297{
298 json j;
299 j["id_slot"] = id_slot;
300 j["action"] = action;
301 j["filepath"] = filepath;
302 return j;
303}
304
305std::string LLMLocal::parse_slot_json(const json &result)
306{
307 try
308 {
309 return result.at("filename").get<std::string>();
310 }
311 catch (const std::exception &)
312 {
313 }
314 return "";
315}
316
317std::string LLMLocal::slot(int id_slot, const std::string &action, const std::string &filepath)
318{
319 return parse_slot_json(json::parse(slot_json(build_slot_json(id_slot, action, filepath))));
320}
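// Usage sketch (filename is illustrative; assumes `llm` is a concrete LLMLocal). The save_slot/load_slot
// helpers declared in LLM.h provide the usual entry points:
//   llm->save_slot(0, "slot0_state.bin");
//   llm->load_slot(0, "slot0_state.bin");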
321
322//=========================== Logging ===========================//
323
324void LLMProvider::logging_stop()
325{
326 logging_callback(nullptr);
327}
328
329//=========================== Lora Adapters Apply ===========================//
330
331json LLMProvider::build_lora_weight_json(const std::vector<LoraIdScale> &loras)
332{
333 json j = json::array();
334 for (const auto &lora : loras)
335 {
336 j.push_back({{"id", lora.id},
337 {"scale", lora.scale}});
338 }
339 return j;
340}
341
342bool LLMProvider::parse_lora_weight_json(const json &result)
343{
344 try
345 {
346 return result.at("success").get<bool>();
347 }
348 catch (const std::exception &)
349 {
350 }
351 return false;
352}
353
354bool LLMProvider::lora_weight(const std::vector<LoraIdScale> &loras)
355{
356 return parse_lora_weight_json(json::parse(lora_weight_json(build_lora_weight_json(loras))));
357}
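// Usage sketch (adapter ids and scales are illustrative):
//   bool ok = llm->lora_weight({{0, 1.0f}, {1, 0.5f}});
//   // build_lora_weight_json turns this into [{"id":0,"scale":1.0},{"id":1,"scale":0.5}]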
358
359//=========================== Lora Adapters List ===========================//
360
361json LLMProvider::build_lora_list_json(const std::vector<LoraIdScalePath> &loras)
362{
363 json j = json::array();
364 for (const auto &lora : loras)
365 {
366 j.push_back({{"id", lora.id},
367 {"scale", lora.scale},
368 {"path", lora.path}});
369 }
370 return j;
371}
372
373std::vector<LoraIdScalePath> LLMProvider::parse_lora_list_json(const json &result)
374{
375 std::vector<LoraIdScalePath> loras;
376 try
377 {
378 for (const auto &lora : result)
379 {
380 loras.push_back({lora["id"].get<int>(),
381 lora["scale"].get<float>(),
382 lora["path"].get<std::string>()});
383 }
384 }
385 catch (const std::exception &)
386 {
387 }
388 return loras;
389}
390
391std::vector<LoraIdScalePath> LLMProvider::lora_list()
392{
393 return parse_lora_list_json(json::parse(lora_list_json()));
394}
395
396//=========================== API ===========================//
397
398bool Has_GPU_Layers(const char *command)
399{
400 return LLM::has_gpu_layers(command);
401}
402
403void LLM_Debug(int debug_level)
404{
405 LLMProviderRegistry &registry = LLMProviderRegistry::instance();
406 registry.set_debug_level(debug_level);
407 for (auto *inst : registry.get_instances())
408 {
409 inst->debug(debug_level);
410 }
411}
412
413void LLM_Logging_Callback(CharArrayFn callback)
414{
415 LLMProviderRegistry &registry = LLMProviderRegistry::instance();
416 registry.set_log_callback(callback);
417 for (auto *inst : registry.get_instances())
418 {
419 inst->logging_callback(callback);
420 }
421}
422
423void LLM_Logging_Stop()
424{
425 LLM_Logging_Callback(nullptr);
426}
427
428#ifdef _DEBUG
429const bool IsDebuggerAttached(void)
430{
431#ifdef _MSC_VER
432 return ::IsDebuggerPresent();
433#elif __APPLE__
434 return AmIBeingDebugged();
435#elif __linux__
436 return debuggerIsAttached();
437#else
438 return false;
439#endif
440}
441#endif
442
443const char *LLM_Tokenize(LLM *llm, const char *query)
444{
445 json result = llm->tokenize(query);
446 return stringToCharArray(result.dump());
447}
448
449const char *LLM_Detokenize(LLM *llm, const char *tokens_as_json)
450{
451 return stringToCharArray(llm->detokenize(json::parse(tokens_as_json)));
452}
453
454const char *LLM_Embeddings(LLM *llm, const char *query)
455{
456 json result = llm->embeddings(query);
457 return stringToCharArray(result.dump());
458}
459
460const char *LLM_Completion(LLM *llm, const char *prompt, CharArrayFn callback, int id_slot, bool return_response_json)
461{
462 return stringToCharArray(llm->completion(prompt, callback, id_slot, return_response_json));
463}
464
465void LLM_Set_Completion_Parameters(LLM *llm, const char *params_json)
466{
467 json params = json::parse(params_json ? params_json : "{}");
468 llm->set_completion_params(params);
469}
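// Illustrative call (the accepted parameter names depend on the backing server; these are typical sampling fields):
//   LLM_Set_Completion_Parameters(llm, "{\"temperature\":0.7,\"n_predict\":128}");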
470
471const char *LLM_Get_Completion_Parameters(LLM *llm)
472{
473 return stringToCharArray((llm->completion_params).dump());
474}
475
476void LLM_Set_Grammar(LLM *llm, const char *grammar)
477{
478 llm->set_grammar(grammar);
479}
480
481const char *LLM_Get_Grammar(LLM *llm)
482{
483 return stringToCharArray(llm->grammar);
484}
485
486const char *LLM_Apply_Template(LLM *llm, const char *messages_as_json)
487{
488 return stringToCharArray(llm->apply_template(json::parse(messages_as_json)));
489}
490
491void LLM_Enable_Reasoning(LLMProvider *llm, bool enable_reasoning)
492{
493 llm->enable_reasoning(enable_reasoning);
494}
495
496const char *LLM_Save_Slot(LLMLocal *llm, int id_slot, const char *filepath)
497{
498 return stringToCharArray(llm->save_slot(id_slot, filepath));
499}
500
501const char *LLM_Load_Slot(LLMLocal *llm, int id_slot, const char *filepath)
502{
503 return stringToCharArray(llm->load_slot(id_slot, filepath));
504}
505
506void LLM_Cancel(LLMLocal *llm, int id_slot)
507{
508 llm->cancel(id_slot);
509}
510
511bool LLM_Lora_Weight(LLMProvider *llm, const char *loras_as_json)
512{
513 try
514 {
515 json loras_arr = json::array();
516 loras_arr = json::parse(loras_as_json);
517 std::vector<LoraIdScale> loras;
518 for (const auto &lora : loras_arr)
519 {
520 loras.push_back({lora["id"].get<int>(), lora["scale"].get<float>()});
521 }
522 return llm->lora_weight(loras);
523 }
524 catch (const std::exception &)
525 {
526 }
527 return false;
528}
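// Illustrative input accepted by the parsing above:
//   bool ok = LLM_Lora_Weight(llm, "[{\"id\":0,\"scale\":1.0},{\"id\":1,\"scale\":0.5}]");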
529
530const char *LLM_Lora_List(LLMProvider *llm)
531{
532 std::vector<LoraIdScalePath> loras = llm->lora_list();
533 json j = json::array();
534 for (const auto &lora : loras)
535 {
536 j.push_back({{"id", lora.id},
537 {"scale", lora.scale}});
538 }
539 return stringToCharArray(j.dump());
540}
541
542void LLM_Delete(LLMProvider *llm)
543{
544 if (llm != nullptr)
545 {
546 delete llm;
547 }
548}
549
550void LLM_Start_Server(LLMProvider *llm, const char *host, int port, const char *API_key)
551{
552 llm->start_server(host, port, API_key);
553}
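// Usage sketch (host and port are illustrative):
//   LLM_Start_Server(llm, "127.0.0.1", 8080, "");
//   // ... serve requests ...
//   LLM_Stop_Server(llm);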
554
555void LLM_Stop_Server(LLMProvider *llm)
556{
557 llm->stop_server();
558}
559
560void LLM_Join_Service(LLMProvider *llm)
561{
562 llm->join_service();
563}
564
565void LLM_Join_Server(LLMProvider *llm)
566{
567 llm->join_server();
568}
569
570void LLM_Start(LLMProvider *llm)
571{
572 llm->start();
573}
574
575const bool LLM_Started(LLMProvider *llm)
576{
577 return llm->started();
578}
579
580void LLM_Stop(LLMProvider *llm)
581{
582 llm->stop();
583}
584
585void LLM_Set_SSL(LLMProvider *llm, const char *SSL_cert, const char *SSL_key)
586{
587 llm->set_SSL(SSL_cert, SSL_key);
588}
589
590const int LLM_Status_Code()
591{
592 return get_status_code();
593}
594
595const char *LLM_Status_Message()
596{
597 std::string result = get_status_message();
598 return stringToCharArray(result);
599}
600
601const int LLM_Embedding_Size(LLMProvider *llm)
602{
603 return llm->embedding_size();
604}
Core LLM functionality interface and base classes.
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Definition LLM.cpp:25
Abstract class for local LLM operations with slot management.
Definition LLM.h:222
virtual std::string slot_json(const json &data)=0
Manage slots with HTTP response support.
virtual std::string load_slot(int id_slot, const std::string &filepath)
Load slot state from file.
Definition LLM.h:238
virtual std::string save_slot(int id_slot, const std::string &filepath)
Save slot state to file.
Definition LLM.h:232
virtual std::string slot(int id_slot, const std::string &action, const std::string &filepath)
Perform slot operation.
Definition LLM.cpp:317
virtual void cancel(int id_slot)=0
Cancel request.
virtual json build_slot_json(int id_slot, const std::string &action, const std::string &filepath)
Build JSON for slot operations.
Definition LLM.cpp:296
virtual std::string parse_slot_json(const json &result)
Parse slot operation result.
Definition LLM.cpp:305
Registry for managing LLM provider instances.
Definition LLM.h:380
std::vector< LLMProvider * > get_instances()
Get all registered provider instances.
Definition LLM.h:426
void set_debug_level(int level)
Set global debug level.
Definition LLM.h:434
static bool initialised
Whether the registry has been initialized.
Definition LLM.h:382
void set_log_callback(CharArrayFn callback)
Set global log callback.
Definition LLM.h:448
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Definition LLM.h:395
Abstract class for LLM service providers.
Definition LLM.h:275
virtual void logging_callback(CharArrayFn callback)=0
Set logging callback function.
virtual bool started()=0
Check if service is started.
virtual void start_server(const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="")=0
Start HTTP server.
virtual void logging_stop()
Stop logging.
Definition LLM.cpp:324
virtual void join_service()=0
Wait for service thread to complete.
virtual void stop_server()=0
Stop HTTP server.
virtual bool parse_lora_weight_json(const json &result)
Parse LoRA weight configuration result.
Definition LLM.cpp:342
virtual void join_server()=0
Wait for server thread to complete.
virtual void enable_reasoning(bool reasoning)
Enable reasoning.
Definition LLM.h:301
virtual json build_lora_weight_json(const std::vector< LoraIdScale > &loras)
Build JSON for LoRA weight configuration.
Definition LLM.cpp:331
virtual bool lora_weight(const std::vector< LoraIdScale > &loras)
Configure LoRA weights.
Definition LLM.cpp:354
virtual std::vector< LoraIdScalePath > lora_list()
List available LoRA adapters.
Definition LLM.cpp:391
virtual void stop()=0
Stop the LLM service.
virtual void set_SSL(const std::string &SSL_cert, const std::string &SSL_key)=0
Configure SSL certificates.
virtual std::string lora_list_json()=0
List available LoRA adapters.
virtual ~LLMProvider()
Virtual destructor.
Definition LLM.cpp:42
virtual std::string lora_weight_json(const json &data)=0
Configure LoRA weights with HTTP response support.
virtual void start()=0
Start the LLM service.
virtual std::vector< LoraIdScalePath > parse_lora_list_json(const json &result)
Parse LoRA list result.
Definition LLM.cpp:373
virtual json build_lora_list_json(const std::vector< LoraIdScalePath > &loras)
Build JSON from a LoRA adapter list.
Definition LLM.cpp:361
virtual int embedding_size()=0
Get embedding vector size.
Abstract base class for Large Language Model operations.
Definition LLM.h:60
virtual json build_tokenize_json(const std::string &query)
Build JSON for tokenization.
Definition LLM.cpp:151
virtual std::string embeddings_json(const json &data)=0
Generate embeddings with HTTP response support.
int32_t n_keep
Number of tokens to keep from the beginning of the context.
Definition LLM.h:62
virtual json build_apply_template_json(const json &messages)
Build JSON for template application.
Definition LLM.cpp:125
virtual std::string parse_apply_template_json(const json &result)
Parse template application result.
Definition LLM.cpp:132
virtual std::vector< int > parse_tokenize_json(const json &result)
Parse tokenization result.
Definition LLM.cpp:158
virtual std::string apply_template(const json &messages)
Apply template to messages.
Definition LLM.cpp:144
virtual json build_detokenize_json(const std::vector< int32_t > &tokens)
Build JSON for detokenization.
Definition LLM.cpp:178
virtual std::string parse_completion_json(const json &result)
Parse completion result.
Definition LLM.cpp:264
virtual std::string apply_template_json(const json &data)=0
Apply a chat template to message data.
virtual json build_completion_json(const std::string &prompt, int id_slot=-1)
Build JSON for completion generation.
Definition LLM.cpp:235
virtual std::string tokenize_json(const json &data)=0
Tokenize input (override)
virtual void set_completion_params(json completion_params_)
Set completion parameters.
Definition LLM.h:105
virtual std::string detokenize(const std::vector< int32_t > &tokens)
Convert tokens to text.
Definition LLM.cpp:197
json completion_params
JSON object containing completion parameters.
Definition LLM.h:64
virtual std::vector< int > tokenize(const std::string &query)
Tokenize text.
Definition LLM.cpp:170
virtual std::string completion(const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
Generate completion.
Definition LLM.cpp:283
virtual void set_grammar(std::string grammar_)
Set grammar for constrained generation.
Definition LLM.h:130
static std::string LLM_args_to_command(const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
Convert LLM parameters to command line arguments.
Definition LLM.cpp:46
virtual std::vector< float > embeddings(const std::string &query)
Generate embeddings.
Definition LLM.cpp:228
static bool has_gpu_layers(const std::string &command)
Check if command line arguments specify GPU layers.
Definition LLM.cpp:65
virtual std::string completion_json(const json &data, CharArrayFn callback, bool callbackWithJSON)=0
Generate text completion.
std::string grammar
Grammar specification in GBNF format or JSON schema.
Definition LLM.h:63
virtual std::string detokenize_json(const json &data)=0
Convert tokens back to text.
virtual std::vector< float > parse_embeddings_json(const json &result)
Parse embeddings result.
Definition LLM.cpp:211
virtual json build_embeddings_json(const std::string &query)
Build JSON for embeddings generation.
Definition LLM.cpp:204
virtual std::string parse_detokenize_json(const json &result)
Parse detokenization result.
Definition LLM.cpp:185
const char * LLM_Lora_List(LLMProvider *llm)
List LoRA adapters (C API)
Definition LLM.cpp:530
const char * LLM_Get_Grammar(LLM *llm)
Get grammar (C API)
Definition LLM.cpp:481
void LLM_Stop(LLMProvider *llm)
Stop LLM service (C API)
Definition LLM.cpp:580
void LLM_Enable_Reasoning(LLMProvider *llm, bool enable_reasoning)
Enable reasoning (C API)
Definition LLM.cpp:491
const char * LLM_Save_Slot(LLMLocal *llm, int id_slot, const char *filepath)
Save slot state (C API)
Definition LLM.cpp:496
void LLM_Logging_Callback(CharArrayFn callback)
Set global logging callback (C API)
Definition LLM.cpp:413
const char * LLM_Load_Slot(LLMLocal *llm, int id_slot, const char *filepath)
Load slot state (C API)
Definition LLM.cpp:501
void LLM_Join_Service(LLMProvider *llm)
Wait for service to complete (C API)
Definition LLM.cpp:560
void LLM_Set_SSL(LLMProvider *llm, const char *SSL_cert, const char *SSL_key)
Set SSL configuration (C API)
Definition LLM.cpp:585
bool Has_GPU_Layers(const char *command)
Check if command has GPU layers (C API)
Definition LLM.cpp:398
const char * LLM_Status_Message()
Get last operation status message (C API)
Definition LLM.cpp:595
void LLM_Set_Grammar(LLM *llm, const char *grammar="")
Set grammar (C API)
Definition LLM.cpp:476
const char * LLM_Apply_Template(LLM *llm, const char *messages_as_json)
Apply chat template (C API)
Definition LLM.cpp:486
void LLM_Cancel(LLMLocal *llm, int id_slot)
Cancel request (C API)
Definition LLM.cpp:506
void LLM_Logging_Stop()
Stop global logging (C API)
Definition LLM.cpp:423
void LLM_Start(LLMProvider *llm)
Start LLM service (C API)
Definition LLM.cpp:570
void LLM_Set_Completion_Parameters(LLM *llm, const char *params_json="{}")
Set completion parameters (C API)
Definition LLM.cpp:465
const int LLM_Embedding_Size(LLMProvider *llm)
Get embedding vector size (C API)
Definition LLM.cpp:601
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
Definition LLM.cpp:542
void LLM_Debug(int debug_level)
Set global debug level (C API)
Definition LLM.cpp:403
bool LLM_Lora_Weight(LLMProvider *llm, const char *loras_as_json)
Configure LoRA weights (C API)
Definition LLM.cpp:511
const char * LLM_Completion(LLM *llm, const char *prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
Generate completion (C API)
Definition LLM.cpp:460
const char * LLM_Tokenize(LLM *llm, const char *query)
Tokenize text (C API)
Definition LLM.cpp:443
void LLM_Join_Server(LLMProvider *llm)
Wait for server to complete (C API)
Definition LLM.cpp:565
const bool LLM_Started(LLMProvider *llm)
Check if service is started (C API)
Definition LLM.cpp:575
const char * LLM_Get_Completion_Parameters(LLM *llm)
Get completion parameters (C API)
Definition LLM.cpp:471
const char * LLM_Embeddings(LLM *llm, const char *query)
Generate embeddings (C API)
Definition LLM.cpp:454
const int LLM_Status_Code()
Get last operation status code (C API)
Definition LLM.cpp:590
void LLM_Stop_Server(LLMProvider *llm)
Stop HTTP server (C API)
Definition LLM.cpp:555
const char * LLM_Detokenize(LLM *llm, const char *tokens_as_json)
Detokenize tokens (C API)
Definition LLM.cpp:449
void LLM_Start_Server(LLMProvider *llm, const char *host="0.0.0.0", int port=-1, const char *API_key="")
Start HTTP server (C API)
Definition LLM.cpp:550