LlamaLib  v2.0.5
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM.cpp
1#include "LLM.h"
2
3#if !(TARGET_OS_IOS || TARGET_OS_VISION)
4std::atomic_flag sigint_terminating = ATOMIC_FLAG_INIT;
5
6void llm_sigint_signal_handler(int sig)
7{
8 if (sigint_terminating.test_and_set())
9 {
10 // in case it hangs, we can force terminate the server by hitting Ctrl+C twice
11 // this is for better developer experience, we can remove when the server is stable enough
12 fprintf(stderr, "Received second interrupt, terminating immediately.\n");
13 exit(1);
14 }
15
16 for (auto *inst : LLMProviderRegistry::instance().get_instances())
17 {
18 inst->stop();
19 inst->stop_server();
20 }
21}
22#endif
23
24// Use a function to ensure the setup only happens once across all libraries
26{
28 {
29 static std::once_flag initialized;
30 std::call_once(initialized, [](){
31 set_error_handlers();
32#if !(TARGET_OS_IOS || TARGET_OS_VISION)
33 register_sigint_hook(llm_sigint_signal_handler);
34#endif
35 });
36 }
37}
38
39LLMProviderRegistry *LLMProviderRegistry::custom_instance_ = nullptr;
41
43
44//=========================== Helpers ===========================//
45
46std::string LLM::LLM_args_to_command(const std::string &model_path, int num_slots, int num_threads, int num_GPU_layers, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string> &lora_paths)
47{
48 std::string command = "-m \"" + model_path + "\"" +
49 " -t " + std::to_string(num_threads) +
50 " -np " + std::to_string(num_slots) +
51 " -c " + std::to_string(context_size) +
52 " -b " + std::to_string(batch_size);
53
54 if (num_GPU_layers > 0)
55 command += " -ngl " + std::to_string(num_GPU_layers);
56 command += " --context-shift";
57 command += " -fa ";
58 command += flash_attention ? "on" : "off";
59 if (embedding_only)
60 command += " --embedding";
61 for (const auto &lora_path : lora_paths)
62 command += " --lora \"" + lora_path + "\"";
63 return command;
64}
65
66bool LLM::has_gpu_layers(const std::string &command)
67{
68 std::istringstream iss(command);
69 std::vector<std::string> args;
70 std::string token;
71
72 // Simple splitting (does not handle quoted args)
73 while (iss >> token)
74 {
75 args.push_back(token);
76 }
77
78 for (size_t i = 0; i < args.size(); ++i)
79 {
80 const std::string &arg = args[i];
81
82 // Match separate argument + value
83 if (arg == "-ngl" || arg == "--gpu-layers" || arg == "--n-gpu-layers")
84 {
85 if (i + 1 < args.size())
86 {
87 try
88 {
89 int val = std::stoi(args[i + 1]);
90 return val > 0;
91 }
92 catch (...)
93 {
94 continue;
95 }
96 }
97 }
98
99 // Match inline --flag=value
100 size_t eqPos = arg.find('=');
101 if (eqPos != std::string::npos)
102 {
103 std::string key = arg.substr(0, eqPos);
104 std::string value = arg.substr(eqPos + 1);
105
106 if (key == "-ngl" || key == "--gpu-layers" || key == "--n-gpu-layers")
107 {
108 try
109 {
110 int val = std::stoi(value);
111 return val > 0;
112 }
113 catch (...)
114 {
115 continue;
116 }
117 }
118 }
119 }
120
121 return false;
122}
123
124//=========================== Apply Template ===========================//
125
126json LLM::build_apply_template_json(const json &messages)
127{
128 json j;
129 j["messages"] = messages;
130 return j;
131}
132
133std::string LLM::parse_apply_template_json(const json &result)
134{
135 try
136 {
137 return result.at("prompt").get<std::string>();
138 }
139 catch (const std::exception &)
140 {
141 }
142 return "";
143}
144
145std::string LLM::apply_template(const json &messages)
146{
148}
149
150//=========================== Tokenize ===========================//
151
152json LLM::build_tokenize_json(const std::string &query)
153{
154 json j;
155 j["content"] = query;
156 return j;
157}
158
159std::vector<int> LLM::parse_tokenize_json(const json &result)
160{
161 try
162 {
163 return result.at("tokens").get<std::vector<int>>();
164 }
165 catch (const std::exception &)
166 {
167 }
168 return {};
169}
170
171std::vector<int> LLM::tokenize(const std::string &input)
172{
173 return parse_tokenize_json(json::parse(tokenize_json(build_tokenize_json(input))));
174}
175
176
177//=========================== Detokenize ===========================//
178
179json LLM::build_detokenize_json(const std::vector<int32_t> &tokens)
180{
181 json j;
182 j["tokens"] = tokens;
183 return j;
184}
185
186std::string LLM::parse_detokenize_json(const json &result)
187{
188 try
189 {
190 return result.at("content").get<std::string>();
191 }
192 catch (const std::exception &)
193 {
194 }
195 return "";
196}
197
198std::string LLM::detokenize(const std::vector<int32_t> &tokens)
199{
200 return parse_detokenize_json(json::parse(detokenize_json(build_detokenize_json(tokens))));
201}
202
203//=========================== Embeddings ===========================//
204
205json LLM::build_embeddings_json(const std::string &query)
206{
207 json j;
208 j["content"] = query;
209 return j;
210}
211
212std::vector<float> LLM::parse_embeddings_json(const json &result)
213{
214 try
215 {
216 const json& emb = result.at(0).at("embedding");
217 if (emb.is_array() && !emb.empty())
218 {
219 if (emb[0].is_number()) return emb.get<std::vector<float>>();
220 if (emb[0].is_array()) return emb.at(0).get<std::vector<float>>();
221 }
222 }
223 catch (const std::exception &)
224 {
225 }
226 return {};
227}
228
229std::vector<float> LLM::embeddings(const std::string &query)
230{
232}
233
234//=========================== Completion ===========================//
235
236json LLM::build_completion_json(const std::string &prompt, int id_slot)
237{
238 json j;
239 j["prompt"] = prompt;
240 j["id_slot"] = id_slot;
241 j["n_keep"] = n_keep;
242
243 if (!grammar.empty())
244 {
245 try
246 {
247 j["json_schema"] = json::parse(grammar);
248 }
249 catch (const json::parse_error &)
250 {
251 j["grammar"] = grammar;
252 }
253 }
254
255 if (completion_params.is_object())
256 {
257 for (json::const_iterator it = completion_params.begin(); it != completion_params.end(); ++it)
258 {
259 j[it.key()] = it.value();
260 }
261 }
262 return j;
263}
264
265std::string LLM::parse_completion_json(const json &result)
266{
267 try
268 {
269 if (result.contains("error")) {
270 json error = result.at("error");
271 int code = error.at("code").get<int>();
272 std::string message = error.at("message").get<std::string>();
273 fail(message, code);
274 return "";
275 }
276 return result.at("content").get<std::string>();
277 }
278 catch (const std::exception &)
279 {
280 }
281 return "";
282}
283
284std::string LLM::completion(const std::string &prompt, CharArrayFn callback, int id_slot, bool return_response_json)
285{
286 std::string response = completion_json(
287 build_completion_json(prompt, id_slot),
288 callback,
289 false);
290 if (return_response_json)
291 return response;
292 return parse_completion_json(json::parse(response));
293}
294
295//=========================== Slot Action ===========================//
296
297json LLMLocal::build_slot_json(int id_slot, const std::string &action, const std::string &filepath)
298{
299 json j;
300 j["id_slot"] = id_slot;
301 j["action"] = action;
302 j["filepath"] = filepath;
303 return j;
304}
305
306std::string LLMLocal::parse_slot_json(const json &result)
307{
308 try
309 {
310 return result.at("filename").get<std::string>();
311 }
312 catch (const std::exception &)
313 {
314 }
315 return "";
316}
317
318std::string LLMLocal::slot(int id_slot, const std::string &action, const std::string &filepath)
319{
320 return parse_slot_json(json::parse(slot_json(build_slot_json(id_slot, action, filepath))));
321}
322
323//=========================== Logging ===========================//
324
326{
327 logging_callback(nullptr);
328}
329
330//=========================== Lora Adapters Apply ===========================//
331
332json LLMProvider::build_lora_weight_json(const std::vector<LoraIdScale> &loras)
333{
334 json j = json::array();
335 for (const auto &lora : loras)
336 {
337 j.push_back({{"id", lora.id},
338 {"scale", lora.scale}});
339 }
340 return j;
341}
342
344{
345 try
346 {
347 return result.at("success").get<bool>();
348 }
349 catch (const std::exception &)
350 {
351 }
352 return false;
353}
354
355bool LLMProvider::lora_weight(const std::vector<LoraIdScale> &loras)
356{
358}
359
360//=========================== Lora Adapters List ===========================//
361
362json LLMProvider::build_lora_list_json(const std::vector<LoraIdScalePath> &loras)
363{
364 json j = json::array();
365 for (const auto &lora : loras)
366 {
367 j.push_back({{"id", lora.id},
368 {"scale", lora.scale},
369 {"path", lora.path}});
370 }
371 return j;
372}
373
374std::vector<LoraIdScalePath> LLMProvider::parse_lora_list_json(const json &result)
375{
376 std::vector<LoraIdScalePath> loras;
377 try
378 {
379 for (const auto &lora : result)
380 {
381 loras.push_back({lora["id"].get<int>(),
382 lora["scale"].get<float>(),
383 lora["path"].get<std::string>()});
384 }
385 }
386 catch (const std::exception &)
387 {
388 }
389 return loras;
390}
391
392std::vector<LoraIdScalePath> LLMProvider::lora_list()
393{
394 return parse_lora_list_json(json::parse(lora_list_json()));
395}
396
397//=========================== API ===========================//
398
399bool Has_GPU_Layers(const char *command)
400{
401 return LLM::has_gpu_layers(command);
402}
403
404void LLM_Debug(int debug_level)
405{
407 registry.set_debug_level(debug_level);
408 for (auto *inst : registry.get_instances())
409 {
410 inst->debug(debug_level);
411 }
412}
413
414void LLM_Logging_Callback(CharArrayFn callback)
415{
417 registry.set_log_callback(callback);
418 for (auto *inst : registry.get_instances())
419 {
420 inst->logging_callback(callback);
421 }
422}
423
425{
426 LLM_Logging_Callback(nullptr);
427}
428
#ifdef _DEBUG
/// Debug builds only: report whether a debugger is attached to this process.
/// Each platform delegates to its native check; unknown platforms report false.
const bool IsDebuggerAttached(void)
{
#ifdef _MSC_VER
    return ::IsDebuggerPresent();
#elif __APPLE__
    return AmIBeingDebugged();
#elif __linux__
    return debuggerIsAttached();
#else
    // Unsupported platform: assume no debugger is present.
    return false;
#endif
}
#endif
443
444const char *LLM_Tokenize(LLM *llm, const char *query)
445{
446 json result = llm->tokenize(query);
447 return stringToCharArray(result.dump());
448}
449
450const char *LLM_Detokenize(LLM *llm, const char *tokens_as_json)
451{
452 return stringToCharArray(llm->detokenize(json::parse(tokens_as_json)));
453}
454
455const char *LLM_Embeddings(LLM *llm, const char *query)
456{
457 json result = llm->embeddings(query);
458 return stringToCharArray(result.dump());
459}
460
461const char *LLM_Completion(LLM *llm, const char *prompt, CharArrayFn callback, int id_slot, bool return_response_json)
462{
463 return stringToCharArray(llm->completion(prompt, callback, id_slot, return_response_json));
464}
465
466void LLM_Set_Completion_Parameters(LLM *llm, const char *params_json)
467{
468 json params = json::parse(params_json ? params_json : "{}");
469 llm->set_completion_params(params);
470}
471
473{
474 return stringToCharArray((llm->completion_params).dump());
475}
476
477void LLM_Set_Grammar(LLM *llm, const char *grammar)
478{
479 llm->set_grammar(grammar);
480}
481
482const char *LLM_Get_Grammar(LLM *llm)
483{
484 return stringToCharArray(llm->grammar);
485}
486
487const char *LLM_Apply_Template(LLM *llm, const char *messages_as_json)
488{
489 return stringToCharArray(llm->apply_template(json::parse(messages_as_json)));
490}
491
492void LLM_Enable_Reasoning(LLMProvider *llm, bool enable_reasoning)
493{
494 llm->enable_reasoning(enable_reasoning);
495}
496
497const char *LLM_Save_Slot(LLMLocal *llm, int id_slot, const char *filepath)
498{
499 return stringToCharArray(llm->save_slot(id_slot, filepath));
500}
501
502const char *LLM_Load_Slot(LLMLocal *llm, int id_slot, const char *filepath)
503{
504 return stringToCharArray(llm->load_slot(id_slot, filepath));
505}
506
507void LLM_Cancel(LLMLocal *llm, int id_slot)
508{
509 llm->cancel(id_slot);
510}
511
512bool LLM_Lora_Weight(LLMProvider *llm, const char *loras_as_json)
513{
514 try
515 {
516 json loras_arr = json::array();
517 loras_arr = json::parse(loras_as_json);
518 std::vector<LoraIdScale> loras;
519 for (const auto &lora : loras_arr)
520 {
521 loras.push_back({lora["id"].get<int>(), lora["scale"].get<float>()});
522 }
523 return llm->lora_weight(loras);
524 }
525 catch (const std::exception &)
526 {
527 }
528 return false;
529}
530
531const char *LLM_Lora_List(LLMProvider *llm)
532{
533 std::vector<LoraIdScalePath> loras = llm->lora_list();
534 json j = json::array();
535 for (const auto &lora : loras)
536 {
537 j.push_back({{"id", lora.id},
538 {"scale", lora.scale}});
539 }
540 return stringToCharArray(j.dump());
541}
542
544{
545 if (llm != nullptr)
546 {
547 delete llm;
548 }
549}
550
551void LLM_Start_Server(LLMProvider *llm, const char *host, int port, const char *API_key)
552{
553 llm->start_server(host, port, API_key);
554}
555
557{
558 llm->stop_server();
559}
560
562{
563 llm->join_service();
564}
565
567{
568 llm->join_server();
569}
570
572{
573 llm->start();
574}
575
576const bool LLM_Started(LLMProvider *llm)
577{
578 return llm->started();
579}
580
582{
583 llm->stop();
584}
585
586void LLM_Set_SSL(LLMProvider *llm, const char *SSL_cert, const char *SSL_key)
587{
588 llm->set_SSL(SSL_cert, SSL_key);
589}
590
592{
593 return get_status_code();
594}
595
597{
598 std::string result = get_status_message();
599 return stringToCharArray(result);
600}
601
603{
604 return llm->embedding_size();
605}
Core LLM functionality interface and base classes.
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Definition LLM.cpp:25
Abstract class for local LLM operations with slot management.
Definition LLM.h:222
virtual std::string slot_json(const json &data)=0
Manage slots with HTTP response support.
virtual std::string load_slot(int id_slot, const std::string &filepath)
Load slot state from file.
Definition LLM.h:242
virtual std::string save_slot(int id_slot, const std::string &filepath)
Save slot state to file.
Definition LLM.h:236
virtual std::string slot(int id_slot, const std::string &action, const std::string &filepath)
Perform slot operation.
Definition LLM.cpp:318
virtual void cancel(int id_slot)=0
Cancel request.
virtual json build_slot_json(int id_slot, const std::string &action, const std::string &filepath)
Build JSON for slot operations.
Definition LLM.cpp:297
virtual std::string parse_slot_json(const json &result)
Parse slot operation result.
Definition LLM.cpp:306
Registry for managing LLM provider instances.
Definition LLM.h:384
std::vector< LLMProvider * > get_instances()
Get all registered provider instances.
Definition LLM.h:430
void set_debug_level(int level)
Set global debug level.
Definition LLM.h:438
static bool initialised
Whether the registry has been initialized.
Definition LLM.h:386
void set_log_callback(CharArrayFn callback)
Set global log callback.
Definition LLM.h:452
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Definition LLM.h:399
Abstract class for LLM service providers.
Definition LLM.h:279
virtual void logging_callback(CharArrayFn callback)=0
Set logging callback function.
virtual bool started()=0
Check if service is started.
virtual void start_server(const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="")=0
Start HTTP server.
virtual void logging_stop()
Stop logging.
Definition LLM.cpp:325
virtual void join_service()=0
Wait for service thread to complete.
virtual void stop_server()=0
Stop HTTP server.
virtual bool parse_lora_weight_json(const json &result)
Parse LoRA weight configuration result.
Definition LLM.cpp:343
virtual void join_server()=0
Wait for server thread to complete.
virtual void enable_reasoning(bool reasoning)
Enable or disable reasoning.
Definition LLM.h:305
virtual json build_lora_weight_json(const std::vector< LoraIdScale > &loras)
Build JSON for LoRA weight configuration.
Definition LLM.cpp:332
virtual bool lora_weight(const std::vector< LoraIdScale > &loras)
Configure LoRA weights.
Definition LLM.cpp:355
virtual std::vector< LoraIdScalePath > lora_list()
List available LoRA adapters.
Definition LLM.cpp:392
virtual void stop()=0
Stop the LLM service.
virtual void set_SSL(const std::string &SSL_cert, const std::string &SSL_key)=0
Configure SSL certificates.
virtual std::string lora_list_json()=0
List available LoRA adapters.
virtual ~LLMProvider()
Virtual destructor.
Definition LLM.cpp:42
virtual std::string lora_weight_json(const json &data)=0
Configure LoRA weights with HTTP response support.
virtual void start()=0
Start the LLM service.
virtual std::vector< LoraIdScalePath > parse_lora_list_json(const json &result)
Parse LoRA list result.
Definition LLM.cpp:374
virtual json build_lora_list_json(const std::vector< LoraIdScalePath > &loras)
Build a JSON array describing the given LoRA adapters.
Definition LLM.cpp:362
virtual int embedding_size()=0
Get embedding vector size.
Abstract base class for Large Language Model operations.
Definition LLM.h:60
virtual json build_tokenize_json(const std::string &query)
Build JSON for tokenization.
Definition LLM.cpp:152
virtual std::string embeddings_json(const json &data)=0
Generate embeddings with HTTP response support.
int32_t n_keep
Number of tokens to keep from the beginning of the context.
Definition LLM.h:62
virtual json build_apply_template_json(const json &messages)
Build JSON for template application.
Definition LLM.cpp:126
virtual std::string parse_apply_template_json(const json &result)
Parse template application result.
Definition LLM.cpp:133
virtual std::vector< int > parse_tokenize_json(const json &result)
Parse tokenization result.
Definition LLM.cpp:159
virtual std::string apply_template(const json &messages)
Apply template to messages.
Definition LLM.cpp:145
virtual json build_detokenize_json(const std::vector< int32_t > &tokens)
Build JSON for detokenization.
Definition LLM.cpp:179
virtual std::string parse_completion_json(const json &result)
Parse completion result.
Definition LLM.cpp:265
virtual std::string apply_template_json(const json &data)=0
Apply a chat template to message data.
virtual json build_completion_json(const std::string &prompt, int id_slot=-1)
Build JSON for completion generation.
Definition LLM.cpp:236
virtual std::string tokenize_json(const json &data)=0
Tokenize input (override)
virtual void set_completion_params(json completion_params_)
Set completion parameters.
Definition LLM.h:105
virtual std::string detokenize(const std::vector< int32_t > &tokens)
Convert tokens to text.
Definition LLM.cpp:198
json completion_params
JSON object containing completion parameters.
Definition LLM.h:64
virtual std::vector< int > tokenize(const std::string &query)
Tokenize text.
Definition LLM.cpp:171
virtual std::string completion(const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
Generate completion.
Definition LLM.cpp:284
virtual void set_grammar(std::string grammar_)
Set grammar for constrained generation.
Definition LLM.h:130
static std::string LLM_args_to_command(const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
Convert LLM parameters to command line arguments.
Definition LLM.cpp:46
virtual std::vector< float > embeddings(const std::string &query)
Generate embeddings.
Definition LLM.cpp:229
static bool has_gpu_layers(const std::string &command)
Check if command line arguments specify GPU layers.
Definition LLM.cpp:66
virtual std::string completion_json(const json &data, CharArrayFn callback, bool callbackWithJSON)=0
Generate text completion.
std::string grammar
Grammar specification in GBNF format or JSON schema.
Definition LLM.h:63
virtual std::string detokenize_json(const json &data)=0
Convert tokens back to text.
virtual std::vector< float > parse_embeddings_json(const json &result)
Parse embeddings result.
Definition LLM.cpp:212
virtual json build_embeddings_json(const std::string &query)
Build JSON for embeddings generation.
Definition LLM.cpp:205
virtual std::string parse_detokenize_json(const json &result)
Parse detokenization result.
Definition LLM.cpp:186
const char * LLM_Lora_List(LLMProvider *llm)
List LoRA adapters (C API)
Definition LLM.cpp:531
const char * LLM_Get_Grammar(LLM *llm)
Get grammar (C API)
Definition LLM.cpp:482
void LLM_Stop(LLMProvider *llm)
Stop LLM service (C API)
Definition LLM.cpp:581
void LLM_Enable_Reasoning(LLMProvider *llm, bool enable_reasoning)
Enable reasoning (C API)
Definition LLM.cpp:492
const char * LLM_Save_Slot(LLMLocal *llm, int id_slot, const char *filepath)
Save slot state (C API)
Definition LLM.cpp:497
void LLM_Logging_Callback(CharArrayFn callback)
Set global logging callback (C API)
Definition LLM.cpp:414
const char * LLM_Load_Slot(LLMLocal *llm, int id_slot, const char *filepath)
Load slot state (C API)
Definition LLM.cpp:502
void LLM_Join_Service(LLMProvider *llm)
Wait for service to complete (C API)
Definition LLM.cpp:561
void LLM_Set_SSL(LLMProvider *llm, const char *SSL_cert, const char *SSL_key)
Set SSL configuration (C API)
Definition LLM.cpp:586
bool Has_GPU_Layers(const char *command)
Check if command has GPU layers (C API)
Definition LLM.cpp:399
const char * LLM_Status_Message()
Get last operation status message (C API)
Definition LLM.cpp:596
void LLM_Set_Grammar(LLM *llm, const char *grammar="")
Set grammar (C API)
Definition LLM.cpp:477
const char * LLM_Apply_Template(LLM *llm, const char *messages_as_json)
Apply chat template (C API)
Definition LLM.cpp:487
void LLM_Cancel(LLMLocal *llm, int id_slot)
Cancel request (C API)
Definition LLM.cpp:507
void LLM_Logging_Stop()
Stop global logging (C API)
Definition LLM.cpp:424
void LLM_Start(LLMProvider *llm)
Start LLM service (C API)
Definition LLM.cpp:571
void LLM_Set_Completion_Parameters(LLM *llm, const char *params_json="{}")
Set completion parameters (C API)
Definition LLM.cpp:466
const int LLM_Embedding_Size(LLMProvider *llm)
Get embedding vector size (C API)
Definition LLM.cpp:602
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
Definition LLM.cpp:543
void LLM_Debug(int debug_level)
Set global debug level (C API)
Definition LLM.cpp:404
bool LLM_Lora_Weight(LLMProvider *llm, const char *loras_as_json)
Configure LoRA weights (C API)
Definition LLM.cpp:512
const char * LLM_Completion(LLM *llm, const char *prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
Generate completion (C API)
Definition LLM.cpp:461
const char * LLM_Tokenize(LLM *llm, const char *query)
Tokenize text (C API)
Definition LLM.cpp:444
void LLM_Join_Server(LLMProvider *llm)
Wait for server to complete (C API)
Definition LLM.cpp:566
const bool LLM_Started(LLMProvider *llm)
Check if service is started (C API)
Definition LLM.cpp:576
const char * LLM_Get_Completion_Parameters(LLM *llm)
Get completion parameters (C API)
Definition LLM.cpp:472
const char * LLM_Embeddings(LLM *llm, const char *query)
Generate embeddings (C API)
Definition LLM.cpp:455
const int LLM_Status_Code()
Get last operation status code (C API)
Definition LLM.cpp:591
void LLM_Stop_Server(LLMProvider *llm)
Stop HTTP server (C API)
Definition LLM.cpp:556
const char * LLM_Detokenize(LLM *llm, const char *tokens_as_json)
Detokenize tokens (C API)
Definition LLM.cpp:450
void LLM_Start_Server(LLMProvider *llm, const char *host="0.0.0.0", int port=-1, const char *API_key="")
Start HTTP server (C API)
Definition LLM.cpp:551