LlamaLib  v2.0.2
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM_runtime.h
Go to the documentation of this file.
1
6
7#pragma once
8
9#include <fstream>
10#include <sstream>
11#include <vector>
12#include <iostream>
13#include <setjmp.h>
14#include <type_traits>
15#include <algorithm>
16#include <cstdlib>
17
18#include "defs.h"
19#include "error_handling.h"
20#include "LLM.h"
21
22#if defined(_WIN32) || defined(__linux__)
23#include "archchecker.h"
24#endif
25
26// Platform-specific library loading definitions
// LibHandle plus LOAD_LIB/GET_SYM/CLOSE_LIB abstract the OS dynamic-loader
// API so the rest of the file can open a shared library, resolve symbols,
// and close it without platform #ifdefs.
27#if defined(_WIN32)
28#include <windows.h>
29#include <libloaderapi.h>
// Windows: handles are HMODULE; use the Win32 loader API.
30using LibHandle = HMODULE;
31#define LOAD_LIB(path) LoadLibraryA(path)
32#define GET_SYM(handle, name) GetProcAddress(handle, name)
33#define CLOSE_LIB(handle) FreeLibrary(handle)
34#else
// POSIX (Linux/macOS): handles are opaque void* from dlopen().
35#include <dlfcn.h>
36#include <unistd.h>
37#include <limits.h>
38#ifdef __APPLE__
// mach-o/dyld.h is used for executable-path lookup on macOS
// (presumably via _NSGetExecutablePath in the .cpp — confirm).
39#include <mach-o/dyld.h>
40#endif
41
42using LibHandle = void *;
// RTLD_LAZY: symbols are resolved on first use, not at load time.
43#define LOAD_LIB(path) dlopen(path, RTLD_LAZY)
44#define GET_SYM(handle, name) dlsym(handle, name)
45#define CLOSE_LIB(handle) dlclose(handle)
46#endif
47
48//=================================== FUNCTION LISTS ===================================//
49
/// @brief Macro defining the list of dynamically loaded LLM functions.
///
/// X-macro style list: each entry is M(symbol_name, return_type, arg_types...).
/// Expanding it with DECLARE_FN (see inside LLMService) generates one function
/// pointer per exported symbol of the backend library; the symbol names here
/// must match the names exported by the loaded LLM library exactly.
/// NOTE: no comments may be placed on the continuation lines below — a `//`
/// there would swallow the trailing backslash and truncate the macro.
53#define LLM_FUNCTIONS_LIST(M) \
54 M(LLMService_Registry, void, LLMProviderRegistry *) \
55 M(LLMService_InjectErrorState, void, ErrorState *) \
56 M(LLMService_Construct, LLMProvider *, const char *, int, int, int, bool, int, int, bool, int, const char **) \
57 M(LLMService_From_Command, LLMProvider *, const char *)
58
/// @brief Runtime loader for LLM libraries.
///
/// LLMService is an LLMProvider that owns a dynamically loaded backend
/// library (handle) and an LLMProvider instance created by that library
/// (llm). Every virtual method simply delegates to `llm`, so the caller
/// can use LLMService exactly like a directly linked provider while the
/// actual implementation is chosen at runtime.
///
/// NOTE(review): `llm` is never null-checked in the delegating methods —
/// presumably the constructors guarantee it is set before use; confirm.
62class UNDREAMAI_API LLMService : public LLMProvider
63{
64public:
    /// Default constructor.
67 LLMService();
68
    /// Construct from an explicit model configuration.
    /// @param model_path path to the model file
    /// @param num_slots number of parallel inference slots (default 1)
    /// @param num_threads CPU threads; -1 presumably means auto — confirm
    /// @param num_GPU_layers layers to offload to GPU (0 = CPU only)
    /// @param flash_attention enable flash attention
    /// @param context_size context window size in tokens (default 4096)
    /// @param batch_size batch size (default 2048)
    /// @param embedding_only run in embedding-only mode
    /// @param lora_paths optional LoRA adapter paths
80 LLMService(const std::string &model_path, int num_slots = 1, int num_threads = -1, int num_GPU_layers = 0, bool flash_attention = false, int context_size = 4096, int batch_size = 2048, bool embedding_only = false, const std::vector<std::string> &lora_paths = {});
81
84
    /// Factory: build a service from a single command-line style string.
90 static LLMService *from_command(const std::string &command);
91
    /// Factory: build a service from argc/argv style arguments.
97 static LLMService *from_command(int argc, char **argv);
98
    // Handle of the dynamically loaded backend library (see LOAD_LIB).
99 LibHandle handle = nullptr;
    // Provider instance created by the loaded library; all overrides delegate to it.
100 LLMProvider *llm = nullptr;
101
    /// Load the backend library and create the wrapped LLM from `command`.
    /// @return true on success — TODO confirm against the .cpp
105 bool create_LLM_library(const std::string &command);
106
107 //=================================== LLM METHODS START ===================================//
    // Every method below forwards to the provider created by the loaded
    // library. NOTE(review): the ((LLMProvider *)llm) casts are redundant —
    // `llm` is already declared as LLMProvider * — presumably a leftover
    // from an earlier void* declaration; confirm.
108
    /// Tokenize input (delegates to loaded library).
112 std::string tokenize_json(const json &data) override { return ((LLMProvider *)llm)->tokenize_json(data); }
113
    /// Convert tokens back to text (delegates to loaded library).
118 std::string detokenize_json(const json &data) override { return ((LLMProvider *)llm)->detokenize_json(data); }
119
    /// Generate embeddings (delegates to loaded library).
124 std::string embeddings_json(const json &data) override { return ((LLMProvider *)llm)->embeddings_json(data); }
125
    /// Generate a completion; `callback` streams partial output (delegates to loaded library).
131 std::string completion_json(const json &data, CharArrayFn callback = nullptr, bool callbackWithJSON = true) override { return ((LLMProvider *)llm)->completion_json(data, callback, callbackWithJSON); }
132
    /// Apply a chat template to message data (delegates to loaded library).
137 std::string apply_template_json(const json &data) override { return ((LLMProvider *)llm)->apply_template_json(data); }
138
    /// Cancel the request running on `id_slot` (delegates to loaded library).
141 void cancel(int id_slot) override { ((LLMProvider *)llm)->cancel(id_slot); }
142
    /// Configure LoRA weights (delegates to loaded library).
    // NOTE(review): stray `;` after the body below — harmless but inconsistent
    // with the other one-liners.
147 std::string lora_weight_json(const json &data) override { return ((LLMProvider *)llm)->lora_weight_json(data); };
148
    /// List available LoRA adapters (delegates to loaded library).
151 std::string lora_list_json() override { return ((LLMProvider *)llm)->lora_list_json(); }
152
    /// Manage slots (delegates to loaded library).
157 std::string slot_json(const json &data) override { return ((LLMProvider *)llm)->slot_json(data); }
158
    /// Start the HTTP server (delegates to loaded library).
163 void start_server(const std::string &host = "0.0.0.0", int port = -1, const std::string &API_key = "") override { ((LLMProvider *)llm)->start_server(host, port, API_key); }
164
    /// Stop the HTTP server (delegates to loaded library).
166 void stop_server() override { ((LLMProvider *)llm)->stop_server(); }
167
    /// Start the service (delegates to loaded library).
169 void start() override { ((LLMProvider *)llm)->start(); }
170
    /// Check whether the service has started (delegates to loaded library).
173 bool started() override { return ((LLMProvider *)llm)->started(); }
174
    /// Stop the service (delegates to loaded library).
176 void stop() override
177 {
178 ((LLMProvider *)llm)->stop();
179 }
180
    /// Wait for service completion (delegates to loaded library).
182 void join_service() override { ((LLMProvider *)llm)->join_service(); }
183
    /// Wait for server completion (delegates to loaded library).
185 void join_server() override { ((LLMProvider *)llm)->join_server(); }
186
    /// Set SSL certificate/key for the server (delegates to loaded library).
190 void set_SSL(const std::string &cert, const std::string &key) override { ((LLMProvider *)llm)->set_SSL(cert, key); }
191
    /// Get the embedding vector size (delegates to loaded library).
194 int embedding_size() override { return ((LLMProvider *)llm)->embedding_size(); }
195
    /// Get the next available slot id (delegates to loaded library).
198 int get_next_available_slot() override { return ((LLMProvider *)llm)->get_next_available_slot(); }
199
    /// Set the debug level (delegates to loaded library).
202 void debug(int debug_level) override { ((LLMProvider *)llm)->debug(debug_level); }
203
    /// Set the logging callback (delegates to loaded library).
206 void logging_callback(CharArrayFn callback) override { ((LLMProvider *)llm)->logging_callback(callback); }
207
    /// Identify this implementation for debugging (not delegated).
208 std::string debug_implementation() override { return "runtime_detection"; }
209 //=================================== LLM METHODS END ===================================//
210
    /// Declare function pointers for dynamically loaded functions:
    /// expands each LLM_FUNCTIONS_LIST entry into `ret (*name)(args) = nullptr;`.
    // NOTE(review): this doc rendering elides source line 215 between the
    // definition and the #undef — presumably `LLM_FUNCTIONS_LIST(DECLARE_FN)`,
    // which actually emits the members; confirm against the real header.
213#define DECLARE_FN(name, ret, ...) \
214 ret (*name)(__VA_ARGS__) = nullptr;
216#undef DECLARE_FN
217
218protected:
    // Directories searched when locating the backend library.
219 std::vector<std::string> search_paths;
220
    /// Try to load one concrete backend library file and wire it up.
    /// @param command construction command passed to the library
    /// @param llm_lib_filename filename of the candidate backend library
    /// @return true on success — TODO confirm against the .cpp
226 bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename);
227};
228
/// @brief Get the OS-specific library directory.
232const std::string os_library_dir();
233
/// @brief Get available architectures for the platform.
/// @param gpu if true, list GPU-capable variants — presumably; confirm in .cpp
238const std::vector<std::string> available_architectures(bool gpu);
239
// NOTE(review): the following `static` functions are declared in a header —
// each including TU gets its own internal declaration, which typically
// triggers unused-function warnings; confirm this is intentional (they may
// belong in the .cpp or an anonymous namespace instead).

/// @brief Get the directory containing the running executable.
243static std::string get_executable_directory();
244
/// @brief Get the current working directory.
248static std::string get_current_directory();
249
/// @brief Collect library search paths from the given environment variables.
254static std::vector<std::string> get_env_library_paths(const std::vector<std::string> &env_vars);
255
/// @brief Build the full list of directories to search for backend libraries.
259static std::vector<std::string> get_search_directories();
260
/// @brief Get default environment variables for library paths.
264std::vector<std::string> get_default_library_env_vars();
265
266//=================================== EXTERNAL API ===================================//
267
270
271extern "C"
272{
    /// @brief Get available architectures (C API).
    /// C-linkage wrapper over available_architectures(); returns a C string
    /// — ownership/lifetime of the returned pointer is not visible here,
    /// confirm against the implementation.
276 UNDREAMAI_API const char *Available_Architectures(bool gpu);
277}
278
279
Core LLM functionality interface and base classes.
const std::string os_library_dir()
Get OS-specific library directory.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
Definition LLM_runtime.h:53
#define DECLARE_FN(name, ret,...)
Declare function pointers for dynamically loaded functions.
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
Definition LLM_runtime.h:42
Abstract class for LLM service providers.
Definition LLM.h:275
Runtime loader for LLM libraries.
Definition LLM_runtime.h:63
void stop_server() override
Stop HTTP server (override - delegates to loaded library)
void set_SSL(const std::string &cert, const std::string &key) override
Set SSL configuration (override - delegates to loaded library)
std::string debug_implementation() override
Implementation debugging.
std::string lora_weight_json(const json &data) override
Configure LoRA weights with HTTP response support.
void join_service() override
Wait for service completion (override - delegates to loaded library)
void cancel(int id_slot) override
Cancel request (override - delegates to loaded library)
bool started() override
Check service status (override - delegates to loaded library)
void start() override
Start service (override - delegates to loaded library)
std::string lora_list_json() override
List available LoRA adapters.
void logging_callback(CharArrayFn callback) override
Set logging callback (override - delegates to loaded library)
std::string tokenize_json(const json &data) override
Tokenize input (override)
std::string slot_json(const json &data) override
Manage slots with HTTP response support.
std::vector< std::string > search_paths
Library search paths.
std::string detokenize_json(const json &data) override
Convert tokens back to text.
std::string embeddings_json(const json &data) override
Generate embeddings with HTTP response support.
int get_next_available_slot() override
Get available slot (override - delegates to loaded library)
void debug(int debug_level) override
Set debug level (override - delegates to loaded library)
void join_server() override
Wait for server completion (override - delegates to loaded library)
std::string apply_template_json(const json &data) override
Apply a chat template to message data.
void start_server(const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override
Start HTTP server (override - delegates to loaded library)
std::string completion_json(const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
Generate completion (override - delegates to loaded library)
void stop() override
Stop service (override - delegates to loaded library)
int embedding_size() override
Get embedding size (override - delegates to loaded library)
File with basic definitions.
const char * Available_Architectures(bool gpu)
Get available architectures (C API)