LlamaLib  v2.0.2
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM_runtime.cpp
1#include "LLM_runtime.h"
2
3//============================= LIBRARY LOADING =============================//
4
/// Returns the runtime identifier string for the current platform
/// (e.g. "win-x64", "linux-x64", "osx-x64", "osx-arm64"), used to build
/// backend library file names. On an unrecognized platform, logs to stderr
/// and returns an empty string.
const std::string platform_name()
{
#if defined(_WIN32)
    return "win-x64";
#elif defined(__linux__)
    return "linux-x64";
#elif defined(__APPLE__)
#if defined(__x86_64__)
    return "osx-x64";
#else
    return "osx-arm64";
#endif
#else
    std::cerr << "Unknown platform!" << std::endl;
    return "";
#endif
}
22
23const std::vector<std::string> available_architectures(bool gpu)
24{
25 std::vector<std::string> architectures;
26#if defined(_WIN32)
27 std::string prefix = "";
28#else
29 std::string prefix = "lib";
30#endif
31
32#if defined(_WIN32)
33 std::string suffix = "dll";
34#elif defined(__linux__)
35 std::string suffix = "so";
36#elif defined(__APPLE__)
37 std::string suffix = "dylib";
38#else
39 std::cerr << "Unknown platform!" << std::endl;
40 return architectures;
41#endif
42
43 const auto add_library = [&](std::string arch)
44 {
45 std::string platform = platform_name();
46 std::string dash_arch = arch;
47 if (arch != "")
48 dash_arch = "_" + dash_arch;
49 std::string path = prefix + "llamalib_" + platform + dash_arch + "." + suffix;
50 architectures.push_back(path);
51 };
52
53#if defined(_WIN32) || defined(__linux__)
54 if (gpu)
55 {
56 add_library("cublas");
57 add_library("tinyblas");
58 add_library("hip");
59 add_library("vulkan");
60 }
61 if (has_avx512())
62 add_library("avx512");
63 if (has_avx2())
64 add_library("avx2");
65 if (has_avx())
66 add_library("avx");
67 add_library("noavx");
68#elif defined(__APPLE__)
69 add_library("acc");
70 add_library("no-acc");
71#endif
72 return architectures;
73}
74
/// Returns the process's current working directory as a native string.
std::string get_current_directory()
{
    const auto cwd = std::filesystem::current_path();
    return cwd.string();
}
79
80std::string get_executable_directory()
81{
82#ifdef _WIN32
83 char path[MAX_PATH];
84 DWORD result = GetModuleFileNameA(nullptr, path, MAX_PATH);
85 if (result == 0 || result == MAX_PATH)
86 {
87 return get_current_directory();
88 }
89#elif defined(__APPLE__)
90 char path[PATH_MAX];
91 uint32_t size = sizeof(path);
92 if (_NSGetExecutablePath(path, &size) != 0)
93 {
94 return get_current_directory();
95 }
96#else
97 char path[PATH_MAX];
98 ssize_t count = readlink("/proc/self/exe", path, PATH_MAX);
99 if (count == -1)
100 {
101 return get_current_directory();
102 }
103 path[count] = '\0';
104#endif
105 return std::filesystem::path(path).parent_path().string();
106}
107
/// Returns the platform's conventional environment variables that may hold
/// shared-library search paths.
std::vector<std::string> get_default_library_env_vars()
{
    std::vector<std::string> vars;
#ifdef _WIN32
    vars = {"PATH"};
#elif defined(__APPLE__)
    vars = {"DYLD_LIBRARY_PATH", "DYLD_FALLBACK_LIBRARY_PATH", "LD_LIBRARY_PATH"};
#else
    vars = {"LD_LIBRARY_PATH", "LIBRARY_PATH"};
#endif
    return vars;
}
118
/// Collects the non-empty path entries from each listed environment variable.
/// Unset or empty variables are skipped; entries are split on the platform's
/// path-list delimiter (';' on Windows, ':' elsewhere).
std::vector<std::string> get_env_library_paths(const std::vector<std::string> &env_vars)
{
#ifdef _WIN32
    constexpr char delimiter = ';';
#else
    constexpr char delimiter = ':';
#endif

    std::vector<std::string> paths;
    for (const auto &name : env_vars)
    {
        const char *raw = std::getenv(name.c_str());
        if (raw == nullptr || *raw == '\0')
            continue;

        std::stringstream stream{std::string(raw)};
        for (std::string entry; std::getline(stream, entry, delimiter);)
        {
            if (!entry.empty())
                paths.push_back(entry);
        }
    }
    return paths;
}
153
154std::vector<std::string> get_search_directories()
155{
156 std::vector<std::string> search_paths;
157 // Current directory
158 search_paths.push_back(get_current_directory());
159 // Executable directory
160 auto exe_dir = get_executable_directory();
161 search_paths.push_back(exe_dir);
162
163 std::string lib_folder_path = (std::filesystem::path("runtimes") / platform_name() / "native").string();
164
165 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
166 search_paths.push_back((std::filesystem::path(exe_dir) / ".." / lib_folder_path).string());
167
168 for (const std::string &lib_folder_name : {"lib", "libs", "runtimes"})
169 {
170 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
171 search_paths.push_back((std::filesystem::path(exe_dir) / ".." / lib_folder_path).string());
172 }
173 // Environment variable paths
174 auto default_env_vars = get_default_library_env_vars();
175 auto env_paths = get_env_library_paths(default_env_vars);
176 search_paths.insert(search_paths.end(), env_paths.begin(), env_paths.end());
177
178 std::vector<std::string> return_paths;
179 for (const std::string &search_path : search_paths)
180 {
181 if (std::filesystem::exists(search_path))
182 return_paths.push_back(search_path);
183 }
184 return return_paths;
185}
186
/// Loads a shared library from `path` via the platform loader behind the
/// LOAD_LIB macro (dlopen / LoadLibrary per LLM_runtime.h). Returns a null
/// handle on failure.
inline LibHandle load_library(const char *path)
{
    return LOAD_LIB(path);
}
191
/// Resolves `symbol` from a loaded library via the GET_SYM macro
/// (dlsym / GetProcAddress per LLM_runtime.h). Returns nullptr if absent.
inline void *load_symbol(LibHandle handle, const char *symbol)
{
    return GET_SYM(handle, symbol);
}
196
/// Unloads a previously loaded library via the CLOSE_LIB macro
/// (dlclose / FreeLibrary per LLM_runtime.h).
inline void unload_library(LibHandle handle)
{
    CLOSE_LIB(handle);
}
201
/// Loads a shared library while guarding against fatal errors raised inside
/// the platform loader: a longjmp back to the setjmp below aborts the load
/// and returns nullptr instead of crashing the process.
/// NOTE(review): presumably a signal handler installed elsewhere performs the
/// longjmp to get_jump_point() — confirm against the error-handler setup.
LibHandle load_library_safe(const std::string &path)
{
    if (setjmp(get_jump_point()) != 0)
    {
        // Reached via longjmp during load_library().
        std::cerr << "Error loading library: " << path << std::endl;
        return nullptr;
    }

    LibHandle handle_out = load_library(path.c_str());
    if (!handle_out)
    {
        std::cerr << "Failed to load library: " << path << std::endl;
    }
    return handle_out;
}
217
/// Attempts to load one backend library and construct an LLM from `command`.
/// Tries the bare filename, then each entry of `search_paths`; on a usable
/// path it resolves every exported function in LLM_FUNCTIONS_LIST and asks
/// the backend to build the provider. Returns true on success.
/// A local sigsetjmp target is installed so a fatal error during loading
/// unwinds here instead of killing the process; the previous jump point is
/// restored on every exit path.
bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename)
{
    sigjmp_buf local_jump_point;
    sigjmp_buf* old_jump_point = get_current_jump_point_ptr(); // Save the old one
    set_current_jump_point(&local_jump_point); // Switch to our local one

    if (sigsetjmp(local_jump_point, 1) != 0)
    {
        // Reached via siglongjmp while loading or constructing the backend.
        std::cerr << "Error occurred while loading backend: " << llm_lib_filename << std::endl;
        if (handle)
        {
            // Best-effort unload; the library may be in a broken state.
            try { unload_library(handle); } catch (...) {}
            handle = nullptr;
        }
        fail("", 0);
        set_current_jump_point(old_jump_point); // Restore old one
        return false;
    }

    // Resolve one exported symbol into the matching member function pointer;
    // logs (but does not abort) when the symbol is missing.
    auto load_sym = [&](auto &fn_ptr, const char *name)
    {
        fn_ptr = reinterpret_cast<std::decay_t<decltype(fn_ptr)>>(load_symbol(handle, name));
        if (!fn_ptr)
        {
            std::cerr << "Failed to load: " << name << std::endl;
        }
    };

    // Candidate locations: the bare filename first, then each search path.
    std::vector<std::filesystem::path> full_paths;
    full_paths.push_back(llm_lib_filename);
    for (const std::filesystem::path &search_path : search_paths)
        full_paths.push_back(search_path / llm_lib_filename);

    std::cout << "Trying " << llm_lib_filename << std::endl;

    bool success = false;
    for (const std::filesystem::path &full_path : full_paths)
    {
        if (std::filesystem::exists(full_path) && std::filesystem::is_regular_file(full_path))
        {
            handle = load_library_safe(full_path.string());
            if (!handle)
                continue;

            // Resolve every backend entry point; bail out on the first missing
            // symbol (restoring the jump point before returning).
#define DECLARE_AND_LOAD(name, ret, ...) \
    load_sym(this->name, #name); \
    if (!this->name) \
    { \
        set_current_jump_point(old_jump_point); \
        return false; \
    }
            LLM_FUNCTIONS_LIST(DECLARE_AND_LOAD)
#undef DECLARE_AND_LOAD

            // Share our error state with the backend, then build the provider.
            LLMService_InjectErrorState(&ErrorStateRegistry::get_error_state());
            llm = (LLMProvider *)LLMService_From_Command(command.c_str());
            if (llm == nullptr || get_status_code() != 0)
            {
                std::cerr << "Failed to construct LLM (error: " << get_status_code() << "): " << get_status_message() << std::endl;
                if (handle)
                {
                    unload_library(handle);
                    handle = nullptr;
                }
                fail("", 0); // clear error state before trying the next path
                continue;
            }
            success = true;
            break;
        }
    }

    set_current_jump_point(old_jump_point); // Always restore before returning
    return success;
}
295
296bool LLMService::create_LLM_library(const std::string &command)
297{
298 bool gpu = has_gpu_layers(command);
299 for (const auto &llm_lib_filename : available_architectures(gpu))
300 {
301 fail("", 0);
302 bool success = create_LLM_library_backend(command, llm_lib_filename);
303 if (success)
304 {
305 std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
306 return true;
307 }
308 }
309 std::cerr << "Couldn't load a backend" << std::endl;
310 return false;
311}
312
313//============================= LLMService =============================//
314
316{
317 search_paths = get_search_directories();
318}
319
/// Constructs the service from explicit model parameters: converts them to a
/// command-line string and loads a matching backend library for it.
/// NOTE(review): a load failure is not surfaced from this constructor;
/// presumably callers check the error state afterwards — confirm.
LLMService::LLMService(const std::string &model_path, int num_slots, int num_threads, int num_GPU_layers, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string> &lora_paths)
    : LLMService()
{
    std::string command = LLM::LLM_args_to_command(model_path, num_slots, num_threads, num_GPU_layers, flash_attention, context_size, batch_size, embedding_only, lora_paths);
    create_LLM_library(command);
}
326
/// Creates a heap-allocated LLMService from a command-line string.
/// The caller owns the returned pointer and must delete it (raw pointer is
/// the C-API-facing contract). The service is returned even if the backend
/// load failed; check the error state.
LLMService *LLMService::from_command(const std::string &command)
{
    LLMService *llmService = new LLMService();
    llmService->create_LLM_library(command);
    return llmService;
}
333
335{
336 return from_command(args_to_command(argc, argv));
337}
338
340{
341 if (llm)
342 {
344 llm = nullptr;
345 }
346 if (handle)
347 {
348 unload_library(handle);
349 handle = nullptr;
350 }
351}
352
353//============================= API =============================//
354
355const char *Available_Architectures(bool gpu)
356{
357 const std::vector<std::string> &llmlibs = available_architectures(gpu);
358 thread_local static std::string result;
359
360 std::ostringstream oss;
361 for (size_t i = 0; i < llmlibs.size(); ++i)
362 {
363 if (i != 0)
364 oss << ",";
365 oss << llmlibs[i];
366 }
367 result = oss.str();
368 return result.c_str();
369}
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Definition LLM.cpp:25
Runtime loading and management of LLM libraries.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
Definition LLM_runtime.h:53
#define GET_SYM(handle, name)
Get symbol macro for Unix.
Definition LLM_runtime.h:44
#define LOAD_LIB(path)
Load library macro for Unix.
Definition LLM_runtime.h:43
#define CLOSE_LIB(handle)
Close library macro for Unix.
Definition LLM_runtime.h:45
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
Definition LLM_runtime.h:42
static ErrorState & get_error_state()
Get the error state instance.
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Definition LLM.h:395
Abstract class for LLM service providers.
Definition LLM.h:275
Runtime loader for LLM libraries.
Definition LLM_runtime.h:63
~LLMService()
Destructor.
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename)
Load LLM library backend.
LibHandle handle
Handle to loaded library.
Definition LLM_runtime.h:99
LLMService()
Default constructor.
std::vector< std::string > search_paths
Library search paths.
bool create_LLM_library(const std::string &command)
Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the command.
LLMProvider * llm
Pointer to loaded LLM provider instance.
static LLMService * from_command(const std::string &command)
Create runtime from command line string.
static std::string LLM_args_to_command(const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
Convert LLM parameters to command line arguments.
Definition LLM.cpp:46
static bool has_gpu_layers(const std::string &command)
Check if command line arguments specify GPU layers.
Definition LLM.cpp:65
void LLMService_Registry(LLMProviderRegistry *existing_instance)
Set registry for LLMService (C API)
const char * Available_Architectures(bool gpu)
Get available architectures (C API)
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
Definition LLM.cpp:542
LLMService * LLMService_From_Command(const char *params_string)
Create LLMService from command string (C API)