LlamaLib  v2.0.5
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM_runtime.cpp
#include "LLM_runtime.h"

#include <algorithm>
2
3//============================= LIBRARY LOADING =============================//
4
/// @brief Returns the runtime identifier (RID) for the current platform.
/// @return "win-x64", "linux-x64", "osx-x64" or "osx-arm64"; empty string on
///         an unrecognized platform (an error is printed to stderr).
const std::string platform_name()
{
#if defined(_WIN32)
    return "win-x64";
#elif defined(__linux__)
    return "linux-x64";
#elif defined(__APPLE__)
#if defined(__x86_64__)
    // Intel macOS build
    return "osx-x64";
#else
    // Apple Silicon (arm64) build
    return "osx-arm64";
#endif
#else
    std::cerr << "Unknown platform!" << std::endl;
    return "";
#endif
}
22
23const std::vector<std::string> available_architectures(bool gpu)
24{
25 std::vector<std::string> architectures;
26#if defined(_WIN32)
27 std::string prefix = "";
28#else
29 std::string prefix = "lib";
30#endif
31
32#if defined(_WIN32)
33 std::string suffix = "dll";
34#elif defined(__linux__)
35 std::string suffix = "so";
36#elif defined(__APPLE__)
37 std::string suffix = "dylib";
38#else
39 std::cerr << "Unknown platform!" << std::endl;
40 return architectures;
41#endif
42
43 const auto add_library = [&](std::string arch)
44 {
45 std::string platform = platform_name();
46 std::string dash_arch = arch;
47 if (arch != "")
48 dash_arch = "_" + dash_arch;
49 std::string path = prefix + "llamalib_" + platform + dash_arch + "." + suffix;
50 architectures.push_back(path);
51 };
52
53 if (gpu)
54 {
55#if defined(_WIN32) || defined(__linux__)
56 add_library("cublas");
57 add_library("tinyblas");
58 add_library("hip");
59 add_library("vulkan");
60#endif
61 }
62 else
63 {
64#if defined(_WIN32) || defined(__linux__)
65 if (has_avx512())
66 add_library("avx512");
67 if (has_avx2())
68 add_library("avx2");
69 if (has_avx())
70 add_library("avx");
71 add_library("noavx");
72#elif defined(__APPLE__)
73 add_library("acc");
74 add_library("no-acc");
75#endif
76 }
77 return architectures;
78}
79
/// @brief Returns the process's current working directory as a native string.
std::string get_current_directory()
{
    const auto cwd = std::filesystem::current_path();
    return cwd.string();
}
84
85std::string get_executable_directory()
86{
87#ifdef _WIN32
88 char path[MAX_PATH];
89 DWORD result = GetModuleFileNameA(nullptr, path, MAX_PATH);
90 if (result == 0 || result == MAX_PATH)
91 {
92 return get_current_directory();
93 }
94#elif defined(__APPLE__)
95 char path[PATH_MAX];
96 uint32_t size = sizeof(path);
97 if (_NSGetExecutablePath(path, &size) != 0)
98 {
99 return get_current_directory();
100 }
101#else
102 char path[PATH_MAX];
103 ssize_t count = readlink("/proc/self/exe", path, PATH_MAX);
104 if (count == -1)
105 {
106 return get_current_directory();
107 }
108 path[count] = '\0';
109#endif
110 return std::filesystem::path(path).parent_path().string();
111}
112
/// @brief Names of the environment variables that hold dynamic-library
///        search paths on the current platform.
std::vector<std::string> get_default_library_env_vars()
{
    std::vector<std::string> env_vars;
#ifdef _WIN32
    env_vars = {"PATH"};
#elif defined(__APPLE__)
    env_vars = {"DYLD_LIBRARY_PATH", "DYLD_FALLBACK_LIBRARY_PATH", "LD_LIBRARY_PATH"};
#else
    env_vars = {"LD_LIBRARY_PATH", "LIBRARY_PATH"};
#endif
    return env_vars;
}
123
/// @brief Collects directory entries from the given environment variables.
///
/// Each variable's value is split on the platform's path-list separator
/// (';' on Windows, ':' elsewhere); empty segments and unset/empty
/// variables are skipped.
///
/// @param env_vars Names of environment variables to read.
/// @return All non-empty path entries, in variable order then entry order.
std::vector<std::string> get_env_library_paths(const std::vector<std::string> &env_vars)
{
#ifdef _WIN32
    const char delimiter = ';';
#else
    const char delimiter = ':';
#endif

    std::vector<std::string> paths;
    for (const auto &env_var : env_vars)
    {
        const char *raw = std::getenv(env_var.c_str());
        if (raw == nullptr || *raw == '\0')
            continue;

        const std::string value(raw);
        std::string::size_type start = 0;
        while (start <= value.size())
        {
            std::string::size_type end = value.find(delimiter, start);
            if (end == std::string::npos)
                end = value.size();
            if (end > start) // drop empty segments (e.g. "a::b")
                paths.emplace_back(value.substr(start, end - start));
            if (end == value.size())
                break;
            start = end + 1;
        }
    }
    return paths;
}
158
159std::vector<std::string> get_search_directories()
160{
161 std::vector<std::string> search_paths;
162 // Current directory
163 search_paths.push_back(get_current_directory());
164 // Executable directory
165 auto exe_dir = get_executable_directory();
166 search_paths.push_back(exe_dir);
167
168 std::string lib_folder_path = (std::filesystem::path("runtimes") / platform_name() / "native").string();
169
170 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
171 search_paths.push_back((std::filesystem::path(exe_dir) / ".." / lib_folder_path).string());
172
173 for (const std::string &lib_folder_name : {"lib", "libs", "runtimes"})
174 {
175 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
176 search_paths.push_back((std::filesystem::path(exe_dir) / ".." / lib_folder_path).string());
177 }
178 // Environment variable paths
179 auto default_env_vars = get_default_library_env_vars();
180 auto env_paths = get_env_library_paths(default_env_vars);
181 search_paths.insert(search_paths.end(), env_paths.begin(), env_paths.end());
182
183 std::vector<std::string> return_paths;
184 for (const std::string &search_path : search_paths)
185 {
186 if (std::filesystem::exists(search_path))
187 return_paths.push_back(search_path);
188 }
189 return return_paths;
190}
191
/// @brief Loads a dynamic library via the platform macro
///        (LoadLibrary on Windows, dlopen elsewhere).
/// @param path Library file path or bare filename.
/// @return Library handle, or null on failure.
inline LibHandle load_library(const char *path)
{
    return LOAD_LIB(path);
}
196
/// @brief Resolves an exported symbol from a loaded library via the platform
///        macro (GetProcAddress on Windows, dlsym elsewhere).
/// @param handle Handle returned by load_library().
/// @param symbol Exported symbol name.
/// @return Symbol address, or null if not found.
inline void *load_symbol(LibHandle handle, const char *symbol)
{
    return GET_SYM(handle, symbol);
}
201
/// @brief Unloads a library handle via the platform macro
///        (FreeLibrary on Windows, dlclose elsewhere).
/// @param handle Handle returned by load_library().
inline void unload_library(LibHandle handle)
{
    CLOSE_LIB(handle);
}
206
/// @brief Loads a dynamic library, guarding against fatal signals raised by
///        the loader (e.g. a broken backend crashing in its initializers).
/// @param path Library file path.
/// @return Library handle, or null on load failure or crash during load.
LibHandle load_library_safe(const std::string &path)
{
    // If a fatal signal fires while the loader runs, the handler siglongjmps
    // back here with a non-zero value and we report failure instead of dying.
    // NOTE(review): assumes a signal handler wired to get_jump_point() has
    // been installed elsewhere before this is called — confirm.
    if (setjmp(get_jump_point()) != 0)
    {
        std::cerr << "Error loading library: " << path << std::endl;
        return nullptr;
    }

    LibHandle handle_out = load_library(path.c_str());
    if (!handle_out)
    {
        std::cerr << "Failed to load library: " << path << std::endl;
    }
    return handle_out;
}
222
/// @brief Tries to load one backend library, resolve its API, and construct
///        an LLM from the given command string.
///
/// The filename is tried as-is and then against every directory in
/// search_paths. On success, `handle` holds the loaded library and `llm`
/// the constructed provider.
///
/// @param command          Backend command-line string.
/// @param llm_lib_filename Candidate library filename (from
///                         available_architectures()).
/// @param is_gpu_library   When true, the backend is skipped unless it
///                         reports GPU support.
/// @return true when a backend was loaded and an LLM constructed.
bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library)
{
    // Use a function-local jump point so crashes inside the backend library
    // during load/construction unwind back to this frame only; the previous
    // jump point is restored on every exit path.
    sigjmp_buf local_jump_point;
    sigjmp_buf* old_jump_point = get_current_jump_point_ptr(); // Save the old one
    set_current_jump_point(&local_jump_point); // Switch to our local one

    if (sigsetjmp(local_jump_point, 1) != 0)
    {
        // Reached via siglongjmp from a signal handler: the backend crashed.
        std::cerr << "Error occurred while loading backend: " << llm_lib_filename << std::endl;
        if (handle)
        {
            try { unload_library(handle); } catch (...) {}
            handle = nullptr;
        }
        fail("", 0);
        set_current_jump_point(old_jump_point); // Restore old one
        return false;
    }

    // Resolves one exported symbol into the given member function pointer;
    // logs (but does not throw) on failure.
    auto load_sym = [&](auto &fn_ptr, const char *name)
    {
        fn_ptr = reinterpret_cast<std::decay_t<decltype(fn_ptr)>>(load_symbol(handle, name));
        if (!fn_ptr)
        {
            std::cerr << "Failed to load: " << name << std::endl;
        }
    };

    // Candidate locations: bare filename first, then each search directory.
    std::vector<std::filesystem::path> full_paths;
    full_paths.push_back(llm_lib_filename);
    for (const std::filesystem::path &search_path : search_paths)
        full_paths.push_back(search_path / llm_lib_filename);

    std::cout << "Trying " << llm_lib_filename << std::endl;

    bool success = false;
    for (const std::filesystem::path &full_path : full_paths)
    {
        if (std::filesystem::exists(full_path) && std::filesystem::is_regular_file(full_path))
        {
            handle = load_library_safe(full_path.string());
            if (!handle)
                continue;

            // Resolve every function declared in LLM_FUNCTIONS_LIST;
            // abort this backend on the first missing symbol.
            // NOTE(review): this failure path returns without unloading the
            // freshly loaded handle — confirm whether the leak is intentional.
#define DECLARE_AND_LOAD(name, ret, ...) \
    load_sym(this->name, #name); \
    if (!this->name) \
    { \
        set_current_jump_point(old_jump_point); \
        return false; \
    }
            LLM_FUNCTIONS_LIST(DECLARE_AND_LOAD)
#undef DECLARE_AND_LOAD
            // NOTE(review): this `continue` leaves `handle` pointing at a
            // library that is never unloaded before the next iteration
            // overwrites it — possible handle leak; verify.
            if (is_gpu_library && !LLMService_Supports_GPU()) continue;

            // Share our error-state object with the backend, then ask it to
            // construct the provider from the command string.
            LLMService_InjectErrorState(&ErrorStateRegistry::get_error_state());
            llm = (LLMProvider *)LLMService_From_Command(command.c_str());
            if (llm == nullptr || get_status_code() != 0)
            {
                std::cerr << "Failed to construct LLM (error: " << get_status_code() << "): " << get_status_message() << std::endl;
                if (handle)
                {
                    unload_library(handle);
                    handle = nullptr;
                }
                fail("", 0);
                continue;
            }
            success = true;
            break;
        }
    }

    set_current_jump_point(old_jump_point); // Always restore before returning
    return success;
}
301
302bool LLMService::create_LLM_library(const std::string &command)
303{
304 std::vector<std::string> archs_cpu = available_architectures(false);
305 std::vector<std::string> archs_gpu;
306 if (has_gpu_layers(command)) archs_gpu = available_architectures(true);
307
308 for (bool is_gpu_library: {true, false})
309 {
310 std::vector<std::string> archs = is_gpu_library? archs_gpu: archs_cpu;
311 for (const auto &llm_lib_filename : archs)
312 {
313 fail("", 0);
314 bool success = create_LLM_library_backend(command, llm_lib_filename, is_gpu_library);
315 if (success)
316 {
317 std::cout << "Successfully loaded: " << llm_lib_filename << std::endl;
318 return true;
319 }
320 }
321 }
322 std::cerr << "Couldn't load a backend" << std::endl;
323 return false;
324}
325
326//============================= LLMService =============================//
327
329{
330 search_paths = get_search_directories();
331}
332
/// @brief Constructs the service and immediately loads a backend for the
///        given model configuration.
///
/// Delegates to the default constructor (which initializes search_paths),
/// converts the parameters to a command-line string, and loads a backend.
/// NOTE(review): the boolean result of create_LLM_library is discarded, so
/// a failed load leaves the service without a provider and no signal to the
/// caller — confirm callers check for that state.
LLMService::LLMService(const std::string &model_path, int num_slots, int num_threads, int num_GPU_layers, bool flash_attention, int context_size, int batch_size, bool embedding_only, const std::vector<std::string> &lora_paths)
    : LLMService()
{
    std::string command = LLM::LLM_args_to_command(model_path, num_slots, num_threads, num_GPU_layers, flash_attention, context_size, batch_size, embedding_only, lora_paths);
    create_LLM_library(command);
}
339
340LLMService *LLMService::from_command(const std::string &command)
341{
342 LLMService *llmService = new LLMService();
343 llmService->create_LLM_library(command);
344 return llmService;
345}
346
348{
349 return from_command(args_to_command(argc, argv));
350}
351
353{
354 if (llm)
355 {
357 llm = nullptr;
358 }
359 if (handle)
360 {
361 unload_library(handle);
362 handle = nullptr;
363 }
364}
365
366//============================= API =============================//
367
368const char *Available_Architectures(bool gpu)
369{
370 const std::vector<std::string> &llmlibs = available_architectures(gpu);
371 thread_local static std::string result;
372
373 std::ostringstream oss;
374 for (size_t i = 0; i < llmlibs.size(); ++i)
375 {
376 if (i != 0)
377 oss << ",";
378 oss << llmlibs[i];
379 }
380 result = oss.str();
381 return result.c_str();
382}
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Definition LLM.cpp:25
Runtime loading and management of LLM libraries.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
Definition LLM_runtime.h:53
#define GET_SYM(handle, name)
Get symbol macro for Unix.
Definition LLM_runtime.h:44
#define LOAD_LIB(path)
Load library macro for Unix.
Definition LLM_runtime.h:43
#define CLOSE_LIB(handle)
Close library macro for Unix.
Definition LLM_runtime.h:45
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
Definition LLM_runtime.h:42
static ErrorState & get_error_state()
Get the error state instance.
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Definition LLM.h:399
Abstract class for LLM service providers.
Definition LLM.h:279
Runtime loader for LLM libraries.
Definition LLM_runtime.h:64
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library=false)
Load LLM library backend.
~LLMService()
Destructor.
LibHandle handle
Handle to loaded library.
LLMService()
Default constructor.
std::vector< std::string > search_paths
Library search paths.
bool create_LLM_library(const std::string &command)
Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the command.
LLMProvider * llm
Pointer to loaded LLM provider instance.
static LLMService * from_command(const std::string &command)
Create runtime from command line string.
static std::string LLM_args_to_command(const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
Convert LLM parameters to command line arguments.
Definition LLM.cpp:46
static bool has_gpu_layers(const std::string &command)
Check if command line arguments specify GPU layers.
Definition LLM.cpp:66
void LLMService_Registry(LLMProviderRegistry *existing_instance)
Set registry for LLMService (C API)
const char * Available_Architectures(bool gpu)
Get available architectures (C API)
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
Definition LLM.cpp:543
LLMService * LLMService_From_Command(const char *params_string)
Create LLMService from command string (C API)