// Returns the platform identifier string. Elsewhere in this file the result is
// embedded in library file names ("llamalib_" + platform + ...) and in the
// "runtimes/<platform>/native" folder path.
// The value is selected at compile time by the preprocessor chain below
// (Linux, Apple, and an x86-64 sub-check); the returned literals themselves
// are on lines not visible in this excerpt.
5const std::string platform_name()
9#elif defined(__linux__)
11#elif defined(__APPLE__)
12#if defined(__x86_64__)
// Presumably the fallback branch taken when no platform macro matched
// (the enclosing #else is not visible) - only a diagnostic is printed here.
18 std::cerr <<
"Unknown platform!" << std::endl;
// Builds `architectures`, a list of candidate backend library file names.
// NOTE(review): the enclosing function signature is not visible in this
// excerpt; presumably this is the body of available_architectures(bool gpu)
// - confirm.
25 std::vector<std::string> architectures;
// File-name prefix: empty in one preprocessor branch, "lib" in the other
// (the selecting #if/#else lines are not visible here).
27 std::string prefix =
"";
29 std::string prefix =
"lib";
// File-name extension per platform: "dll" in the first branch (presumably
// Windows - its #if line is not visible), "so" on Linux, "dylib" on Apple.
33 std::string suffix =
"dll";
34#elif defined(__linux__)
35 std::string suffix =
"so";
36#elif defined(__APPLE__)
37 std::string suffix =
"dylib";
// Remaining branch: no suffix is chosen, only a diagnostic is printed.
39 std::cerr <<
"Unknown platform!" << std::endl;
// Helper that turns an architecture tag into a file name of the form
//   <prefix>llamalib_<platform><dash_arch>.<suffix>
// and appends it to `architectures`. It captures the locals by reference.
43 const auto add_library = [&](std::string arch)
45 std::string platform = platform_name();
46 std::string dash_arch = arch;
// An underscore separator is prepended to the tag; the condition guarding
// this assignment is on a line not visible here.
48 dash_arch =
"_" + dash_arch;
49 std::string path = prefix +
"llamalib_" + platform + dash_arch +
"." + suffix;
50 architectures.push_back(path);
// Windows/Linux candidates. The order of the add_library calls is the order
// of entries in `architectures`; further calls and any conditions between
// the ones shown are not visible in this excerpt.
53#if defined(_WIN32) || defined(__linux__)
56 add_library(
"cublas");
57 add_library(
"tinyblas");
59 add_library(
"vulkan");
62 add_library(
"avx512");
// Apple candidates.
68#elif defined(__APPLE__)
70 add_library(
"no-acc");
// Returns the process's current working directory as a string, obtained from
// std::filesystem::current_path().
75std::string get_current_directory()
77 return std::filesystem::current_path().string();
// Returns the directory that contains the running executable, using a
// platform-specific query for the executable path. Every failure path shown
// falls back to get_current_directory().
80std::string get_executable_directory()
// Win32 API branch: GetModuleFileNameA with a null module handle queries the
// path of the current process's executable.
84 DWORD result = GetModuleFileNameA(
nullptr, path, MAX_PATH);
// 0 signals failure; a result equal to the buffer size (MAX_PATH) is treated
// as failure too (per the Win32 docs this indicates a truncated path).
85 if (result == 0 || result == MAX_PATH)
87 return get_current_directory();
// Apple branch: _NSGetExecutablePath fills `path`; a non-zero return is
// treated as failure.
89#elif defined(__APPLE__)
91 uint32_t size =
sizeof(path);
92 if (_NSGetExecutablePath(path, &size) != 0)
94 return get_current_directory();
// Remaining branch: resolve the /proc/self/exe symlink. The check on `count`
// that guards the fallback below is on a line not visible here.
98 ssize_t count = readlink(
"/proc/self/exe", path, PATH_MAX);
101 return get_current_directory();
// Strip the file name, leaving the containing directory.
105 return std::filesystem::path(path).parent_path().string();
// Platform-specific lists of environment-variable names that hold library
// search paths.
// NOTE(review): the function signature and the first preprocessor branch are
// not visible in this excerpt; presumably this is the tail of
// get_default_library_env_vars() - confirm.
// Apple branch: the dyld variables plus LD_LIBRARY_PATH.
112#elif defined(__APPLE__)
113 return {
"DYLD_LIBRARY_PATH",
"DYLD_FALLBACK_LIBRARY_PATH",
"LD_LIBRARY_PATH"};
// Remaining branch (its #else line is not visible).
115 return {
"LD_LIBRARY_PATH",
"LIBRARY_PATH"};
// Collects directory entries from the given environment variables.
//
// @param env_vars  Names of environment variables to read.
// @return          Every non-empty entry found, in the order the variables
//                  are listed and in the order entries appear in each value.
119std::vector<std::string> get_env_library_paths(
const std::vector<std::string> &env_vars)
121 std::vector<std::string> paths;
123 for (
const auto &env_var : env_vars)
// std::getenv returns a null pointer for an unset variable; the null check
// itself is on a line not visible in this excerpt.
125 const char *env_value = std::getenv(env_var.c_str());
129 std::string env_string(env_value);
// Variables that are set but empty are checked for here (the statement taken
// in that case is not visible).
130 if (env_string.empty())
// List separator: ';' in one preprocessor branch and ':' in the other
// (the selecting #if/#else lines are not visible here).
135 const char delimiter =
';';
137 const char delimiter =
':';
// Split the value on the delimiter, dropping empty segments.
140 std::stringstream ss(env_string);
141 std::string path_str;
142 while (std::getline(ss, path_str, delimiter))
144 if (!path_str.empty())
146 paths.emplace_back(path_str);
// Assembles the directories in which backend libraries are searched for and
// keeps only those that exist on disk (collected in `return_paths`).
// Candidates are added in this order: current directory, executable
// directory, "runtimes/<platform>/native" under the executable directory and
// under its parent, then entries from the library-path environment variables.
154std::vector<std::string> get_search_directories()
156 std::vector<std::string> search_paths;
158 search_paths.push_back(get_current_directory());
160 auto exe_dir = get_executable_directory();
161 search_paths.push_back(exe_dir);
// Relative folder "runtimes/<platform_name()>/native".
163 std::string lib_folder_path = (std::filesystem::path(
"runtimes") / platform_name() /
"native").
string();
165 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
166 search_paths.push_back((std::filesystem::path(exe_dir) /
".." / lib_folder_path).string());
// NOTE(review): the loop variable `lib_folder_name` is not used by the two
// statements below; both append the same `lib_folder_path`-based entries that
// were already added just above, so as written the loop only adds duplicates
// and never adds "lib", "libs" or "runtimes" directories. Likely intended to
// use `lib_folder_name` here - confirm and fix.
168 for (
const std::string &lib_folder_name : {
"lib",
"libs",
"runtimes"})
170 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
171 search_paths.push_back((std::filesystem::path(exe_dir) /
".." / lib_folder_path).string());
// `default_env_vars` is defined on a line not visible in this excerpt.
175 auto env_paths = get_env_library_paths(default_env_vars);
176 search_paths.insert(search_paths.end(), env_paths.begin(), env_paths.end());
// Filter: keep only candidates that exist. Duplicates are not removed.
178 std::vector<std::string> return_paths;
179 for (
const std::string &search_path : search_paths)
181 if (std::filesystem::exists(search_path))
182 return_paths.push_back(search_path);
// Opens the shared library at `path` and returns its handle.
// NOTE(review): the body is not visible in this excerpt; presumably it wraps
// the LOAD_LIB(path) macro - confirm.
187inline LibHandle load_library(
const char *path)
// Looks up `symbol` in the library identified by `handle` through the
// GET_SYM macro and returns the result as an untyped pointer; callers cast it
// to the expected function-pointer type.
192inline void *load_symbol(
LibHandle handle,
const char *symbol)
194 return GET_SYM(handle, symbol);
// Releases the library identified by `handle`.
// NOTE(review): the body is not visible in this excerpt; presumably it wraps
// the CLOSE_LIB(handle) macro - confirm.
197inline void unload_library(
LibHandle handle)
// Loads the library at `path`, reporting problems on std::cerr.
//
// @param path  File path of the library to load.
// @return      The handle produced by load_library (return statements are on
//              lines not visible in this excerpt).
202LibHandle load_library_safe(
const std::string &path)
// setjmp returns non-zero only when control comes back here through a
// longjmp to this jump point, i.e. something went wrong during the load
// below; presumably the jump is issued by the project's error handlers -
// confirm.
204 if (setjmp(get_jump_point()) != 0)
206 std::cerr <<
"Error loading library: " << path << std::endl;
210 LibHandle handle_out = load_library(path.c_str());
// Reported when the load did not succeed (the guarding condition is on a
// line not visible here).
213 std::cerr <<
"Failed to load library: " << path << std::endl;
// Attempts to load the backend library `llm_lib_filename`, resolve its
// exported functions and construct the LLM from it.
// NOTE(review): the function signature is not visible in this excerpt;
// presumably this is the body of
// create_LLM_library_backend(command, llm_lib_filename) - confirm. Function
// boundaries are not visible either, so the trailing messages may belong to
// the caller.
//
// Install a local jump point for the duration of the load, remembering the
// previous one so it can be restored on every exit path.
220 sigjmp_buf local_jump_point;
221 sigjmp_buf* old_jump_point = get_current_jump_point_ptr();
222 set_current_jump_point(&local_jump_point);
// sigsetjmp returns non-zero only when re-entered through siglongjmp:
// report the failure, try to unload the half-loaded library, and restore
// the previous jump point.
224 if (sigsetjmp(local_jump_point, 1) != 0)
226 std::cerr <<
"Error occurred while loading backend: " << llm_lib_filename << std::endl;
// Best-effort cleanup: any exception from unloading is deliberately ignored.
229 try { unload_library(
handle); }
catch (...) {}
233 set_current_jump_point(old_jump_point);
// Generic helper: resolves `name` in the loaded library and stores it in
// `fn_ptr`, casting the untyped symbol to the pointer's own (decayed) type.
237 auto load_sym = [&](
auto &fn_ptr,
const char *name)
239 fn_ptr =
reinterpret_cast<std::decay_t<decltype(fn_ptr)
>>(load_symbol(
handle, name));
// Reported when resolution fails (the guarding condition is not visible).
242 std::cerr <<
"Failed to load: " << name << std::endl;
// Candidate locations: the bare file name first, then the file name under
// each entry of `search_paths`.
246 std::vector<std::filesystem::path> full_paths;
247 full_paths.push_back(llm_lib_filename);
248 for (
const std::filesystem::path &search_path :
search_paths)
249 full_paths.push_back(search_path / llm_lib_filename);
252 std::cout <<
"Trying " << llm_lib_filename << std::endl;
254 bool success =
false;
// Only candidates that exist and are regular files are passed to the loader.
255 for (
const std::filesystem::path &full_path : full_paths)
257 if (std::filesystem::exists(full_path) && std::filesystem::is_regular_file(full_path))
259 handle = load_library_safe(full_path.string());
// Per-function loader macro: resolves the export whose name is the
// stringified `name` into the member of the same name. Presumably expanded
// once per entry of LLM_FUNCTIONS_LIST - confirm (the expansion and part of
// the macro body are on lines not visible here).
263#define DECLARE_AND_LOAD(name, ret, ...) \
264 load_sym(this->name, #name); \
267 set_current_jump_point(old_jump_point); \
271#undef DECLARE_AND_LOAD
// Construction is considered failed when no LLM instance was produced or the
// status code is non-zero.
276 if (
llm ==
nullptr || get_status_code() != 0)
278 std::cerr <<
"Failed to construct LLM (error: " << get_status_code() <<
"): " << get_status_message() << std::endl;
292 set_current_jump_point(old_jump_point);
305 std::cout <<
"Successfully loaded: " << llm_lib_filename << std::endl;
// Final diagnostic when no backend could be loaded.
309 std::cerr <<
"Couldn't load a backend" << std::endl;
// Constructs an LLMService from individual parameters.
// All arguments are forwarded unchanged, in order, to
// LLM::LLM_args_to_command, which turns them into a single command-line
// string; what is done with `command` afterwards is on lines not visible in
// this excerpt.
320LLMService::LLMService(
const std::string &model_path,
int num_slots,
int num_threads,
int num_GPU_layers,
bool flash_attention,
int context_size,
int batch_size,
bool embedding_only,
const std::vector<std::string> &lora_paths)
323 std::string command =
LLM::LLM_args_to_command(model_path, num_slots, num_threads, num_GPU_layers, flash_attention, context_size, batch_size, embedding_only, lora_paths);
// Returns the architecture list as a C string.
// NOTE(review): the function signature, the definition of `llmlibs` and the
// loop body are not visible in this excerpt; presumably this is the body of
// the C API function Available_Architectures(bool gpu) - confirm.
//
// `result` is a thread_local static, so the pointer returned by c_str()
// stays valid after the function returns. Presumably it is overwritten by
// the next call on the same thread (the assignment is not visible) - confirm.
358 thread_local static std::string result;
360 std::ostringstream oss;
361 for (
size_t i = 0; i < llmlibs.size(); ++i)
368 return result.c_str();
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Runtime loading and management of LLM libraries.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
#define GET_SYM(handle, name)
Get symbol macro for Unix.
#define LOAD_LIB(path)
Load library macro for Unix.
#define CLOSE_LIB(handle)
Close library macro for Unix.
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
static ErrorState & get_error_state()
Get the error state instance.
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Abstract class for LLM service providers.
Runtime loader for LLM libraries.
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename)
Load LLM library backend.
LibHandle handle
Handle to loaded library.
LLMService()
Default constructor.
std::vector< std::string > search_paths
Library search paths.
bool create_LLM_library(const std::string &command)
Loads LLM library dynamically according to underlying architecture and creates a LLM based on the comm...
LLMProvider * llm
Pointer to loaded LLM provider instance.
static LLMService * from_command(const std::string &command)
Create runtime from command line string.
static std::string LLM_args_to_command(const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
Convert LLM parameters to command line arguments.
static bool has_gpu_layers(const std::string &command)
Check if command line arguments specify GPU layers.
void LLMService_Registry(LLMProviderRegistry *existing_instance)
Set registry for LLMService (C API)
const char * Available_Architectures(bool gpu)
Get available architectures (C API)
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
LLMService * LLMService_From_Command(const char *params_string)
Create LLMService from command string (C API)