// NOTE(review): garbled extraction fragment (original source lines 5-18).
// The return statements of the platform branches are missing from this
// capture. Intent per the surviving lines: map the compile-time platform
// (_WIN32 / __linux__ / __APPLE__, with an __x86_64__ sub-split on macOS,
// presumably producing a runtime-identifier string used by
// get_search_directories' "runtimes/<platform>/native" layout) to a
// platform name, printing an error for unknown platforms.
// TODO: restore the full body from the upstream source before building.
5const std::string platform_name()
9#elif defined(__linux__)
11#elif defined(__APPLE__)
12#if defined(__x86_64__)
18 std::cerr <<
"Unknown platform!" << std::endl;
25 std::vector<std::string> architectures;
27 std::string prefix =
"";
29 std::string prefix =
"lib";
33 std::string suffix =
"dll";
34#elif defined(__linux__)
35 std::string suffix =
"so";
36#elif defined(__APPLE__)
37 std::string suffix =
"dylib";
39 std::cerr <<
"Unknown platform!" << std::endl;
43 const auto add_library = [&](std::string arch)
45 std::string platform = platform_name();
46 std::string dash_arch = arch;
48 dash_arch =
"_" + dash_arch;
49 std::string path = prefix +
"llamalib_" + platform + dash_arch +
"." + suffix;
50 architectures.push_back(path);
55#if defined(_WIN32) || defined(__linux__)
56 add_library(
"cublas");
57 add_library(
"tinyblas");
59 add_library(
"vulkan");
64#if defined(_WIN32) || defined(__linux__)
66 add_library(
"avx512");
72#elif defined(__APPLE__)
74 add_library(
"no-acc");
/// @brief Returns the process's current working directory.
/// @return Absolute path of the current working directory as a string.
std::string get_current_directory()
{
    return std::filesystem::current_path().string();
}
85std::string get_executable_directory()
89 DWORD result = GetModuleFileNameA(
nullptr, path, MAX_PATH);
90 if (result == 0 || result == MAX_PATH)
92 return get_current_directory();
94#elif defined(__APPLE__)
96 uint32_t size =
sizeof(path);
97 if (_NSGetExecutablePath(path, &size) != 0)
99 return get_current_directory();
103 ssize_t count = readlink(
"/proc/self/exe", path, PATH_MAX);
106 return get_current_directory();
110 return std::filesystem::path(path).parent_path().string();
/// @brief Default environment variables that carry library search paths on
/// the current platform.
/// NOTE(review): reconstructed from a partial capture. The signature comes
/// from the generated docs; the _WIN32 branch was missing from the capture,
/// so "PATH" (the conventional Windows DLL search variable) is assumed —
/// confirm against upstream.
/// @return Environment variable names, highest priority first.
std::vector<std::string> get_default_library_env_vars()
{
#if defined(_WIN32)
    return {
        "PATH"};
#elif defined(__APPLE__)
    return {
        "DYLD_LIBRARY_PATH",
        "DYLD_FALLBACK_LIBRARY_PATH",
        "LD_LIBRARY_PATH"};
#else
    return {
        "LD_LIBRARY_PATH",
        "LIBRARY_PATH"};
#endif
}
/// @brief Splits the values of the given environment variables into
/// individual library search paths.
/// @param env_vars Names of environment variables to inspect.
/// @return All non-empty path entries, in variable order. Unset or empty
/// variables contribute nothing; empty entries ("::") are dropped.
std::vector<std::string> get_env_library_paths(
    const std::vector<std::string> &env_vars)
{
    std::vector<std::string> paths;

    for (const auto &env_var : env_vars)
    {
        const char *env_value = std::getenv(env_var.c_str());
        if (env_value == nullptr)
            continue;

        std::string env_string(env_value);
        if (env_string.empty())
            continue;

        // PATH-style lists use ';' on Windows and ':' everywhere else.
#if defined(_WIN32)
        const char delimiter = ';';
#else
        const char delimiter = ':';
#endif

        std::stringstream ss(env_string);
        std::string path_str;
        while (std::getline(ss, path_str, delimiter))
        {
            if (!path_str.empty())
                paths.emplace_back(path_str);
        }
    }

    return paths;
}
159std::vector<std::string> get_search_directories()
161 std::vector<std::string> search_paths;
163 search_paths.push_back(get_current_directory());
165 auto exe_dir = get_executable_directory();
166 search_paths.push_back(exe_dir);
168 std::string lib_folder_path = (std::filesystem::path(
"runtimes") / platform_name() /
"native").
string();
170 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
171 search_paths.push_back((std::filesystem::path(exe_dir) /
".." / lib_folder_path).string());
173 for (
const std::string &lib_folder_name : {
"lib",
"libs",
"runtimes"})
175 search_paths.push_back((std::filesystem::path(exe_dir) / lib_folder_path).string());
176 search_paths.push_back((std::filesystem::path(exe_dir) /
".." / lib_folder_path).string());
180 auto env_paths = get_env_library_paths(default_env_vars);
181 search_paths.insert(search_paths.end(), env_paths.begin(), env_paths.end());
183 std::vector<std::string> return_paths;
184 for (
const std::string &search_path : search_paths)
186 if (std::filesystem::exists(search_path))
187 return_paths.push_back(search_path);
192inline LibHandle load_library(
const char *path)
197inline void *load_symbol(
LibHandle handle,
const char *symbol)
199 return GET_SYM(handle, symbol);
202inline void unload_library(
LibHandle handle)
207LibHandle load_library_safe(
const std::string &path)
209 if (setjmp(get_jump_point()) != 0)
211 std::cerr <<
"Error loading library: " << path << std::endl;
215 LibHandle handle_out = load_library(path.c_str());
218 std::cerr <<
"Failed to load library: " << path << std::endl;
225 sigjmp_buf local_jump_point;
226 sigjmp_buf* old_jump_point = get_current_jump_point_ptr();
227 set_current_jump_point(&local_jump_point);
229 if (sigsetjmp(local_jump_point, 1) != 0)
231 std::cerr <<
"Error occurred while loading backend: " << llm_lib_filename << std::endl;
234 try { unload_library(
handle); }
catch (...) {}
238 set_current_jump_point(old_jump_point);
242 auto load_sym = [&](
auto &fn_ptr,
const char *name)
244 fn_ptr =
reinterpret_cast<std::decay_t<decltype(fn_ptr)
>>(load_symbol(
handle, name));
247 std::cerr <<
"Failed to load: " << name << std::endl;
251 std::vector<std::filesystem::path> full_paths;
252 full_paths.push_back(llm_lib_filename);
253 for (
const std::filesystem::path &search_path :
search_paths)
254 full_paths.push_back(search_path / llm_lib_filename);
257 std::cout <<
"Trying " << llm_lib_filename << std::endl;
259 bool success =
false;
260 for (
const std::filesystem::path &full_path : full_paths)
262 if (std::filesystem::exists(full_path) && std::filesystem::is_regular_file(full_path))
264 handle = load_library_safe(full_path.string());
268#define DECLARE_AND_LOAD(name, ret, ...) \
269 load_sym(this->name, #name); \
272 set_current_jump_point(old_jump_point); \
276#undef DECLARE_AND_LOAD
277 if (is_gpu_library && !LLMService_Supports_GPU())
continue;
282 if (
llm ==
nullptr || get_status_code() != 0)
284 std::cerr <<
"Failed to construct LLM (error: " << get_status_code() <<
"): " << get_status_message() << std::endl;
298 set_current_jump_point(old_jump_point);
305 std::vector<std::string> archs_gpu;
308 for (
bool is_gpu_library: {
true,
false})
310 std::vector<std::string> archs = is_gpu_library? archs_gpu: archs_cpu;
311 for (
const auto &llm_lib_filename : archs)
317 std::cout <<
"Successfully loaded: " << llm_lib_filename << std::endl;
322 std::cerr <<
"Couldn't load a backend" << std::endl;
// NOTE(review): garbled extraction fragment (original source lines 333-337+).
// The constructor converts its arguments into a single command-line string
// via LLM::LLM_args_to_command; the remainder of the body is missing from
// this capture — presumably it forwards `command` to create_LLM_library()
// (per the generated docs) — restore from the upstream source.
333LLMService::LLMService(
const std::string &model_path,
int num_slots,
int num_threads,
int num_GPU_layers,
bool flash_attention,
int context_size,
int batch_size,
bool embedding_only,
const std::vector<std::string> &lora_paths)
336 std::string command =
LLM::LLM_args_to_command(model_path, num_slots, num_threads, num_GPU_layers, flash_attention, context_size, batch_size, embedding_only, lora_paths);
371 thread_local static std::string result;
373 std::ostringstream oss;
374 for (
size_t i = 0; i < llmlibs.size(); ++i)
381 return result.c_str();
void ensure_error_handlers_initialized()
Ensures error handlers are properly initialized.
Runtime loading and management of LLM libraries.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
#define GET_SYM(handle, name)
Get symbol macro for Unix.
#define LOAD_LIB(path)
Load library macro for Unix.
#define CLOSE_LIB(handle)
Close library macro for Unix.
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
static ErrorState & get_error_state()
Get the error state instance.
static LLMProviderRegistry & instance()
Get the singleton registry instance.
Abstract class for LLM service providers.
Runtime loader for LLM libraries.
bool create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename, bool is_gpu_library=false)
Load LLM library backend.
LibHandle handle
Handle to loaded library.
LLMService()
Default constructor.
std::vector< std::string > search_paths
Library search paths.
bool create_LLM_library(const std::string &command)
Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the comm...
LLMProvider * llm
Pointer to loaded LLM provider instance.
static LLMService * from_command(const std::string &command)
Create runtime from command line string.
static std::string LLM_args_to_command(const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
Convert LLM parameters to command line arguments.
static bool has_gpu_layers(const std::string &command)
Check if command line arguments specify GPU layers.
void LLMService_Registry(LLMProviderRegistry *existing_instance)
Set registry for LLMService (C API)
const char * Available_Architectures(bool gpu)
Get available architectures (C API)
void LLM_Delete(LLMProvider *llm)
Delete LLM provider (C API)
LLMService * LLMService_From_Command(const char *params_string)
Create LLMService from command string (C API)