19#include "error_handling.h"
22#if defined(_WIN32) || defined(__linux__)
23#include "archchecker.h"
29#include <libloaderapi.h>
31#define LOAD_LIB(path) LoadLibraryA(path)
32#define GET_SYM(handle, name) GetProcAddress(handle, name)
33#define CLOSE_LIB(handle) FreeLibrary(handle)
39#include <mach-o/dyld.h>
43#define LOAD_LIB(path) dlopen(path, RTLD_LAZY)
44#define GET_SYM(handle, name) dlsym(handle, name)
45#define CLOSE_LIB(handle) dlclose(handle)
/// @brief Macro defining the list of dynamically loaded LLM functions.
///
/// Applies M(name, return_type, argument_types...) to every C entry point
/// resolved from the LLM library at runtime.  Expand with DECLARE_FN to
/// generate one null-initialized function pointer per entry point.
#define LLM_FUNCTIONS_LIST(M) \
  M(LLMService_Registry, void, LLMProviderRegistry *) \
  M(LLMService_InjectErrorState, void, ErrorState *) \
  M(LLMService_Construct, LLMProvider *, const char *, int, int, int, bool, int, int, bool, int, const char **) \
  M(LLMService_From_Command, LLMProvider *, const char *)
80 LLMService(
const std::string &model_path,
int num_slots = 1,
int num_threads = -1,
int num_GPU_layers = 0,
bool flash_attention =
false,
int context_size = 4096,
int batch_size = 2048,
bool embedding_only =
false,
const std::vector<std::string> &lora_paths = {});
90 static LLMService *from_command(
const std::string &command);
97 static LLMService *from_command(
int argc,
char **argv);
105 bool create_LLM_library(
const std::string &command);
131 std::string
completion_json(
const json &data, CharArrayFn callback =
nullptr,
bool callbackWithJSON =
true)
override {
return ((
LLMProvider *)llm)->completion_json(data, callback, callbackWithJSON); }
163 void start_server(
const std::string &host =
"0.0.0.0",
int port = -1,
const std::string &API_key =
"")
override { ((
LLMProvider *)llm)->start_server(host, port, API_key); }
190 void set_SSL(
const std::string &cert,
const std::string &key)
override { ((
LLMProvider *)llm)->set_SSL(cert, key); }
/// @brief Declare function pointers for dynamically loaded functions.
///
/// Expands to a null-initialized function pointer named @p name with return
/// type @p ret and the given argument types.  Intended for expansion through
/// LLM_FUNCTIONS_LIST.
#define DECLARE_FN(name, ret, ...) \
  ret (*name)(__VA_ARGS__) = nullptr;
226 bool create_LLM_library_backend(
const std::string &command,
const std::string &llm_lib_filename);
/// @brief Get the directory containing the running executable.
static std::string get_executable_directory();

/// @brief Get the current working directory.
static std::string get_current_directory();

/// @brief Collect library search paths from the given environment variables.
/// @param env_vars Names of environment variables to inspect.
static std::vector<std::string> get_env_library_paths(
    const std::vector<std::string> &env_vars);

/// @brief Build the list of directories searched for LLM libraries
///        (presumably combines executable, current, and environment-derived
///        directories — confirm against the implementation).
static std::vector<std::string> get_search_directories();
Core LLM functionality interface and base classes.
const std::string os_library_dir()
Get OS-specific library directory.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
#define DECLARE_FN(name, ret,...)
Declare function pointers for dynamically loaded functions.
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
Abstract class for LLM service providers.
Runtime loader for LLM libraries.
void stop_server() override
Stop HTTP server (override - delegates to loaded library)
void set_SSL(const std::string &cert, const std::string &key) override
Set SSL configuration (override - delegates to loaded library)
std::string debug_implementation() override
Implementation debugging.
std::string lora_weight_json(const json &data) override
Configure LoRA weights with HTTP response support.
void join_service() override
Wait for service completion (override - delegates to loaded library)
void cancel(int id_slot) override
Cancel request (override - delegates to loaded library)
bool started() override
Check service status (override - delegates to loaded library)
void start() override
Start service (override - delegates to loaded library)
std::string lora_list_json() override
List available LoRA adapters.
void logging_callback(CharArrayFn callback) override
Set logging callback (override - delegates to loaded library)
std::string tokenize_json(const json &data) override
Tokenize input (override)
std::string slot_json(const json &data) override
Manage slots with HTTP response support.
std::vector< std::string > search_paths
Library search paths.
std::string detokenize_json(const json &data) override
Convert tokens back to text.
std::string embeddings_json(const json &data) override
Generate embeddings with HTTP response support.
int get_next_available_slot() override
Get available slot (override - delegates to loaded library)
void debug(int debug_level) override
Set debug level (override - delegates to loaded library)
void join_server() override
Wait for server completion (override - delegates to loaded library)
std::string apply_template_json(const json &data) override
Apply a chat template to message data.
void start_server(const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override
Start HTTP server (override - delegates to loaded library)
std::string completion_json(const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
Generate completion (override - delegates to loaded library)
void stop() override
Stop service (override - delegates to loaded library)
int embedding_size() override
Get embedding size (override - delegates to loaded library)
File with basic definitions.
const char * Available_Architectures(bool gpu)
Get available architectures (C API)