19#include "error_handling.h"
22#if defined(_WIN32) || defined(__linux__)
23#include "archchecker.h"
29#include <libloaderapi.h>
31#define LOAD_LIB(path) LoadLibraryA(path)
32#define GET_SYM(handle, name) GetProcAddress(handle, name)
33#define CLOSE_LIB(handle) FreeLibrary(handle)
39#include <mach-o/dyld.h>
43#define LOAD_LIB(path) dlopen(path, RTLD_LAZY)
44#define GET_SYM(handle, name) dlsym(handle, name)
45#define CLOSE_LIB(handle) dlclose(handle)
/// @brief Macro defining the list of dynamically loaded LLM functions.
///
/// Applies M(name, return_type, argument_types...) to every C entry point
/// resolved from the LLM library at runtime.  Expand with DECLARE_FN to
/// generate one null-initialized function pointer per entry point.
#define LLM_FUNCTIONS_LIST(M) \
  M(LLMService_Registry, void, LLMProviderRegistry *) \
  M(LLMService_InjectErrorState, void, ErrorState *) \
  M(LLMService_Construct, LLMProvider *, const char *, int, int, int, bool, int, int, bool, int, const char **) \
  M(LLMService_From_Command, LLMProvider *, const char *)
80 LLMService(
const std::string &model_path,
int num_slots = 1,
int num_threads = -1,
int num_GPU_layers = 0,
bool flash_attention =
false,
int context_size = 4096,
int batch_size = 2048,
bool embedding_only =
false,
const std::vector<std::string> &lora_paths = {});
90 static LLMService *from_command(
const std::string &command);
97 static LLMService *from_command(
int argc,
char **argv);
105 bool create_LLM_library(
const std::string &command);
131 std::string
completion_json(
const json &data, CharArrayFn callback =
nullptr,
bool callbackWithJSON =
true)
override {
return ((
LLMProvider *)llm)->completion_json(data, callback, callbackWithJSON); }
163 void start_server(
const std::string &host =
"0.0.0.0",
int port = -1,
const std::string &API_key =
"")
override { ((
LLMProvider *)llm)->start_server(host, port, API_key); }
190 void set_SSL(
const std::string &cert,
const std::string &key)
override { ((
LLMProvider *)llm)->set_SSL(cert, key); }
/// @brief Declare function pointers for dynamically loaded functions.
///
/// Expands to a null-initialized function pointer named @p name with return
/// type @p ret and the given argument types.  Intended for expansion through
/// LLM_FUNCTIONS_LIST.
#define DECLARE_FN(name, ret, ...) \
  ret (*name)(__VA_ARGS__) = nullptr;
226 bool create_LLM_library_backend(
const std::string &command,
const std::string &llm_lib_filename);
/// @brief Get the directory containing the running executable.
static std::string get_executable_directory();

/// @brief Get the current working directory.
static std::string get_current_directory();

/// @brief Collect library search paths from the given environment variables.
/// @param env_vars Names of environment variables to inspect.
static std::vector<std::string> get_env_library_paths(
    const std::vector<std::string> &env_vars);

/// @brief Build the list of directories searched for LLM libraries
///        (presumably combines executable, current, and environment-derived
///        directories — confirm against the implementation).
static std::vector<std::string> get_search_directories();
Core LLM functionality interface and base classes.
const std::string os_library_dir()
Get OS-specific library directory.
std::vector< std::string > get_default_library_env_vars()
Get default environment variables for library paths.
#define LLM_FUNCTIONS_LIST(M)
Macro defining the list of dynamically loaded LLM functions.
#define DECLARE_FN(name, ret,...)
Declare function pointers for dynamically loaded functions.
const std::vector< std::string > available_architectures(bool gpu)
Get available architectures for the platform.
void * LibHandle
Unix library handle type.
Abstract class for LLM service providers.
Runtime loader for LLM libraries.
void stop_server() override
Stop HTTP server (override - delegates to loaded library)
void set_SSL(const std::string &cert, const std::string &key) override
Set SSL configuration (override - delegates to loaded library)
std::string debug_implementation() override
Implementation debugging.
std::string lora_weight_json(const json &data) override
Configure LoRA weights with HTTP response support.
void join_service() override
Wait for service completion (override - delegates to loaded library)
void cancel(int id_slot) override
Cancel request (override - delegates to loaded library)
bool started() override
Check service status (override - delegates to loaded library)
void start() override
Start service (override - delegates to loaded library)
std::string lora_list_json() override
List available LoRA adapters.
void logging_callback(CharArrayFn callback) override
Set logging callback (override - delegates to loaded library)
std::string tokenize_json(const json &data) override
Tokenize input (override)
std::string slot_json(const json &data) override
Manage slots with HTTP response support.
std::vector< std::string > search_paths
Library search paths.
std::string detokenize_json(const json &data) override
Convert tokens back to text.
std::string embeddings_json(const json &data) override
Generate embeddings with HTTP response support.
int get_next_available_slot() override
Get available slot (override - delegates to loaded library)
void debug(int debug_level) override
Set debug level (override - delegates to loaded library)
void join_server() override
Wait for server completion (override - delegates to loaded library)
std::string apply_template_json(const json &data) override
Apply a chat template to message data.
void start_server(const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override
Start HTTP server (override - delegates to loaded library)
std::string completion_json(const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
Generate completion (override - delegates to loaded library)
void stop() override
Stop service (override - delegates to loaded library)
int embedding_size() override
Get embedding size (override - delegates to loaded library)
File with basic definitions.
const char * Available_Architectures(bool gpu)
Get available architectures (C API)