LlamaLib v2.0.2
Cross-platform library for local LLMs
LLM_service.h
#pragma once

#include <thread>
#include <condition_variable>

#include "LLM.h"                    // core LLM functionality interface and base classes
#include "completion_processor.h"

#define LLAMALIB_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, -1, __VA_ARGS__)

struct common_params;
struct server_context;
struct server_http_context;
struct server_routes;
struct server_http_req;
struct server_http_res;

using server_http_res_ptr = std::unique_ptr<server_http_res>;
using handler_t = std::function<server_http_res_ptr(const server_http_req &req)>;

/// LLM service implementation of the abstract LLMProvider interface (LLM.h).
class UNDREAMAI_API LLMService : public LLMProvider
{
public:
    /// Default constructor.
    LLMService();

    /// Parameterized constructor.
    LLMService(const std::string &model_path, int num_slots = 1, int num_threads = -1, int num_GPU_layers = 0, bool flash_attention = false, int context_size = 4096, int batch_size = 2048, bool embedding_only = false, const std::vector<std::string> &lora_paths = {});

    /// Destructor.
    ~LLMService();

    /// Create LLMService from JSON parameters.
    static LLMService *from_params(const json &params_json);

    /// Create LLMService from command line string.
    static LLMService *from_command(const std::string &command);

    /// Create LLMService from argc/argv.
    static LLMService *from_command(int argc, char **argv);

    /// Convert JSON parameters to argv-style arguments.
    static std::vector<char *> jsonToArguments(const json &params_json);

    void init(int argc, char **argv);

    void init(const std::string &params_string);

    void init(const char *params_string);

    /// Returns the construct command.
    std::string get_command() { return command; }

    std::string encapsulate_route(const json &body, handler_t route_handler);

    //=================================== LLM METHODS START ===================================//

    /// Enable reasoning.
    void enable_reasoning(bool reasoning) override;

    /// Tokenize input (override).
    std::string tokenize_json(const json &data) override;

    /// Convert tokens back to text.
    std::string detokenize_json(const json &data) override;

    /// Generate embeddings with HTTP response support.
    std::string embeddings_json(const json &data) override;

    /// Apply a chat template to message data.
    std::string apply_template_json(const json &data) override;

    /// Generate completion (override).
    std::string completion_json(const json &data, CharArrayFn callback = nullptr, bool callbackWithJSON = true) override;

    /// Manage slots with HTTP response support.
    std::string slot_json(const json &data) override;

    /// Configure LoRA weights with HTTP response support.
    std::string lora_weight_json(const json &data) override;

    /// List available LoRA adapters.
    std::string lora_list_json() override;

    /// Cancel running request (override).
    void cancel(int id_slot) override;

    /// Start the LLM service (override).
    void start() override;

    /// Check service status (override).
    bool started() override;

    /// Stop the LLM service (override).
    void stop() override;

    /// Start HTTP server (override).
    void start_server(const std::string &host = "0.0.0.0", int port = -1, const std::string &API_key = "") override;

    /// Stop HTTP server (override).
    void stop_server() override;

    /// Wait for service thread completion (override).
    void join_service() override;

    /// Wait for server thread completion (override).
    void join_server() override;

    /// Configure SSL certificates (override).
    void set_SSL(const std::string &SSL_cert, const std::string &SSL_key) override;

    /// Get embedding vector dimensions (override).
    int embedding_size() override;

    /// Get available processing slot (override).
    int get_next_available_slot() override;

    /// Set debug level (override).
    void debug(int debug_level) override;

    /// Set logging callback (override).
    void logging_callback(CharArrayFn callback) override;

    /// Implementation debugging.
    std::string debug_implementation() override { return "standalone"; }
    //=================================== LLM METHODS END ===================================//

private:
    std::string command = "";
    common_params *params;
    bool llama_backend_has_init;
    server_context *ctx_server = nullptr;
    server_http_context *ctx_http = nullptr;
    server_routes *routes = nullptr;

    std::mutex start_stop_mutex;
    std::thread service_thread;
    std::condition_variable service_stopped_cv;
    bool service_stopped = false;
    std::thread server_thread;
    std::condition_variable server_stopped_cv;
    bool server_stopped = false;

    int next_available_slot = 0;

    std::vector<std::string> splitArguments(const std::string &inputString);

    const std::string detect_chat_template();

    server_http_req escape_reasoning(server_http_req prompt);
};

extern "C"
{
    /// Set registry for LLMService (C API).
    UNDREAMAI_API void LLMService_Registry(LLMProviderRegistry *existing_instance);

    /// Construct LLMService instance (C API).
    UNDREAMAI_API LLMService *LLMService_Construct(const char *model_path, int num_slots = 1, int num_threads = -1, int num_GPU_layers = 0, bool flash_attention = false, int context_size = 4096, int batch_size = 2048, bool embedding_only = false, int lora_count = 0, const char **lora_paths = nullptr);

    /// Create LLMService from command string (C API).
    UNDREAMAI_API LLMService *LLMService_From_Command(const char *params_string);

    /// Returns the construct command (C API).
    UNDREAMAI_API const char *LLMService_Command(LLMService *llm_service);

    /// Inject an ErrorState (error state container for sharing between libraries).
    UNDREAMAI_API void LLMService_InjectErrorState(ErrorState *error_state);
}
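The class above can be driven directly from C++. The following is a minimal, illustrative sketch based only on the member signatures declared in this header; the model path and the JSON request fields ("prompt", "n_predict") are assumptions that follow llama.cpp server conventions and are not documented here.

// Minimal usage sketch (not part of the library sources).
// Assumes `json` is the nlohmann-style JSON type used throughout the API,
// and that the request schema matches the llama.cpp server ("prompt", "n_predict").
#include "LLM_service.h"
#include <cstdio>

int main()
{
    // placeholder model path; single slot, automatic thread count, CPU only
    LLMService llm("model.gguf", /*num_slots*/ 1, /*num_threads*/ -1, /*num_GPU_layers*/ 0);

    llm.start();                              // start the LLM service
    // llm.start_server("0.0.0.0", 8080);     // optionally expose it over HTTP as well

    json request = {{"prompt", "Hello"}, {"n_predict", 32}};
    std::string reply = llm.completion_json(request);
    std::printf("%s\n", reply.c_str());

    llm.stop();                               // stop the service
    return 0;
}

completion_json also accepts a CharArrayFn callback for streaming partial results; the callback type is declared outside this file (presumably in LLM.h), so it is omitted from the sketch.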
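The extern "C" entry points can be exercised the same way from a C++ translation unit. This is again an illustrative sketch: the placeholder model path and the llama.cpp-style flags passed to LLMService_From_Command are assumptions, since this header does not document the accepted options.

// Illustrative sketch of the C API declared above (not from the library sources).
#include "LLM_service.h"
#include <cstdio>

int main()
{
    // construct a service from an explicit (placeholder) model path
    LLMService *service = LLMService_Construct("model.gguf", /*num_slots*/ 1);
    std::printf("construct command: %s\n", LLMService_Command(service));

    // alternatively, build it from a single command string; the flag names
    // below are assumptions modelled on llama.cpp server options
    // LLMService *service2 = LLMService_From_Command("-m model.gguf -c 4096");

    // no dedicated destroy function is declared in this header, so cleanup
    // goes through the C++ destructor here
    delete service;
    return 0;
}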