LlamaLib v2.0.2
Cross-platform library for local LLMs
LLMService Class Reference

Runtime loader for LLM libraries.

#include <LLM_runtime.h>

Inheritance diagram for LLMService:

Public Member Functions

 LLMService ()
 Default constructor.
 
 LLMService (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
 Parameterized constructor.
 
 ~LLMService ()
 Destructor.
 
bool create_LLM_library (const std::string &command)
 Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the command.
 
std::string tokenize_json (const json &data) override
 Tokenize input (override)
 
std::string detokenize_json (const json &data) override
 Convert tokens back to text.
 
std::string embeddings_json (const json &data) override
 Generate embeddings with HTTP response support.
 
std::string completion_json (const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
 Generate completion (override - delegates to loaded library)
 
std::string apply_template_json (const json &data) override
 Apply a chat template to message data.
 
void cancel (int id_slot) override
 Cancel request (override - delegates to loaded library)
 
std::string lora_weight_json (const json &data) override
 Configure LoRA weights with HTTP response support.
 
std::string lora_list_json () override
 List available LoRA adapters.
 
std::string slot_json (const json &data) override
 Manage slots with HTTP response support.
 
void start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override
 Start HTTP server (override - delegates to loaded library)
 
void stop_server () override
 Stop HTTP server (override - delegates to loaded library)
 
void start () override
 Start service (override - delegates to loaded library)
 
bool started () override
 Check service status (override - delegates to loaded library)
 
void stop () override
 Stop service (override - delegates to loaded library)
 
void join_service () override
 Wait for service completion (override - delegates to loaded library)
 
void join_server () override
 Wait for server completion (override - delegates to loaded library)
 
void set_SSL (const std::string &cert, const std::string &key) override
 Set SSL configuration (override - delegates to loaded library)
 
int embedding_size () override
 Get embedding size (override - delegates to loaded library)
 
int get_next_available_slot () override
 Get available slot (override - delegates to loaded library)
 
void debug (int debug_level) override
 Set debug level (override - delegates to loaded library)
 
void logging_callback (CharArrayFn callback) override
 Set logging callback (override - delegates to loaded library)
 
std::string debug_implementation () override
 Implementation debugging.
 
 LLMService ()
 Default constructor.
 
 LLMService (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
 Parameterized constructor.
 
 ~LLMService ()
 Destructor.
 
void init (int argc, char **argv)
 Initialize from argc/argv parameters.
 
void init (const std::string &params_string)
 Initialize from parameter string.
 
void init (const char *params_string)
 Initialize from C-style parameter string.
 
std::string get_command ()
 Returns the construct command.
 
std::string encapsulate_route (const json &body, handler_t route_handler)
 
void enable_reasoning (bool reasoning) override
 enable reasoning
 
std::string tokenize_json (const json &data) override
 Tokenize input (override)
 
std::string detokenize_json (const json &data) override
 Convert tokens back to text.
 
std::string embeddings_json (const json &data) override
 Generate embeddings with HTTP response support.
 
std::string apply_template_json (const json &data) override
 Apply a chat template to message data.
 
std::string completion_json (const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
 Generate completion (override)
 
std::string slot_json (const json &data) override
 Manage slots with HTTP response support.
 
std::string lora_weight_json (const json &data) override
 Configure LoRA weights with HTTP response support.
 
std::string lora_list_json () override
 List available LoRA adapters.
 
void cancel (int id_slot) override
 Cancel running request (override)
 
void start () override
 Start the LLM service (override)
 
bool started () override
 Check service status (override)
 
void stop () override
 Stop the LLM service (override)
 
void start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override
 Start HTTP server (override)
 
void stop_server () override
 Stop HTTP server (override)
 
void join_service () override
 Wait for service thread completion (override)
 
void join_server () override
 Wait for server thread completion (override)
 
void set_SSL (const std::string &SSL_cert, const std::string &SSL_key) override
 Configure SSL certificates (override)
 
int embedding_size () override
 Get embedding vector dimensions (override)
 
int get_next_available_slot () override
 Get available processing slot (override)
 
void debug (int debug_level) override
 Set debug level (override)
 
void logging_callback (CharArrayFn callback) override
 Set logging callback (override)
 
std::string debug_implementation () override
 Implementation debugging.
 
- Public Member Functions inherited from LLMProvider
virtual ~LLMProvider ()
 Virtual destructor.
 
virtual bool lora_weight (const std::vector< LoraIdScale > &loras)
 Configure LoRA weights.
 
virtual std::vector< LoraIdScalePath > lora_list ()
 List available LoRA adapters.
 
virtual void logging_stop ()
 Stop logging.
 
- Public Member Functions inherited from LLMLocal
virtual std::string save_slot (int id_slot, const std::string &filepath)
 Save slot state to file.
 
virtual std::string load_slot (int id_slot, const std::string &filepath)
 Load slot state from file.
 
- Public Member Functions inherited from LLM
virtual ~LLM ()=default
 Virtual destructor.
 
virtual std::vector< int > tokenize (const std::string &query)
 Tokenize text.
 
virtual std::string detokenize (const std::vector< int32_t > &tokens)
 Convert tokens to text.
 
virtual std::vector< float > embeddings (const std::string &query)
 Generate embeddings.
 
virtual void set_completion_params (json completion_params_)
 Set completion parameters.
 
virtual std::string get_completion_params ()
 Get current completion parameters.
 
virtual std::string completion (const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
 Generate completion.
 
virtual void set_grammar (std::string grammar_)
 Set grammar for constrained generation.
 
virtual std::string get_grammar ()
 Get current grammar specification.
 
virtual std::string apply_template (const json &messages)
 Apply template to messages.
 

Static Public Member Functions

static LLMService * from_command (const std::string &command)
 Create runtime from command line string.
 
static LLMService * from_command (int argc, char **argv)
 Create runtime from argc/argv.
 
static LLMService * from_params (const json &params_json)
 Create LLMService from JSON parameters.
 
static LLMService * from_command (const std::string &command)
 Create LLMService from command line string.
 
static LLMService * from_command (int argc, char **argv)
 Create LLMService from argc/argv.
 
static std::vector< char * > jsonToArguments (const json &params_json)
 Convert JSON parameters to command line arguments.
 
- Static Public Member Functions inherited from LLM
static bool has_gpu_layers (const std::string &command)
 Check if command line arguments specify GPU layers.
 
static std::string LLM_args_to_command (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
 Convert LLM parameters to command line arguments.
 

Public Attributes

LibHandle handle = nullptr
 Handle to loaded library.
 
LLMProvider * llm = nullptr
 Pointer to loaded LLM provider instance.
 
- Public Attributes inherited from LLM
int32_t n_keep = 0
 Number of tokens to keep from the beginning of the context.
 
std::string grammar = ""
 Grammar specification in GBNF format or JSON schema.
 
json completion_params
 JSON object containing completion parameters.
 

Protected Member Functions

bool create_LLM_library_backend (const std::string &command, const std::string &llm_lib_filename)
 Load LLM library backend.
 
- Protected Member Functions inherited from LLMProvider
virtual bool parse_lora_weight_json (const json &result)
 Parse LoRA weight configuration result.
 
virtual json build_lora_weight_json (const std::vector< LoraIdScale > &loras)
 Build JSON for LoRA weight configuration.
 
virtual std::vector< LoraIdScalePath > parse_lora_list_json (const json &result)
 Parse LoRA list result.
 
virtual json build_lora_list_json (const std::vector< LoraIdScalePath > &loras)
 Build JSON for LoRA list result.
 
- Protected Member Functions inherited from LLMLocal
virtual std::string slot (int id_slot, const std::string &action, const std::string &filepath)
 Perform slot operation.
 
virtual json build_slot_json (int id_slot, const std::string &action, const std::string &filepath)
 Build JSON for slot operations.
 
virtual std::string parse_slot_json (const json &result)
 Parse slot operation result.
 
- Protected Member Functions inherited from LLM
virtual json build_apply_template_json (const json &messages)
 Build JSON for template application.
 
virtual std::string parse_apply_template_json (const json &result)
 Parse template application result.
 
virtual json build_tokenize_json (const std::string &query)
 Build JSON for tokenization.
 
virtual std::vector< int > parse_tokenize_json (const json &result)
 Parse tokenization result.
 
virtual json build_detokenize_json (const std::vector< int32_t > &tokens)
 Build JSON for detokenization.
 
virtual std::string parse_detokenize_json (const json &result)
 Parse detokenization result.
 
virtual json build_embeddings_json (const std::string &query)
 Build JSON for embeddings generation.
 
virtual std::vector< float > parse_embeddings_json (const json &result)
 Parse embeddings result.
 
virtual json build_completion_json (const std::string &prompt, int id_slot=-1)
 Build JSON for completion generation.
 
virtual std::string parse_completion_json (const json &result)
 Parse completion result.
 

Protected Attributes

std::vector< std::string > search_paths
 Library search paths.
 
- Protected Attributes inherited from LLMProvider
bool reasoning_enabled = false
 Whether reasoning is enabled.
 

Detailed Description

Runtime loader for LLM libraries.

Concrete implementation of LLMProvider with server capabilities.

This class provides dynamic loading of LLM backend libraries, allowing for flexible deployment and architecture-specific optimizations.

This class provides a full-featured LLM service with an HTTP server, parameter configuration, and backend integration with llama.cpp.

Definition at line 62 of file LLM_runtime.h.
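
A minimal usage sketch (assumptions: the model path is a placeholder, and the parameterized constructor fully initializes the runtime as described below):

    #include <LLM_runtime.h>

    #include <iostream>
    #include <string>

    int main() {
        // Placeholder model path; any llama.cpp-compatible GGUF model will do.
        LLMService service("models/example-model.gguf");

        service.start();                 // start the LLM service
        if (!service.started()) return 1;

        // completion() is inherited from LLM and takes a plain prompt string.
        std::string reply = service.completion("Hello, who are you?");
        std::cout << reply << std::endl;

        service.stop();
        return 0;
    }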

Constructor & Destructor Documentation

◆ LLMService() [1/4]

LLMService::LLMService ( )

Default constructor.

Creates an uninitialized runtime that must load a library before use

Definition at line 315 of file LLM_runtime.cpp.


◆ LLMService() [2/4]

LLMService::LLMService ( const std::string & model_path,
int num_slots = 1,
int num_threads = -1,
int num_GPU_layers = 0,
bool flash_attention = false,
int context_size = 4096,
int batch_size = 2048,
bool embedding_only = false,
const std::vector< std::string > & lora_paths = {} )

Parameterized constructor.

Parameters
model_path: Path to the model file
num_threads: Number of CPU threads (-1 for auto-detection)
num_GPU_layers: Number of layers to offload to GPU
num_slots: Number of parallel slots
flash_attention: Whether to enable flash attention optimization
context_size: Maximum context length in tokens
batch_size: Processing batch size
embedding_only: Whether to run in embedding-only mode
lora_paths: Vector of paths to LoRA adapter files

Creates and initializes a runtime with the specified parameters

Definition at line 320 of file LLM_runtime.cpp.
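
For illustration, a sketch of constructing a runtime with GPU offloading and a LoRA adapter (all file paths are placeholders):

    #include <LLM_runtime.h>

    #include <string>
    #include <vector>

    LLMService* make_runtime() {
        std::vector<std::string> loras = {"adapters/example-lora.gguf"};
        return new LLMService(
            "models/example-model.gguf",  // model_path (placeholder)
            /*num_slots=*/2,              // two parallel slots
            /*num_threads=*/-1,           // auto-detect CPU threads
            /*num_GPU_layers=*/32,        // offload 32 layers to the GPU
            /*flash_attention=*/true,
            /*context_size=*/8192,
            /*batch_size=*/2048,
            /*embedding_only=*/false,
            loras);
    }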

◆ ~LLMService()

LLMService::~LLMService ( )

Destructor.

Definition at line 339 of file LLM_runtime.cpp.

◆ LLMService() [3/4]

LLMService::LLMService ( )

Default constructor.

Creates an uninitialized LLMService that must be configured before use

◆ LLMService() [4/4]

LLMService::LLMService ( const std::string & model_path,
int num_slots = 1,
int num_threads = -1,
int num_GPU_layers = 0,
bool flash_attention = false,
int context_size = 4096,
int batch_size = 2048,
bool embedding_only = false,
const std::vector< std::string > & lora_paths = {} )

Parameterized constructor.

Parameters
model_path: Path to the model file
num_threads: Number of CPU threads (-1 for auto-detection)
num_GPU_layers: Number of layers to offload to GPU
num_slots: Number of parallel processing sequences
flash_attention: Whether to enable flash attention optimization
context_size: Maximum context length in tokens
batch_size: Processing batch size
embedding_only: Whether to run in embedding-only mode
lora_paths: Vector of paths to LoRA adapter files

Member Function Documentation

◆ apply_template_json() [1/2]

std::string LLMService::apply_template_json ( const json & data)
inline override virtual

Apply a chat template to message data.

Parameters
data: JSON object containing messages to format
Returns
Formatted string with template applied

Pure virtual method for applying chat templates to conversation data

Implements LLM.

Definition at line 137 of file LLM_runtime.h.

◆ apply_template_json() [2/2]

std::string LLMService::apply_template_json ( const json & data)
override virtual

Apply a chat template to message data.

Parameters
data: JSON object containing messages to format
Returns
Formatted string with template applied

Pure virtual method for applying chat templates to conversation data

Implements LLM.
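
A hedged sketch of a call to this method; the exact layout of data is not documented on this page, so the OpenAI-style messages array below is an assumption:

    #include <LLM_runtime.h>

    #include <string>

    // Assumes `json` is the nlohmann::json alias used by the library and that the
    // request carries an OpenAI-style "messages" array (field names are assumptions).
    std::string format_chat(LLMService& service) {
        json data;
        data["messages"] = json::array({
            {{"role", "system"}, {"content", "You are a helpful assistant."}},
            {{"role", "user"},   {"content", "Summarise LoRA in one sentence."}}
        });
        return service.apply_template_json(data);
    }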

◆ cancel() [1/2]

void LLMService::cancel ( int id_slot)
inline override virtual

Cancel request (override - delegates to loaded library)

Parameters
id_slot: ID of the slot whose request should be cancelled

Implements LLMLocal.

Definition at line 141 of file LLM_runtime.h.

◆ cancel() [2/2]

void LLMService::cancel ( int id_slot)
override virtual

Cancel running request (override)

Parameters
id_slot: ID of the slot whose request should be cancelled

Implements LLMLocal.

◆ completion_json() [1/2]

std::string LLMService::completion_json ( const json & data,
CharArrayFn callback = nullptr,
bool callbackWithJSON = true )
inline override virtual

Generate completion (override - delegates to loaded library)

Parameters
data: JSON completion request
callback: Optional streaming callback
callbackWithJSON: Whether callback uses JSON
Returns
Generated completion

Implements LLM.

Definition at line 131 of file LLM_runtime.h.

◆ completion_json() [2/2]

std::string LLMService::completion_json ( const json & data,
CharArrayFn callback = nullptr,
bool callbackWithJSON = true )
override virtual

Generate completion (override)

Parameters
data: JSON object with prompt and parameters
callback: Optional streaming callback function
callbackWithJSON: Whether callback receives JSON format
Returns
Generated completion text or JSON

Implements LLM.
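
A hedged sketch of streaming a completion. The "prompt" field name and the callback signature are assumptions; neither is specified on this page:

    #include <LLM_runtime.h>

    #include <iostream>
    #include <string>

    // Assumption: CharArrayFn is compatible with a plain function taking a C string.
    static void on_chunk(const char* chunk) {
        std::cout << chunk << std::flush;   // print streamed text as it arrives
    }

    std::string run_completion(LLMService& service) {
        json data;
        data["prompt"] = "Write a haiku about local inference.";  // assumed field name
        // Request plain-text chunks in the callback rather than JSON.
        return service.completion_json(data, on_chunk, /*callbackWithJSON=*/false);
    }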

◆ create_LLM_library()

bool LLMService::create_LLM_library ( const std::string & command)

Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the command.

Parameters
command: Command string containing the model path and parameters
Returns
true if library loaded successfully, false otherwise

Definition at line 296 of file LLM_runtime.cpp.
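
A minimal sketch of loading a backend explicitly; the command string follows the llama.cpp server argument syntax referenced elsewhere on this page, and the model path is a placeholder:

    #include <LLM_runtime.h>

    bool load_backend() {
        LLMService service;  // default constructor: no library loaded yet
        // Picks the library matching the current architecture and builds the LLM.
        return service.create_LLM_library("-m models/example-model.gguf -c 4096");
    }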


◆ create_LLM_library_backend()

bool LLMService::create_LLM_library_backend ( const std::string & command,
const std::string & llm_lib_filename )
protected

Load LLM library backend.

Parameters
command: Command string with parameters
llm_lib_filename: Specific library filename to load
Returns
true if library loaded successfully, false otherwise

Internal method for loading specific library files

Definition at line 218 of file LLM_runtime.cpp.


◆ debug() [1/2]

void LLMService::debug ( int debug_level)
inline override virtual

Set debug level (override - delegates to loaded library)

Parameters
debug_level: Debug verbosity level

Implements LLMProvider.

Definition at line 202 of file LLM_runtime.h.


◆ debug() [2/2]

void LLMService::debug ( int debug_level)
override virtual

Set debug level (override)

Parameters
debug_level: Debug verbosity level

Implements LLMProvider.

◆ debug_implementation() [1/2]

std::string LLMService::debug_implementation ( )
inline override virtual

Implementation debugging.

Returns
"standalore" or "runtime_detection" according to the implementation

Implements LLMProvider.

Definition at line 208 of file LLM_runtime.h.

◆ debug_implementation() [2/2]

std::string LLMService::debug_implementation ( )
inline override virtual

Implementation debugging.

Returns
"standalore" or "runtime_detection" according to the implementation

Implements LLMProvider.

Definition at line 201 of file LLM_service.h.

◆ detokenize_json() [1/2]

std::string LLMService::detokenize_json ( const json & data)
inline override virtual

Convert tokens back to text.

Parameters
data: JSON object containing token IDs
Returns
JSON string containing detokenized text

Pure virtual method for converting token sequences back to text

Implements LLM.

Definition at line 118 of file LLM_runtime.h.

◆ detokenize_json() [2/2]

std::string LLMService::detokenize_json ( const json & data)
override virtual

Convert tokens back to text.

Parameters
data: JSON object containing token IDs
Returns
JSON string containing detokenized text

Pure virtual method for converting token sequences back to text

Implements LLM.

◆ embedding_size() [1/2]

int LLMService::embedding_size ( )
inline override virtual

Get embedding size (override - delegates to loaded library)

Returns
Number of embedding dimensions

Implements LLMProvider.

Definition at line 194 of file LLM_runtime.h.

◆ embedding_size() [2/2]

int LLMService::embedding_size ( )
override virtual

Get embedding vector dimensions (override)

Returns
Number of dimensions in embedding vectors

Implements LLMProvider.

◆ embeddings_json() [1/2]

std::string LLMService::embeddings_json ( const json & data)
inline override virtual

Generate embeddings with HTTP response support.

Parameters
data: JSON object containing embedding request
Returns
JSON string with embedding data

Protected method used internally for server-based embedding generation

Implements LLM.

Definition at line 124 of file LLM_runtime.h.

◆ embeddings_json() [2/2]

std::string LLMService::embeddings_json ( const json & data)
override virtual

Generate embeddings with HTTP response support.

Parameters
data: JSON object containing embedding request
Returns
JSON string with embedding data

Protected method used internally for server-based embedding generation

Implements LLM.
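
A sketch using the inherited LLM::embeddings() helper, which wraps this JSON endpoint; it assumes the service was constructed with embedding support enabled:

    #include <LLM_runtime.h>

    #include <iostream>
    #include <vector>

    void print_embedding(LLMService& service) {
        std::vector<float> vec = service.embeddings("local language models");
        std::cout << "dimensions reported: " << service.embedding_size()
                  << ", vector size: " << vec.size() << std::endl;
    }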

◆ enable_reasoning()

void LLMService::enable_reasoning ( bool reasoning)
override virtual

Enable reasoning.

Parameters
reasoning: Whether to enable reasoning

Reimplemented from LLMProvider.

Definition at line 262 of file LLM_service.cpp.


◆ encapsulate_route()

std::string LLMService::encapsulate_route ( const json & body,
handler_t route_handler )

Definition at line 520 of file LLM_service.cpp.

◆ from_command() [1/4]

LLMService * LLMService::from_command ( const std::string & command)
static

Create runtime from command line string.

Parameters
command: Command line argument string
Returns
Pointer to newly created LLMService instance

Factory method for creating runtime instances from command arguments. See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#usage for arguments.

Definition at line 327 of file LLM_runtime.cpp.
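
A minimal sketch; the flags follow the llama.cpp server syntax linked above and the model path is a placeholder:

    #include <LLM_runtime.h>

    // Factory use: parse a llama.cpp-style command line into a ready instance.
    LLMService* make_service() {
        return LLMService::from_command("-m models/example-model.gguf -c 4096");
    }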


◆ from_command() [2/4]

static LLMService * LLMService::from_command ( const std::string & command)
static

Create LLMService from command line string.

Parameters
command: Command line argument string
Returns
Pointer to newly created LLMService instance

Factory method for creating instances from command line arguments. See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#usage for arguments.

◆ from_command() [3/4]

LLMService * LLMService::from_command ( int argc,
char ** argv )
static

Create runtime from argc/argv.

Parameters
argc: Argument count
argv: Argument vector
Returns
Pointer to newly created LLMService instance

Factory method for creating runtime instances from main() parameters

Definition at line 334 of file LLM_runtime.cpp.

◆ from_command() [4/4]

static LLMService * LLMService::from_command ( int argc,
char ** argv )
static

Create LLMService from argc/argv.

Parameters
argc: Argument count
argv: Argument vector
Returns
Pointer to newly created LLMService instance

Factory method for creating instances from standard main() parameters

◆ from_params()

LLMService * LLMService::from_params ( const json & params_json)
static

Create LLMService from JSON parameters.

Parameters
params_json: JSON object containing initialization parameters
Returns
Pointer to newly created LLMService instance

Factory method for creating instances from structured parameter data. See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#usage for arguments.

Definition at line 22 of file LLM_service.cpp.
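
A hedged sketch; the key names accepted in params_json are not listed on this page, so the llama.cpp-style keys below are assumptions:

    #include <LLM_runtime.h>

    LLMService* make_from_json() {
        json params;
        params["model"] = "models/example-model.gguf";  // assumed key, placeholder path
        params["ctx-size"] = 4096;                      // assumed key name
        return LLMService::from_params(params);
    }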


◆ get_command()

std::string LLMService::get_command ( )
inline

Returns the construct command.

Definition at line 97 of file LLM_service.h.


◆ get_next_available_slot() [1/2]

int LLMService::get_next_available_slot ( )
inline override virtual

Get available slot (override - delegates to loaded library)

Returns
Available slot ID

Implements LLMLocal.

Definition at line 198 of file LLM_runtime.h.

◆ get_next_available_slot() [2/2]

int LLMService::get_next_available_slot ( )
override virtual

Get available processing slot (override)

Returns
Available slot ID or -1 if none available

Implements LLMLocal.
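
A sketch combining slot selection with the inherited completion() overload that accepts an explicit slot ID:

    #include <LLM_runtime.h>

    #include <string>

    void slot_example(LLMService& service) {
        int slot = service.get_next_available_slot();
        if (slot < 0) return;  // no free slot right now

        // Run a completion pinned to this slot; cancel(slot) could be called
        // from another thread to abort it while it is still running.
        std::string text = service.completion("Explain KV-cache reuse.",
                                              nullptr, slot);
    }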

◆ init() [1/3]

void LLMService::init ( const char * params_string)

Initialize from C-style parameter string.

Parameters
params_string: C-style string containing parameters

C-compatible version of string parameter initialization

Definition at line 175 of file LLM_service.cpp.

◆ init() [2/3]

void LLMService::init ( const std::string & params_string)

Initialize from parameter string.

Parameters
params_string: String containing space-separated parameters

Initialize the service by parsing a parameter string

Definition at line 160 of file LLM_service.cpp.

◆ init() [3/3]

void LLMService::init ( int argc,
char ** argv )

Initialize from argc/argv parameters.

Parameters
argc: Argument count
argv: Argument vector

Initialize the service with command line style parameters

Definition at line 180 of file LLM_service.cpp.
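
A sketch of wiring init() to a program's own command line, assuming that a default-constructed service followed by init() performs the same setup as the parameterized constructor:

    #include <LLM_runtime.h>

    int main(int argc, char** argv) {
        LLMService service;          // uninitialized until init() is called
        service.init(argc, argv);    // e.g. ./app -m models/example-model.gguf -c 4096
        service.start();
        service.join_service();      // block until the service finishes
        return 0;
    }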


◆ join_server() [1/2]

void LLMService::join_server ( )
inline override virtual

Wait for server completion (override - delegates to loaded library)

Implements LLMProvider.

Definition at line 185 of file LLM_runtime.h.

◆ join_server() [2/2]

void LLMService::join_server ( )
override virtual

Wait for server thread completion (override)

Implements LLMProvider.

◆ join_service() [1/2]

void LLMService::join_service ( )
inline override virtual

Wait for service completion (override - delegates to loaded library)

Implements LLMProvider.

Definition at line 182 of file LLM_runtime.h.

◆ join_service() [2/2]

void LLMService::join_service ( )
override virtual

Wait for service thread completion (override)

Implements LLMProvider.

◆ jsonToArguments()

std::vector< char * > LLMService::jsonToArguments ( const json & params_json)
static

Convert JSON parameters to command line arguments.

Parameters
params_json: JSON object with parameters
Returns
Vector of C-style argument strings

Utility function for converting structured parameters to argv format

Definition at line 58 of file LLM_service.cpp.
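
A hedged sketch of pairing jsonToArguments() with init(); the parameter key is an assumption and the path a placeholder:

    #include <LLM_runtime.h>

    #include <vector>

    void init_from_json(LLMService& service) {
        json params;
        params["model"] = "models/example-model.gguf";  // assumed key, placeholder path
        // Assumes the returned vector is directly consumable by init().
        std::vector<char*> args = LLMService::jsonToArguments(params);
        service.init(static_cast<int>(args.size()), args.data());
    }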


◆ logging_callback() [1/2]

void LLMService::logging_callback ( CharArrayFn callback)
inline override virtual

Set logging callback (override - delegates to loaded library)

Parameters
callback: Function to receive log messages

Implements LLMProvider.

Definition at line 206 of file LLM_runtime.h.


◆ logging_callback() [2/2]

void LLMService::logging_callback ( CharArrayFn callback)
override virtual

Set logging callback (override)

Parameters
callback: Function to receive log messages

Implements LLMProvider.

◆ lora_list_json() [1/2]

std::string LLMService::lora_list_json ( )
inline override virtual

List available LoRA adapters.

Returns
JSON string containing list of available LoRA adapters

Implements LLMProvider.

Definition at line 151 of file LLM_runtime.h.

◆ lora_list_json() [2/2]

std::string LLMService::lora_list_json ( )
override virtual

List available LoRA adapters.

Returns
JSON string containing list of available LoRA adapters

Implements LLMProvider.

◆ lora_weight_json() [1/2]

std::string LLMService::lora_weight_json ( const json & data)
inline override virtual

Configure LoRA weights with HTTP response support.

Parameters
data: JSON object with LoRA configuration
Returns
JSON response string

Protected method used internally for server-based LoRA configuration

Implements LLMProvider.

Definition at line 147 of file LLM_runtime.h.

◆ lora_weight_json() [2/2]

std::string LLMService::lora_weight_json ( const json & data)
override virtual

Configure LoRA weights with HTTP response support.

Parameters
data: JSON object with LoRA configuration
Returns
JSON response string

Protected method used internally for server-based LoRA configuration

Implements LLMProvider.

◆ set_SSL() [1/2]

void LLMService::set_SSL ( const std::string & cert,
const std::string & key )
inline override virtual

Set SSL configuration (override - delegates to loaded library)

Parameters
cert: SSL certificate path
key: SSL private key path

Implements LLMProvider.

Definition at line 190 of file LLM_runtime.h.

◆ set_SSL() [2/2]

void LLMService::set_SSL ( const std::string & SSL_cert,
const std::string & SSL_key )
override virtual

Configure SSL certificates (override)

Parameters
SSL_cert: Path to SSL certificate file
SSL_key: Path to SSL private key file

Implements LLMProvider.

◆ slot_json() [1/2]

std::string LLMService::slot_json ( const json & data)
inline override virtual

Manage slots with HTTP response support.

Parameters
data: JSON object with slot operation
Returns
JSON response string

Protected method used internally for server-based slot management

Implements LLMLocal.

Definition at line 157 of file LLM_runtime.h.

◆ slot_json() [2/2]

std::string LLMService::slot_json ( const json & data)
override virtual

Manage slots with HTTP response support.

Parameters
data: JSON object with slot operation
Returns
JSON response string

Protected method used internally for server-based slot management

Implements LLMLocal.

◆ start() [1/2]

void LLMService::start ( )
inline override virtual

Start service (override - delegates to loaded library)

Implements LLMProvider.

Definition at line 169 of file LLM_runtime.h.

◆ start() [2/2]

void LLMService::start ( )
override virtual

Start the LLM service (override)

Implements LLMProvider.

◆ start_server() [1/2]

void LLMService::start_server ( const std::string & host = "0.0.0.0",
int port = -1,
const std::string & API_key = "" )
inline override virtual

Start HTTP server (override - delegates to loaded library)

Parameters
host: Host address (default: "0.0.0.0")
port: Port number (0 for auto)
API_key: Optional API key

Implements LLMProvider.

Definition at line 163 of file LLM_runtime.h.

◆ start_server() [2/2]

void LLMService::start_server ( const std::string & host = "0.0.0.0",
int port = -1,
const std::string & API_key = "" )
override virtual

Start HTTP server (override)

Parameters
host: Host address to bind (default: "0.0.0.0")
port: Port number (0 for auto-selection)
API_key: Optional API key for authentication

Implements LLMProvider.
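
A sketch of exposing the service over HTTP; host, port, and API key values are illustrative only:

    #include <LLM_runtime.h>

    void serve(LLMService& service) {
        service.start();                                          // start the LLM service
        service.start_server("0.0.0.0", 8080, "example-api-key"); // bind the HTTP endpoint
        service.join_server();                                    // block until the server stops
        service.stop();
    }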

◆ started() [1/2]

bool LLMService::started ( )
inline override virtual

Check service status (override - delegates to loaded library)

Returns
true if started, false otherwise

Implements LLMProvider.

Definition at line 173 of file LLM_runtime.h.

◆ started() [2/2]

bool LLMService::started ( )
override virtual

Check service status (override)

Returns
true if service is running, false otherwise

Implements LLMProvider.

◆ stop() [1/2]

void LLMService::stop ( )
inline override virtual

Stop service (override - delegates to loaded library)

Implements LLMProvider.

Definition at line 176 of file LLM_runtime.h.

◆ stop() [2/2]

void LLMService::stop ( )
override virtual

Stop the LLM service (override)

Implements LLMProvider.

◆ stop_server() [1/2]

void LLMService::stop_server ( )
inline override virtual

Stop HTTP server (override - delegates to loaded library)

Implements LLMProvider.

Definition at line 166 of file LLM_runtime.h.

◆ stop_server() [2/2]

void LLMService::stop_server ( )
override virtual

Stop HTTP server (override)

Implements LLMProvider.

◆ tokenize_json() [1/2]

std::string LLMService::tokenize_json ( const json & data)
inline override virtual

Tokenize input (override)

Parameters
data: JSON object containing text to tokenize
Returns
JSON string with token data

Implements LLM.

Definition at line 112 of file LLM_runtime.h.

◆ tokenize_json() [2/2]

std::string LLMService::tokenize_json ( const json & data)
override virtual

Tokenize input (override)

Parameters
data: JSON object containing text to tokenize
Returns
JSON string with token data

Implements LLM.
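
A sketch of a tokenize/detokenize round trip using the inherited plain-type helpers that wrap these JSON endpoints:

    #include <LLM_runtime.h>

    #include <cstdint>
    #include <string>
    #include <vector>

    bool roundtrip(LLMService& service, const std::string& text) {
        std::vector<int> tokens = service.tokenize(text);
        std::vector<int32_t> ids(tokens.begin(), tokens.end());
        std::string restored = service.detokenize(ids);
        return restored == text;   // may differ slightly depending on the tokenizer
    }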

Member Data Documentation

◆ handle

LibHandle LLMService::handle = nullptr

Handle to loaded library.

Definition at line 99 of file LLM_runtime.h.

◆ llm

LLMProvider* LLMService::llm = nullptr

Pointer to loaded LLM provider instance.

Definition at line 100 of file LLM_runtime.h.

◆ search_paths

std::vector<std::string> LLMService::search_paths
protected

Library search paths.

Definition at line 219 of file LLM_runtime.h.


The documentation for this class was generated from the following files: