LlamaLib v2.0.2
Cross-platform library for local LLMs
Runtime loader for LLM libraries.
#include <LLM_runtime.h>
Public Member Functions | |
| LLMService () | |
| Default constructor. | |
| LLMService (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={}) | |
| Parameterized constructor. | |
| ~LLMService () | |
| Destructor. | |
| bool | create_LLM_library (const std::string &command) |
| Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the command. | |
| std::string | tokenize_json (const json &data) override |
| Tokenize input (override) | |
| std::string | detokenize_json (const json &data) override |
| Convert tokens back to text. | |
| std::string | embeddings_json (const json &data) override |
| Generate embeddings with HTTP response support. | |
| std::string | completion_json (const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override |
| Generate completion (override - delegates to loaded library) | |
| std::string | apply_template_json (const json &data) override |
| Apply a chat template to message data. | |
| void | cancel (int id_slot) override |
| Cancel request (override - delegates to loaded library) | |
| std::string | lora_weight_json (const json &data) override |
| Configure LoRA weights with HTTP response support. | |
| std::string | lora_list_json () override |
| List available LoRA adapters. | |
| std::string | slot_json (const json &data) override |
| Manage slots with HTTP response support. | |
| void | start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override |
| Start HTTP server (override - delegates to loaded library) | |
| void | stop_server () override |
| Stop HTTP server (override - delegates to loaded library) | |
| void | start () override |
| Start service (override - delegates to loaded library) | |
| bool | started () override |
| Check service status (override - delegates to loaded library) | |
| void | stop () override |
| Stop service (override - delegates to loaded library) | |
| void | join_service () override |
| Wait for service completion (override - delegates to loaded library) | |
| void | join_server () override |
| Wait for server completion (override - delegates to loaded library) | |
| void | set_SSL (const std::string &cert, const std::string &key) override |
| Set SSL configuration (override - delegates to loaded library) | |
| int | embedding_size () override |
| Get embedding size (override - delegates to loaded library) | |
| int | get_next_available_slot () override |
| Get available slot (override - delegates to loaded library) | |
| void | debug (int debug_level) override |
| Set debug level (override - delegates to loaded library) | |
| void | logging_callback (CharArrayFn callback) override |
| Set logging callback (override - delegates to loaded library) | |
| std::string | debug_implementation () override |
| Implementation debugging. | |
| LLMService () | |
| Default constructor. | |
| LLMService (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={}) | |
| Parameterized constructor. | |
| ~LLMService () | |
| Destructor. | |
| void | init (int argc, char **argv) |
| Initialize from argc/argv parameters. | |
| void | init (const std::string ¶ms_string) |
| Initialize from parameter string. | |
| void | init (const char *params_string) |
| Initialize from C-style parameter string. | |
| std::string | get_command () |
| Returns the construct command. | |
| std::string | encapsulate_route (const json &body, handler_t route_handler) |
| void | enable_reasoning (bool reasoning) override |
| Enable reasoning. | |
| std::string | tokenize_json (const json &data) override |
| Tokenize input (override) | |
| std::string | detokenize_json (const json &data) override |
| Convert tokens back to text. | |
| std::string | embeddings_json (const json &data) override |
| Generate embeddings with HTTP response support. | |
| std::string | apply_template_json (const json &data) override |
| Apply a chat template to message data. | |
| std::string | completion_json (const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override |
| Generate completion (override) | |
| std::string | slot_json (const json &data) override |
| Manage slots with HTTP response support. | |
| std::string | lora_weight_json (const json &data) override |
| Configure LoRA weights with HTTP response support. | |
| std::string | lora_list_json () override |
| List available LoRA adapters. | |
| void | cancel (int id_slot) override |
| Cancel running request (override) | |
| void | start () override |
| Start the LLM service (override) | |
| bool | started () override |
| Check service status (override) | |
| void | stop () override |
| Stop the LLM service (override) | |
| void | start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="") override |
| Start HTTP server (override) | |
| void | stop_server () override |
| Stop HTTP server (override) | |
| void | join_service () override |
| Wait for service thread completion (override) | |
| void | join_server () override |
| Wait for server thread completion (override) | |
| void | set_SSL (const std::string &SSL_cert, const std::string &SSL_key) override |
| Configure SSL certificates (override) | |
| int | embedding_size () override |
| Get embedding vector dimensions (override) | |
| int | get_next_available_slot () override |
| Get available processing slot (override) | |
| void | debug (int debug_level) override |
| Set debug level (override) | |
| void | logging_callback (CharArrayFn callback) override |
| Set logging callback (override) | |
| std::string | debug_implementation () override |
| Implementation debugging. | |
Public Member Functions inherited from LLMProvider | |
| virtual | ~LLMProvider () |
| Virtual destructor. | |
| virtual bool | lora_weight (const std::vector< LoraIdScale > &loras) |
| Configure LoRA weights. | |
| virtual std::vector< LoraIdScalePath > | lora_list () |
| List available LoRA adapters. | |
| virtual void | logging_stop () |
| Stop logging. | |
Public Member Functions inherited from LLMLocal | |
| virtual std::string | save_slot (int id_slot, const std::string &filepath) |
| Save slot state to file. | |
| virtual std::string | load_slot (int id_slot, const std::string &filepath) |
| Load slot state from file. | |
Public Member Functions inherited from LLM | |
| virtual | ~LLM ()=default |
| Virtual destructor. | |
| virtual std::vector< int > | tokenize (const std::string &query) |
| Tokenize text. | |
| virtual std::string | detokenize (const std::vector< int32_t > &tokens) |
| Convert tokens to text. | |
| virtual std::vector< float > | embeddings (const std::string &query) |
| Generate embeddings. | |
| virtual void | set_completion_params (json completion_params_) |
| Set completion parameters. | |
| virtual std::string | get_completion_params () |
| Get current completion parameters. | |
| virtual std::string | completion (const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false) |
| Generate completion. | |
| virtual void | set_grammar (std::string grammar_) |
| Set grammar for constrained generation. | |
| virtual std::string | get_grammar () |
| Get current grammar specification. | |
| virtual std::string | apply_template (const json &messages) |
| Apply template to messages. | |
Static Public Member Functions | |
| static LLMService * | from_command (const std::string &command) |
| Create runtime from command line string. | |
| static LLMService * | from_command (int argc, char **argv) |
| Create runtime from argc/argv. | |
| static LLMService * | from_params (const json ¶ms_json) |
| Create LLMService from JSON parameters. | |
| static LLMService * | from_command (const std::string &command) |
| Create LLMService from command line string. | |
| static LLMService * | from_command (int argc, char **argv) |
| Create LLMService from argc/argv. | |
| static std::vector< char * > | jsonToArguments (const json ¶ms_json) |
| Convert JSON parameters to command line arguments. | |
Static Public Member Functions inherited from LLM | |
| static bool | has_gpu_layers (const std::string &command) |
| Check if command line arguments specify GPU layers. | |
| static std::string | LLM_args_to_command (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={}) |
| Convert LLM parameters to command line arguments. | |
Public Attributes | |
| LibHandle | handle = nullptr |
| Handle to loaded library. | |
| LLMProvider * | llm = nullptr |
| Pointer to loaded LLM provider instance. | |
Public Attributes inherited from LLM | |
| int32_t | n_keep = 0 |
| Number of tokens to keep from the beginning of the context. | |
| std::string | grammar = "" |
| Grammar specification in GBNF format or JSON schema. | |
| json | completion_params |
| JSON object containing completion parameters. | |
Protected Member Functions | |
| bool | create_LLM_library_backend (const std::string &command, const std::string &llm_lib_filename) |
| Load LLM library backend. | |
Protected Member Functions inherited from LLMProvider | |
| virtual bool | parse_lora_weight_json (const json &result) |
| Parse LoRA weight configuration result. | |
| virtual json | build_lora_weight_json (const std::vector< LoraIdScale > &loras) |
| Build JSON for LoRA weight configuration. | |
| virtual std::vector< LoraIdScalePath > | parse_lora_list_json (const json &result) |
| Parse LoRA list result. | |
| virtual json | build_lora_list_json (const std::vector< LoraIdScalePath > &loras) |
| Build JSON for LoRA list result. | |
Protected Member Functions inherited from LLMLocal | |
| virtual std::string | slot (int id_slot, const std::string &action, const std::string &filepath) |
| Perform slot operation. | |
| virtual json | build_slot_json (int id_slot, const std::string &action, const std::string &filepath) |
| Build JSON for slot operations. | |
| virtual std::string | parse_slot_json (const json &result) |
| Parse slot operation result. | |
Protected Member Functions inherited from LLM | |
| virtual json | build_apply_template_json (const json &messages) |
| Build JSON for template application. | |
| virtual std::string | parse_apply_template_json (const json &result) |
| Parse template application result. | |
| virtual json | build_tokenize_json (const std::string &query) |
| Build JSON for tokenization. | |
| virtual std::vector< int > | parse_tokenize_json (const json &result) |
| Parse tokenization result. | |
| virtual json | build_detokenize_json (const std::vector< int32_t > &tokens) |
| Build JSON for detokenization. | |
| virtual std::string | parse_detokenize_json (const json &result) |
| Parse detokenization result. | |
| virtual json | build_embeddings_json (const std::string &query) |
| Build JSON for embeddings generation. | |
| virtual std::vector< float > | parse_embeddings_json (const json &result) |
| Parse embeddings result. | |
| virtual json | build_completion_json (const std::string &prompt, int id_slot=-1) |
| Build JSON for completion generation. | |
| virtual std::string | parse_completion_json (const json &result) |
| Parse completion result. | |
Protected Attributes | |
| std::vector< std::string > | search_paths |
| Library search paths. | |
Protected Attributes inherited from LLMProvider | |
| bool | reasoning_enabled = false |
| Whether reasoning is enabled. | |
Runtime loader for LLM libraries. This class provides dynamic loading of LLM backend libraries, allowing for flexible deployment and architecture-specific optimizations.
Concrete implementation of LLMProvider with server capabilities. This class provides a full-featured LLM service with an HTTP server, parameter configuration, and backend integration with llama.cpp.
Definition at line 62 of file LLM_runtime.h.
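For orientation, here is a minimal life-cycle sketch (construct, start, generate via the inherited `completion`, stop) using only the signatures documented on this page; the model path is a placeholder.

```cpp
#include <LLM_runtime.h>
#include <iostream>

int main() {
    // Construct a service for a local model (placeholder path), offloading
    // 99 layers to the GPU and allowing two parallel slots.
    LLMService llm("model.gguf", /*num_slots=*/2, /*num_threads=*/-1,
                   /*num_GPU_layers=*/99);

    llm.start();                   // start the service
    if (!llm.started()) return 1;  // bail out if startup failed

    // Blocking completion via the inherited LLM interface.
    std::cout << llm.completion("What is the capital of France?") << std::endl;

    llm.stop();                    // shut the service down
    return 0;
}
```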
LLMService::LLMService()
Default constructor.
Creates an uninitialized runtime that must load a library before use
Definition at line 315 of file LLM_runtime.cpp.
LLMService::LLMService(const std::string &model_path, int num_slots = 1, int num_threads = -1, int num_GPU_layers = 0, bool flash_attention = false, int context_size = 4096, int batch_size = 2048, bool embedding_only = false, const std::vector<std::string> &lora_paths = {})
Parameterized constructor.
| model_path | Path to the model file |
| num_slots | Number of parallel slots |
| num_threads | Number of CPU threads (-1 for auto-detection) |
| num_GPU_layers | Number of layers to offload to GPU |
| flash_attention | Whether to enable flash attention optimization |
| context_size | Maximum context length in tokens |
| batch_size | Processing batch size |
| embedding_only | Whether to run in embedding-only mode |
| lora_paths | Vector of paths to LoRA adapter files |
Creates and initializes a runtime with the specified parameters
Definition at line 320 of file LLM_runtime.cpp.
LLMService::~LLMService()
Destructor.
Definition at line 339 of file LLM_runtime.cpp.
LLMService::LLMService()
Default constructor.
Creates an uninitialized LLMService that must be configured before use
LLMService::LLMService(const std::string &model_path, int num_slots = 1, int num_threads = -1, int num_GPU_layers = 0, bool flash_attention = false, int context_size = 4096, int batch_size = 2048, bool embedding_only = false, const std::vector<std::string> &lora_paths = {})
Parameterized constructor.
| model_path | Path to the model file |
| num_slots | Number of parallel processing sequences |
| num_threads | Number of CPU threads (-1 for auto-detection) |
| num_GPU_layers | Number of layers to offload to GPU |
| flash_attention | Whether to enable flash attention optimization |
| context_size | Maximum context length in tokens |
| batch_size | Processing batch size |
| embedding_only | Whether to run in embedding-only mode |
| lora_paths | Vector of paths to LoRA adapter files |
std::string LLMService::apply_template_json(const json &data)  [inline, override, virtual]
Apply a chat template to message data.
| data | JSON object containing messages to format |
Pure virtual method for applying chat templates to conversation data
Implements LLM.
Definition at line 137 of file LLM_runtime.h.
std::string LLMService::apply_template_json(const json &data)  [override, virtual]
Apply a chat template to message data.
| data | JSON object containing messages to format |
Pure virtual method for applying chat templates to conversation data
Implements LLM.
void LLMService::cancel(int id_slot)  [inline, override, virtual]
Cancel request (override - delegates to loaded library)
| id_slot | Slot ID of the request to cancel |
Implements LLMLocal.
Definition at line 141 of file LLM_runtime.h.
void LLMService::cancel(int id_slot)  [override, virtual]
Cancel running request (override)
| id_slot | Slot ID of the request to cancel |
Implements LLMLocal.
std::string LLMService::completion_json(const json &data, CharArrayFn callback = nullptr, bool callbackWithJSON = true)  [inline, override, virtual]
Generate completion (override - delegates to loaded library)
| data | JSON completion request |
| callback | Optional streaming callback |
| callbackWithJSON | Whether callback uses JSON |
Implements LLM.
Definition at line 131 of file LLM_runtime.h.
std::string LLMService::completion_json(const json &data, CharArrayFn callback = nullptr, bool callbackWithJSON = true)  [override, virtual]
Generate completion (override)
| data | JSON object with prompt and parameters |
| callback | Optional streaming callback function |
| callbackWithJSON | Whether callback receives JSON format |
Implements LLM.
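A hedged streaming sketch: `CharArrayFn` is assumed to be a plain C-string callback, and the request fields follow the llama.cpp server completion schema linked under `from_command`; both are assumptions, not documented guarantees.

```cpp
#include <LLM_runtime.h>
#include <iostream>

// Assumed CharArrayFn shape: a function receiving each streamed chunk
// as a C string (the exact typedef is not shown on this page).
static void on_chunk(const char *chunk) { std::cout << chunk << std::flush; }

int main() {
    LLMService llm("model.gguf");  // placeholder model path
    llm.start();

    // Hypothetical request body following the llama.cpp server schema.
    json request = {{"prompt", "Tell me a short story."}, {"n_predict", 128}};

    // callbackWithJSON=true would wrap each chunk in JSON; false is assumed
    // to deliver plain text chunks.
    std::string response = llm.completion_json(request, on_chunk, false);

    llm.stop();
    return 0;
}
```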
bool LLMService::create_LLM_library(const std::string &command)
Loads the LLM library dynamically according to the underlying architecture and creates an LLM based on the command.
| command | Command string containing model path and parameters |
Definition at line 296 of file LLM_runtime.cpp.
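A minimal sketch of the two-step pattern this enables: default-construct, then load a backend from a command string. The flags are illustrative llama.cpp server-style arguments (see the usage link under `from_command`).

```cpp
#include <LLM_runtime.h>

int main() {
    LLMService llm;  // default-constructed: no backend loaded yet

    // Illustrative llama.cpp server-style arguments; the loader picks the
    // library variant matching the host architecture.
    if (!llm.create_LLM_library("-m model.gguf -np 2 -ngl 99"))
        return 1;

    llm.start();
    // ... use the service ...
    llm.stop();
    return 0;
}
```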
bool LLMService::create_LLM_library_backend(const std::string &command, const std::string &llm_lib_filename)  [protected]
Load LLM library backend.
| command | Command string with parameters |
| llm_lib_filename | Specific library filename to load |
Internal method for loading specific library files
Definition at line 218 of file LLM_runtime.cpp.
void LLMService::debug(int debug_level)  [inline, override, virtual]
Set debug level (override - delegates to loaded library)
| debug_level | Debug verbosity level |
Implements LLMProvider.
Definition at line 202 of file LLM_runtime.h.
void LLMService::debug(int debug_level)  [override, virtual]
Set debug level (override)
| debug_level | Debug verbosity level |
Implements LLMProvider.
std::string LLMService::debug_implementation()  [inline, override, virtual]
Implementation debugging.
Implements LLMProvider.
Definition at line 208 of file LLM_runtime.h.
std::string LLMService::debug_implementation()  [inline, override, virtual]
Implementation debugging.
Implements LLMProvider.
Definition at line 201 of file LLM_service.h.
std::string LLMService::detokenize_json(const json &data)  [inline, override, virtual]
Convert tokens back to text.
| data | JSON object containing token IDs |
Pure virtual method for converting token sequences back to text
Implements LLM.
Definition at line 118 of file LLM_runtime.h.
std::string LLMService::detokenize_json(const json &data)  [override, virtual]
Convert tokens back to text.
| data | JSON object containing token IDs |
Pure virtual method for converting token sequences back to text
Implements LLM.
int LLMService::embedding_size()  [inline, override, virtual]
Get embedding size (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 194 of file LLM_runtime.h.
int LLMService::embedding_size()  [override, virtual]
Get embedding vector dimensions (override)
Implements LLMProvider.
std::string LLMService::embeddings_json(const json &data)  [inline, override, virtual]
Generate embeddings with HTTP response support.
| data | JSON object containing embedding request |
Protected method used internally for server-based embedding generation
Implements LLM.
Definition at line 124 of file LLM_runtime.h.
std::string LLMService::embeddings_json(const json &data)  [override, virtual]
Generate embeddings with HTTP response support.
| data | JSON object containing embedding request |
Protected method used internally for server-based embedding generation
Implements LLM.
void LLMService::enable_reasoning(bool reasoning)  [override, virtual]
Enable reasoning.
| reasoning | Whether to enable reasoning |
Reimplemented from LLMProvider.
Definition at line 262 of file LLM_service.cpp.
std::string LLMService::encapsulate_route(const json &body, handler_t route_handler)
Definition at line 520 of file LLM_service.cpp.
LLMService * LLMService::from_command(const std::string &command)  [static]
Create runtime from command line string.
| command | Command line argument string |
Factory method for creating runtime instances from command arguments. See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#usage for arguments.
Definition at line 327 of file LLM_runtime.cpp.
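Equivalent factory-style construction, sketched under the assumption that the caller owns the returned pointer (ownership is not stated on this page); the flags follow llama.cpp server usage and are illustrative only.

```cpp
#include <LLM_runtime.h>
#include <memory>

int main() {
    // Wrap the raw pointer for automatic cleanup (ownership assumed).
    std::unique_ptr<LLMService> llm(
        LLMService::from_command("-m model.gguf --ctx-size 8192 -ngl 99"));
    if (!llm) return 1;

    llm->start();
    // ...
    llm->stop();
    return 0;
}
```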
LLMService * LLMService::from_command(const std::string &command)  [static]
Create LLMService from command line string.
| command | Command line argument string |
Factory method for creating instances from command line arguments. See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#usage for arguments.
LLMService * LLMService::from_command(int argc, char **argv)  [static]
Create runtime from argc/argv.
| argc | Argument count |
| argv | Argument vector |
Factory method for creating runtime instances from main() parameters
Definition at line 334 of file LLM_runtime.cpp.
LLMService * LLMService::from_command(int argc, char **argv)  [static]
Create LLMService from argc/argv.
| argc | Argument count |
| argv | Argument vector |
Factory method for creating instances from standard main() parameters
LLMService * LLMService::from_params(const json &params_json)  [static]
Create LLMService from JSON parameters.
| params_json | JSON object containing initialization parameters |
Factory method for creating instances from structured parameter data. See https://github.com/ggml-org/llama.cpp/tree/master/tools/server#usage for arguments.
Definition at line 22 of file LLM_service.cpp.
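A sketch with hypothetical parameter keys: since `jsonToArguments` converts the JSON into argv-style llama.cpp server arguments, the keys below mirror those argument names, but the exact schema accepted by `from_params` is not documented on this page.

```cpp
#include <LLM_runtime.h>
#include <memory>

int main() {
    // Hypothetical keys mirroring llama.cpp server argument names.
    json params = {
        {"model", "model.gguf"},
        {"ctx-size", 4096},
        {"n-gpu-layers", 99}
    };

    std::unique_ptr<LLMService> llm(LLMService::from_params(params));
    if (!llm) return 1;

    llm->start();
    // ...
    llm->stop();
    return 0;
}
```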
std::string LLMService::get_command()  [inline]
Returns the construct command.
Definition at line 97 of file LLM_service.h.
int LLMService::get_next_available_slot()  [inline, override, virtual]
Get available slot (override - delegates to loaded library)
Implements LLMLocal.
Definition at line 198 of file LLM_runtime.h.
int LLMService::get_next_available_slot()  [override, virtual]
Get available processing slot (override)
Implements LLMLocal.
void LLMService::init(const char *params_string)
Initialize from C-style parameter string.
| params_string | C-style string containing parameters |
C-compatible version of string parameter initialization
Definition at line 175 of file LLM_service.cpp.
void LLMService::init(const std::string &params_string)
Initialize from parameter string.
| params_string | String containing space-separated parameters |
Initialize the service by parsing a parameter string
Definition at line 160 of file LLM_service.cpp.
void LLMService::init(int argc, char **argv)
Initialize from argc/argv parameters.
| argc | Argument count |
| argv | Argument vector |
Initialize the service with command line style parameters
Definition at line 180 of file LLM_service.cpp.
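A sketch of forwarding a program's own command line straight into a default-constructed service, e.g. `./my_app -m model.gguf -ngl 99` (flags illustrative):

```cpp
#include <LLM_runtime.h>

int main(int argc, char **argv) {
    LLMService llm;          // default-constructed, not yet configured
    llm.init(argc, argv);    // parse command-line-style parameters
    llm.start();
    llm.join_service();      // block until the service finishes
    return 0;
}
```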
void LLMService::join_server()  [inline, override, virtual]
Wait for server completion (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 185 of file LLM_runtime.h.
void LLMService::join_server()  [override, virtual]
Wait for server thread completion (override)
Implements LLMProvider.
void LLMService::join_service()  [inline, override, virtual]
Wait for service completion (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 182 of file LLM_runtime.h.
void LLMService::join_service()  [override, virtual]
Wait for service thread completion (override)
Implements LLMProvider.
std::vector<char *> LLMService::jsonToArguments(const json &params_json)  [static]
Convert JSON parameters to command line arguments.
| params_json | JSON object with parameters |
Utility function for converting structured parameters to argv format
Definition at line 58 of file LLM_service.cpp.
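A small sketch of the conversion; the keys are hypothetical (see the note under `from_params`), and the trailing null pointer conventional for argv vectors is an assumption.

```cpp
#include <LLM_runtime.h>
#include <iostream>

int main() {
    json params = {{"model", "model.gguf"}, {"ctx-size", 4096}};  // hypothetical keys

    std::vector<char *> args = LLMService::jsonToArguments(params);
    for (char *arg : args)
        if (arg) std::cout << arg << ' ';  // skip a possible trailing nullptr
    std::cout << '\n';
    return 0;
}
```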
void LLMService::logging_callback(CharArrayFn callback)  [inline, override, virtual]
Set logging callback (override - delegates to loaded library)
| callback | Function to receive log messages |
Implements LLMProvider.
Definition at line 206 of file LLM_runtime.h.
void LLMService::logging_callback(CharArrayFn callback)  [override, virtual]
Set logging callback (override)
| callback | Function to receive log messages |
Implements LLMProvider.
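A sketch routing library logs to stderr, assuming `CharArrayFn` is a plain C-string callback (the typedef is not shown on this page):

```cpp
#include <LLM_runtime.h>
#include <cstdio>

// Assumed CharArrayFn shape: a free function taking a C string.
static void log_line(const char *msg) { std::fprintf(stderr, "[llm] %s\n", msg); }

int main() {
    LLMService llm("model.gguf");   // placeholder path
    llm.debug(1);                   // verbosity level is illustrative
    llm.logging_callback(log_line); // route log messages to our handler
    llm.start();
    // ...
    llm.stop();
    return 0;
}
```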
std::string LLMService::lora_list_json()  [inline, override, virtual]
List available LoRA adapters.
Implements LLMProvider.
Definition at line 151 of file LLM_runtime.h.
std::string LLMService::lora_list_json()  [override, virtual]
List available LoRA adapters.
Implements LLMProvider.
std::string LLMService::lora_weight_json(const json &data)  [inline, override, virtual]
Configure LoRA weights with HTTP response support.
| data | JSON object with LoRA configuration |
Protected method used internally for server-based LoRA configuration
Implements LLMProvider.
Definition at line 147 of file LLM_runtime.h.
std::string LLMService::lora_weight_json(const json &data)  [override, virtual]
Configure LoRA weights with HTTP response support.
| data | JSON object with LoRA configuration |
Protected method used internally for server-based LoRA configuration
Implements LLMProvider.
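A hedged sketch: the request shape below, an array of {id, scale} objects, is an assumption based on the `LoraIdScale` type name and the llama.cpp server LoRA endpoint, not a documented schema; adapter paths are placeholders.

```cpp
#include <LLM_runtime.h>
#include <iostream>

int main() {
    // Load the model with two placeholder LoRA adapters.
    LLMService llm("model.gguf", 1, -1, 0, false, 4096, 2048, false,
                   {"adapter_a.gguf", "adapter_b.gguf"});
    llm.start();

    // Assumed request shape: [{"id": ..., "scale": ...}, ...].
    json weights = json::array({{{"id", 0}, {"scale", 0.75}},
                                {{"id", 1}, {"scale", 0.25}}});
    std::cout << llm.lora_weight_json(weights) << std::endl;
    std::cout << llm.lora_list_json() << std::endl;  // inspect loaded adapters

    llm.stop();
    return 0;
}
```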
void LLMService::set_SSL(const std::string &cert, const std::string &key)  [inline, override, virtual]
Set SSL configuration (override - delegates to loaded library)
| cert | SSL certificate path |
| key | SSL private key path |
Implements LLMProvider.
Definition at line 190 of file LLM_runtime.h.
void LLMService::set_SSL(const std::string &SSL_cert, const std::string &SSL_key)  [override, virtual]
Configure SSL certificates (override)
| SSL_cert | Path to SSL certificate file |
| SSL_key | Path to SSL private key file |
Implements LLMProvider.
std::string LLMService::slot_json(const json &data)  [inline, override, virtual]
Manage slots with HTTP response support.
| data | JSON object with slot operation |
Protected method used internally for server-based slot management
Implements LLMLocal.
Definition at line 157 of file LLM_runtime.h.
std::string LLMService::slot_json(const json &data)  [override, virtual]
Manage slots with HTTP response support.
| data | JSON object with slot operation |
Protected method used internally for server-based slot management
Implements LLMLocal.
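Rather than guessing the JSON schema for `slot_json`, this sketch uses the inherited `save_slot`/`load_slot` convenience wrappers (documented under LLMLocal); the file path is a placeholder.

```cpp
#include <LLM_runtime.h>

int main() {
    LLMService llm("model.gguf", /*num_slots=*/2);  // placeholder path
    llm.start();

    int slot = llm.get_next_available_slot();
    llm.completion("Remember: the password is swordfish.", nullptr, slot);

    llm.save_slot(slot, "slot_state.bin");  // persist the slot's state
    llm.load_slot(slot, "slot_state.bin");  // ...and restore it later

    llm.stop();
    return 0;
}
```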
void LLMService::start()  [inline, override, virtual]
Start service (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 169 of file LLM_runtime.h.
void LLMService::start()  [override, virtual]
Start the LLM service (override)
Implements LLMProvider.
void LLMService::start_server(const std::string &host = "0.0.0.0", int port = -1, const std::string &API_key = "")  [inline, override, virtual]
Start HTTP server (override - delegates to loaded library)
| host | Host address (default: "0.0.0.0") |
| port | Port number (0 for auto) |
| API_key | Optional API key |
Implements LLMProvider.
Definition at line 163 of file LLM_runtime.h.
void LLMService::start_server(const std::string &host = "0.0.0.0", int port = -1, const std::string &API_key = "")  [override, virtual]
Start HTTP server (override)
| host | Host address to bind (default: "0.0.0.0") |
| port | Port number (0 for auto-selection) |
| API_key | Optional API key for authentication |
Implements LLMProvider.
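A sketch of exposing the service over HTTP with optional TLS and an API key; the port, key, and certificate paths are placeholders.

```cpp
#include <LLM_runtime.h>

int main() {
    LLMService llm("model.gguf");   // placeholder path
    llm.start();

    llm.set_SSL("server.crt", "server.key");  // optional TLS (placeholder paths)
    llm.start_server("0.0.0.0", 8080, "my-secret-key");

    llm.join_server();   // block until the server shuts down
    llm.stop();
    return 0;
}
```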
bool LLMService::started()  [inline, override, virtual]
Check service status (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 173 of file LLM_runtime.h.
bool LLMService::started()  [override, virtual]
Check service status (override)
Implements LLMProvider.
void LLMService::stop()  [inline, override, virtual]
Stop service (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 176 of file LLM_runtime.h.
void LLMService::stop()  [override, virtual]
Stop the LLM service (override)
Implements LLMProvider.
void LLMService::stop_server()  [inline, override, virtual]
Stop HTTP server (override - delegates to loaded library)
Implements LLMProvider.
Definition at line 166 of file LLM_runtime.h.
void LLMService::stop_server()  [override, virtual]
Stop HTTP server (override)
Implements LLMProvider.
std::string LLMService::tokenize_json(const json &data)  [inline, override, virtual]
Tokenize input (override)
| data | JSON object containing text to tokenize |
Implements LLM.
Definition at line 112 of file LLM_runtime.h.
std::string LLMService::tokenize_json(const json &data)  [override, virtual]
Tokenize input (override)
| data | JSON object containing text to tokenize |
Implements LLM.
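A round-trip sketch via the inherited convenience methods `tokenize`/`detokenize`, which avoids hand-building the JSON bodies; note the int to int32_t copy bridging the two documented signatures.

```cpp
#include <LLM_runtime.h>
#include <iostream>
#include <vector>

int main() {
    LLMService llm("model.gguf");   // placeholder path
    llm.start();

    std::vector<int> tokens = llm.tokenize("Hello, world!");
    std::cout << tokens.size() << " tokens" << std::endl;

    // detokenize takes int32_t while tokenize returns int: copy to bridge.
    std::vector<int32_t> ids(tokens.begin(), tokens.end());
    std::cout << llm.detokenize(ids) << std::endl;

    llm.stop();
    return 0;
}
```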
LibHandle LLMService::handle = nullptr
Handle to loaded library.
Definition at line 99 of file LLM_runtime.h.
LLMProvider * LLMService::llm = nullptr
Pointer to loaded LLM provider instance.
Definition at line 100 of file LLM_runtime.h.
std::vector<std::string> LLMService::search_paths  [protected]
Library search paths.
Definition at line 219 of file LLM_runtime.h.