![]() |
LlamaLib
v2.0.2
Cross-platform library for local LLMs
|
Abstract class for LLM service providers. More...
#include <LLM.h>
Public Member Functions | |
| virtual | ~LLMProvider () |
| Virtual destructor. | |
| virtual bool | lora_weight (const std::vector< LoraIdScale > &loras) |
| Configure LoRA weights. | |
| virtual std::string | lora_weight_json (const json &data)=0 |
| Configure LoRA weights with HTTP response support. | |
| virtual std::vector< LoraIdScalePath > | lora_list () |
| List available LoRA adapters. | |
| virtual std::string | lora_list_json ()=0 |
| List available LoRA adapters. | |
| virtual void | enable_reasoning (bool reasoning) |
| Enable reasoning. | |
| virtual void | debug (int debug_level)=0 |
| Set debug level. | |
| virtual void | logging_callback (CharArrayFn callback)=0 |
| Set logging callback function. | |
| virtual void | logging_stop () |
| Stop logging. | |
| virtual void | start ()=0 |
| Start the LLM service. | |
| virtual bool | started ()=0 |
| Check if service is started. | |
| virtual void | stop ()=0 |
| Stop the LLM service. | |
| virtual void | start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="")=0 |
| Start HTTP server. | |
| virtual void | stop_server ()=0 |
| Stop HTTP server. | |
| virtual void | join_service ()=0 |
| Wait for service thread to complete. | |
| virtual void | join_server ()=0 |
| Wait for server thread to complete. | |
| virtual void | set_SSL (const std::string &SSL_cert, const std::string &SSL_key)=0 |
| Configure SSL certificates. | |
| virtual int | embedding_size ()=0 |
| Get embedding vector size. | |
| virtual std::string | debug_implementation ()=0 |
| Implementation debugging. | |
Public Member Functions inherited from LLMLocal | |
| virtual int | get_next_available_slot ()=0 |
| Get an available processing slot. | |
| virtual std::string | save_slot (int id_slot, const std::string &filepath) |
| Save slot state to file. | |
| virtual std::string | load_slot (int id_slot, const std::string &filepath) |
| Load slot state from file. | |
| virtual void | cancel (int id_slot)=0 |
| Cancel request. | |
| virtual std::string | slot_json (const json &data)=0 |
| Manage slots with HTTP response support. | |
Public Member Functions inherited from LLM | |
| virtual | ~LLM ()=default |
| Virtual destructor. | |
| virtual std::vector< int > | tokenize (const std::string &query) |
| Tokenize text. | |
| virtual std::string | tokenize_json (const json &data)=0 |
| Tokenize input (override) | |
| virtual std::string | detokenize (const std::vector< int32_t > &tokens) |
| Convert tokens to text. | |
| virtual std::string | detokenize_json (const json &data)=0 |
| Convert tokens back to text. | |
| virtual std::vector< float > | embeddings (const std::string &query) |
| Generate embeddings. | |
| virtual std::string | embeddings_json (const json &data)=0 |
| Generate embeddings with HTTP response support. | |
| virtual void | set_completion_params (json completion_params_) |
| Set completion parameters. | |
| virtual std::string | get_completion_params () |
| Get current completion parameters. | |
| virtual std::string | completion (const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false) |
| Generate completion. | |
| virtual std::string | completion_json (const json &data, CharArrayFn callback, bool callbackWithJSON)=0 |
| Generate text completion. | |
| virtual void | set_grammar (std::string grammar_) |
| Set grammar for constrained generation. | |
| virtual std::string | get_grammar () |
| Get current grammar specification. | |
| virtual std::string | apply_template (const json &messages) |
| Apply template to messages. | |
| virtual std::string | apply_template_json (const json &data)=0 |
| Apply a chat template to message data. | |
Protected Member Functions | |
| virtual bool | parse_lora_weight_json (const json &result) |
| Parse LoRA weight configuration result. | |
| virtual json | build_lora_weight_json (const std::vector< LoraIdScale > &loras) |
| Build JSON for LoRA weight configuration. | |
| virtual std::vector< LoraIdScalePath > | parse_lora_list_json (const json &result) |
| Parse LoRA list result. | |
| virtual json | build_lora_list_json (const std::vector< LoraIdScalePath > &loras) |
| Build JSON for LoRA list result. | |
Protected Member Functions inherited from LLMLocal | |
| virtual std::string | slot (int id_slot, const std::string &action, const std::string &filepath) |
| Perform slot operation. | |
| virtual json | build_slot_json (int id_slot, const std::string &action, const std::string &filepath) |
| Build JSON for slot operations. | |
| virtual std::string | parse_slot_json (const json &result) |
| Parse slot operation result. | |
Protected Member Functions inherited from LLM | |
| virtual json | build_apply_template_json (const json &messages) |
| Build JSON for template application. | |
| virtual std::string | parse_apply_template_json (const json &result) |
| Parse template application result. | |
| virtual json | build_tokenize_json (const std::string &query) |
| Build JSON for tokenization. | |
| virtual std::vector< int > | parse_tokenize_json (const json &result) |
| Parse tokenization result. | |
| virtual json | build_detokenize_json (const std::vector< int32_t > &tokens) |
| Build JSON for detokenization. | |
| virtual std::string | parse_detokenize_json (const json &result) |
| Parse detokenization result. | |
| virtual json | build_embeddings_json (const std::string &query) |
| Build JSON for embeddings generation. | |
| virtual std::vector< float > | parse_embeddings_json (const json &result) |
| Parse embeddings result. | |
| virtual json | build_completion_json (const std::string &prompt, int id_slot=-1) |
| Build JSON for completion generation. | |
| virtual std::string | parse_completion_json (const json &result) |
| Parse completion result. | |
Protected Attributes | |
| bool | reasoning_enabled = false |
| Whether reasoning is enabled. | |
Additional Inherited Members | |
Static Public Member Functions inherited from LLM | |
| static bool | has_gpu_layers (const std::string &command) |
| Check if command line arguments specify GPU layers. | |
| static std::string | LLM_args_to_command (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={}) |
| Convert LLM parameters to command line arguments. | |
Public Attributes inherited from LLM | |
| int32_t | n_keep = 0 |
| Number of tokens to keep from the beginning of the context. | |
| std::string | grammar = "" |
| Grammar specification in GBNF format or JSON schema. | |
| json | completion_params |
| JSON object containing completion parameters. | |
Abstract class for LLM service providers.
Extends LLMLocal with server functionality, debugging, logging, and advanced features like LoRA management
|
protected virtual |
|
protected virtual |
|
pure virtual |
Set debug level.
| debug_level | Debug verbosity level (0 = off, 1 = LlamaLib messages, 2 and higher = llama.cpp messages and more verbose) |
Implemented in LLMService.
|
pure virtual |
Implementation debugging.
Implemented in LLMService.
|
pure virtual |
Get embedding vector size.
Implemented in LLMService.
|
inline virtual |
Enable reasoning.
| reasoning | Whether to enable reasoning |
Reimplemented in LLMService.
Definition at line 301 of file LLM.h.
|
pure virtual |
Wait for server thread to complete.
Implemented in LLMService.
|
pure virtual |
Wait for service thread to complete.
Implemented in LLMService.
|
pure virtual |
Set logging callback function.
| callback | Function to receive log messages |
Implemented in LLMService.
|
virtual |
|
pure virtual |
List available LoRA adapters.
Implemented in LLMService.
|
virtual |
|
pure virtual |
Configure LoRA weights with HTTP response support.
| data | JSON object with LoRA configuration |
Protected method used internally for server-based LoRA configuration
Implemented in LLMService.
|
protected virtual |
|
protected virtual |
|
pure virtual |
Configure SSL certificates.
| SSL_cert | SSL certificate |
| SSL_key | SSL private key |
Implemented in LLMService.
|
pure virtual |
Start the LLM service.
Implemented in LLMService.
|
pure virtual |
Start HTTP server.
| host | Host address to bind to (default: "0.0.0.0") |
| port | Port number to bind to (0 for auto-select) |
| API_key | Optional API key for authentication |
Implemented in LLMService.
|
pure virtual |
Check if service is started.
Implemented in LLMService.
|
pure virtual |
Stop the LLM service.
Implemented in LLMService.
|
pure virtual |
Stop HTTP server.
Implemented in LLMService.
|
protected |