LlamaLib  v2.0.2
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLMProvider Class Reference (abstract)

Abstract class for LLM service providers. More...

#include <LLM.h>

Inheritance diagram for LLMProvider:
[legend]

Public Member Functions

virtual ~LLMProvider ()
 Virtual destructor.
 
virtual bool lora_weight (const std::vector< LoraIdScale > &loras)
 Configure LoRA weights.
 
virtual std::string lora_weight_json (const json &data)=0
 Configure LoRA weights with HTTP response support.
 
virtual std::vector< LoraIdScalePath > lora_list ()
 List available LoRA adapters.
 
virtual std::string lora_list_json ()=0
 List available LoRA adapters.
 
virtual void enable_reasoning (bool reasoning)
 enable reasoning
 
virtual void debug (int debug_level)=0
 Set debug level.
 
virtual void logging_callback (CharArrayFn callback)=0
 Set logging callback function.
 
virtual void logging_stop ()
 Stop logging.
 
virtual void start ()=0
 Start the LLM service.
 
virtual bool started ()=0
 Check if service is started.
 
virtual void stop ()=0
 Stop the LLM service.
 
virtual void start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="")=0
 Start HTTP server.
 
virtual void stop_server ()=0
 Stop HTTP server.
 
virtual void join_service ()=0
 Wait for service thread to complete.
 
virtual void join_server ()=0
 Wait for server thread to complete.
 
virtual void set_SSL (const std::string &SSL_cert, const std::string &SSL_key)=0
 Configure SSL certificates.
 
virtual int embedding_size ()=0
 Get embedding vector size.
 
virtual std::string debug_implementation ()=0
 Implementation debugging.
 
- Public Member Functions inherited from LLMLocal
virtual int get_next_available_slot ()=0
 Get an available processing slot.
 
virtual std::string save_slot (int id_slot, const std::string &filepath)
 Save slot state to file.
 
virtual std::string load_slot (int id_slot, const std::string &filepath)
 Load slot state from file.
 
virtual void cancel (int id_slot)=0
 Cancel request.
 
virtual std::string slot_json (const json &data)=0
 Manage slots with HTTP response support.
 
- Public Member Functions inherited from LLM
virtual ~LLM ()=default
 Virtual destructor.
 
virtual std::vector< int > tokenize (const std::string &query)
 Tokenize text.
 
virtual std::string tokenize_json (const json &data)=0
 Tokenize input (override)
 
virtual std::string detokenize (const std::vector< int32_t > &tokens)
 Convert tokens to text.
 
virtual std::string detokenize_json (const json &data)=0
 Convert tokens back to text.
 
virtual std::vector< float > embeddings (const std::string &query)
 Generate embeddings.
 
virtual std::string embeddings_json (const json &data)=0
 Generate embeddings with HTTP response support.
 
virtual void set_completion_params (json completion_params_)
 Set completion parameters.
 
virtual std::string get_completion_params ()
 Get current completion parameters.
 
virtual std::string completion (const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false)
 Generate completion.
 
virtual std::string completion_json (const json &data, CharArrayFn callback, bool callbackWithJSON)=0
 Generate text completion.
 
virtual void set_grammar (std::string grammar_)
 Set grammar for constrained generation.
 
virtual std::string get_grammar ()
 Get current grammar specification.
 
virtual std::string apply_template (const json &messages)
 Apply template to messages.
 
virtual std::string apply_template_json (const json &data)=0
 Apply a chat template to message data.
 

Protected Member Functions

virtual bool parse_lora_weight_json (const json &result)
 Parse LoRA weight configuration result.
 
virtual json build_lora_weight_json (const std::vector< LoraIdScale > &loras)
 Build JSON for LoRA weight configuration.
 
virtual std::vector< LoraIdScalePath > parse_lora_list_json (const json &result)
 Parse LoRA list result.
 
virtual json build_lora_list_json (const std::vector< LoraIdScalePath > &loras)
 Build JSON for LoRA list result.
 
- Protected Member Functions inherited from LLMLocal
virtual std::string slot (int id_slot, const std::string &action, const std::string &filepath)
 Perform slot operation.
 
virtual json build_slot_json (int id_slot, const std::string &action, const std::string &filepath)
 Build JSON for slot operations.
 
virtual std::string parse_slot_json (const json &result)
 Parse slot operation result.
 
- Protected Member Functions inherited from LLM
virtual json build_apply_template_json (const json &messages)
 Build JSON for template application.
 
virtual std::string parse_apply_template_json (const json &result)
 Parse template application result.
 
virtual json build_tokenize_json (const std::string &query)
 Build JSON for tokenization.
 
virtual std::vector< int > parse_tokenize_json (const json &result)
 Parse tokenization result.
 
virtual json build_detokenize_json (const std::vector< int32_t > &tokens)
 Build JSON for detokenization.
 
virtual std::string parse_detokenize_json (const json &result)
 Parse detokenization result.
 
virtual json build_embeddings_json (const std::string &query)
 Build JSON for embeddings generation.
 
virtual std::vector< float > parse_embeddings_json (const json &result)
 Parse embeddings result.
 
virtual json build_completion_json (const std::string &prompt, int id_slot=-1)
 Build JSON for completion generation.
 
virtual std::string parse_completion_json (const json &result)
 Parse completion result.
 

Protected Attributes

bool reasoning_enabled = false
 Whether reasoning is enabled.
 

Additional Inherited Members

- Static Public Member Functions inherited from LLM
static bool has_gpu_layers (const std::string &command)
 Check if command line arguments specify GPU layers.
 
static std::string LLM_args_to_command (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={})
 Convert LLM parameters to command line arguments.
 
- Public Attributes inherited from LLM
int32_t n_keep = 0
 Number of tokens to keep from the beginning of the context.
 
std::string grammar = ""
 Grammar specification in GBNF format or JSON schema.
 
json completion_params
 JSON object containing completion parameters.
 

Detailed Description

Abstract class for LLM service providers.

Extends LLMLocal with server functionality, debugging, logging, and advanced features like LoRA management

Definition at line 274 of file LLM.h.

Constructor & Destructor Documentation

◆ ~LLMProvider()

LLMProvider::~LLMProvider ( )
virtual

Virtual destructor.

Definition at line 42 of file LLM.cpp.

Member Function Documentation

◆ build_lora_list_json()

json LLMProvider::build_lora_list_json ( const std::vector< LoraIdScalePath > & loras)
protected virtual

Build JSON for LoRA list result.

Parameters
loras — Vector of LoRA adapters with their scales and paths
Returns
JSON object ready for lora_list_json

Definition at line 361 of file LLM.cpp.

◆ build_lora_weight_json()

json LLMProvider::build_lora_weight_json ( const std::vector< LoraIdScale > & loras)
protected virtual

Build JSON for LoRA weight configuration.

Parameters
loras — Vector of LoRA adapters with their scales
Returns
JSON object ready for lora_weight_json

Definition at line 331 of file LLM.cpp.

Here is the caller graph for this function:

◆ debug()

virtual void LLMProvider::debug ( int debug_level)
pure virtual

Set debug level.

Parameters
debug_level — Debug verbosity level (0 = off, 1 = LlamaLib messages, 2 and higher = llama.cpp messages and more verbose)

Implemented in LLMService, and LLMService.

◆ debug_implementation()

virtual std::string LLMProvider::debug_implementation ( )
pure virtual

Implementation debugging.

Returns
"standalone" or "runtime_detection" according to the implementation

Implemented in LLMService, and LLMService.

◆ embedding_size()

virtual int LLMProvider::embedding_size ( )
pure virtual

Get embedding vector size.

Returns
Number of dimensions in embedding vectors

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ enable_reasoning()

virtual void LLMProvider::enable_reasoning ( bool reasoning)
inline virtual

enable reasoning

Parameters
reasoning — whether to enable reasoning

Reimplemented in LLMService.

Definition at line 301 of file LLM.h.

Here is the caller graph for this function:

◆ join_server()

virtual void LLMProvider::join_server ( )
pure virtual

Wait for server thread to complete.

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ join_service()

virtual void LLMProvider::join_service ( )
pure virtual

Wait for service thread to complete.

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ logging_callback()

virtual void LLMProvider::logging_callback ( CharArrayFn callback)
pure virtual

Set logging callback function.

Parameters
callback — Function to receive log messages

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ logging_stop()

void LLMProvider::logging_stop ( )
virtual

Stop logging.

Definition at line 324 of file LLM.cpp.

◆ lora_list()

std::vector< LoraIdScalePath > LLMProvider::lora_list ( )
virtual

List available LoRA adapters.

Returns
Vector of available LoRA adapters with paths

Definition at line 391 of file LLM.cpp.

Here is the caller graph for this function:

◆ lora_list_json()

virtual std::string LLMProvider::lora_list_json ( )
pure virtual

List available LoRA adapters.

Returns
JSON string containing list of available LoRA adapters

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ lora_weight()

bool LLMProvider::lora_weight ( const std::vector< LoraIdScale > & loras)
virtual

Configure LoRA weights.

Parameters
loras — Vector of LoRA adapters with their scales
Returns
true if configuration was successful, false otherwise

Definition at line 354 of file LLM.cpp.

Here is the caller graph for this function:

◆ lora_weight_json()

virtual std::string LLMProvider::lora_weight_json ( const json & data)
pure virtual

Configure LoRA weights with HTTP response support.

Parameters
data — JSON object with LoRA configuration
Returns
JSON response string

Protected method used internally for server-based LoRA configuration

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ parse_lora_list_json()

std::vector< LoraIdScalePath > LLMProvider::parse_lora_list_json ( const json & result)
protected virtual

Parse LoRA list result.

Parameters
result — JSON response from lora_list_json
Returns
Vector of available LoRA adapters with paths

Definition at line 373 of file LLM.cpp.

Here is the caller graph for this function:

◆ parse_lora_weight_json()

bool LLMProvider::parse_lora_weight_json ( const json & result)
protected virtual

Parse LoRA weight configuration result.

Parameters
result — JSON response from lora_weight_json
Returns
true if configuration was successful, false otherwise

Definition at line 342 of file LLM.cpp.

Here is the caller graph for this function:

◆ set_SSL()

virtual void LLMProvider::set_SSL ( const std::string & SSL_cert,
const std::string & SSL_key )
pure virtual

Configure SSL certificates.

Parameters
SSL_cert — SSL certificate
SSL_key — SSL private key

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ start()

virtual void LLMProvider::start ( )
pure virtual

Start the LLM service.

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ start_server()

virtual void LLMProvider::start_server ( const std::string & host = "0.0.0.0",
int port = -1,
const std::string & API_key = "" )
pure virtual

Start HTTP server.

Parameters
host — Host address to bind to (default: "0.0.0.0")
port — Port number to bind to (0 for auto-select; note: the declaration's default is -1 — verify which value triggers auto-selection)
API_key — Optional API key for authentication

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ started()

virtual bool LLMProvider::started ( )
pure virtual

Check if service is started.

Returns
true if service is running, false otherwise

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ stop()

virtual void LLMProvider::stop ( )
pure virtual

Stop the LLM service.

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

◆ stop_server()

virtual void LLMProvider::stop_server ( )
pure virtual

Stop HTTP server.

Implemented in LLMService, and LLMService.

Here is the caller graph for this function:

Member Data Documentation

◆ reasoning_enabled

bool LLMProvider::reasoning_enabled = false
protected

Whether reasoning is enabled.

Definition at line 353 of file LLM.h.


The documentation for this class was generated from the following files: