![]() |
LlamaLib
v2.0.2
Cross-platform library for local LLMs
|
Abstract class for LLM service providers. More...
#include <LLM.h>
Public Member Functions | |
| virtual | ~LLMProvider () |
| Virtual destructor. | |
| virtual bool | lora_weight (const std::vector< LoraIdScale > &loras) |
| Configure LoRA weights. | |
| virtual std::string | lora_weight_json (const json &data)=0 |
| Configure LoRA weights with HTTP response support. | |
| virtual std::vector< LoraIdScalePath > | lora_list () |
| List available LoRA adapters. | |
| virtual std::string | lora_list_json ()=0 |
| List available LoRA adapters. | |
| virtual void | enable_reasoning (bool reasoning) |
| Enable reasoning. | |
| virtual void | debug (int debug_level)=0 |
| Set debug level. | |
| virtual void | logging_callback (CharArrayFn callback)=0 |
| Set logging callback function. | |
| virtual void | logging_stop () |
| Stop logging. | |
| virtual void | start ()=0 |
| Start the LLM service. | |
| virtual bool | started ()=0 |
| Check if service is started. | |
| virtual void | stop ()=0 |
| Stop the LLM service. | |
| virtual void | start_server (const std::string &host="0.0.0.0", int port=-1, const std::string &API_key="")=0 |
| Start HTTP server. | |
| virtual void | stop_server ()=0 |
| Stop HTTP server. | |
| virtual void | join_service ()=0 |
| Wait for service thread to complete. | |
| virtual void | join_server ()=0 |
| Wait for server thread to complete. | |
| virtual void | set_SSL (const std::string &SSL_cert, const std::string &SSL_key)=0 |
| Configure SSL certificates. | |
| virtual int | embedding_size ()=0 |
| Get embedding vector size. | |
| virtual std::string | debug_implementation ()=0 |
| Implementation debugging. | |
Public Member Functions inherited from LLMLocal | |
| virtual int | get_next_available_slot ()=0 |
| Get an available processing slot. | |
| virtual std::string | save_slot (int id_slot, const std::string &filepath) |
| Save slot state to file. | |
| virtual std::string | load_slot (int id_slot, const std::string &filepath) |
| Load slot state from file. | |
| virtual void | cancel (int id_slot)=0 |
| Cancel request. | |
| virtual std::string | slot_json (const json &data)=0 |
| Manage slots with HTTP response support. | |
Public Member Functions inherited from LLM | |
| virtual | ~LLM ()=default |
| Virtual destructor. | |
| virtual std::vector< int > | tokenize (const std::string &query) |
| Tokenize text. | |
| virtual std::string | tokenize_json (const json &data)=0 |
| Tokenize input (override) | |
| virtual std::string | detokenize (const std::vector< int32_t > &tokens) |
| Convert tokens to text. | |
| virtual std::string | detokenize_json (const json &data)=0 |
| Convert tokens back to text. | |
| virtual std::vector< float > | embeddings (const std::string &query) |
| Generate embeddings. | |
| virtual std::string | embeddings_json (const json &data)=0 |
| Generate embeddings with HTTP response support. | |
| virtual void | set_completion_params (json completion_params_) |
| Set completion parameters. | |
| virtual std::string | get_completion_params () |
| Get current completion parameters. | |
| virtual std::string | completion (const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false) |
| Generate completion. | |
| virtual std::string | completion_json (const json &data, CharArrayFn callback, bool callbackWithJSON)=0 |
| Generate text completion. | |
| virtual void | set_grammar (std::string grammar_) |
| Set grammar for constrained generation. | |
| virtual std::string | get_grammar () |
| Get current grammar specification. | |
| virtual std::string | apply_template (const json &messages) |
| Apply template to messages. | |
| virtual std::string | apply_template_json (const json &data)=0 |
| Apply a chat template to message data. | |
Protected Member Functions | |
| virtual bool | parse_lora_weight_json (const json &result) |
| Parse LoRA weight configuration result. | |
| virtual json | build_lora_weight_json (const std::vector< LoraIdScale > &loras) |
| Build JSON for LoRA weight configuration. | |
| virtual std::vector< LoraIdScalePath > | parse_lora_list_json (const json &result) |
| Parse LoRA list result. | |
| virtual json | build_lora_list_json (const std::vector< LoraIdScalePath > &loras) |
| Build JSON for LoRA list result. | |
Protected Member Functions inherited from LLMLocal | |
| virtual std::string | slot (int id_slot, const std::string &action, const std::string &filepath) |
| Perform slot operation. | |
| virtual json | build_slot_json (int id_slot, const std::string &action, const std::string &filepath) |
| Build JSON for slot operations. | |
| virtual std::string | parse_slot_json (const json &result) |
| Parse slot operation result. | |
Protected Member Functions inherited from LLM | |
| virtual json | build_apply_template_json (const json &messages) |
| Build JSON for template application. | |
| virtual std::string | parse_apply_template_json (const json &result) |
| Parse template application result. | |
| virtual json | build_tokenize_json (const std::string &query) |
| Build JSON for tokenization. | |
| virtual std::vector< int > | parse_tokenize_json (const json &result) |
| Parse tokenization result. | |
| virtual json | build_detokenize_json (const std::vector< int32_t > &tokens) |
| Build JSON for detokenization. | |
| virtual std::string | parse_detokenize_json (const json &result) |
| Parse detokenization result. | |
| virtual json | build_embeddings_json (const std::string &query) |
| Build JSON for embeddings generation. | |
| virtual std::vector< float > | parse_embeddings_json (const json &result) |
| Parse embeddings result. | |
| virtual json | build_completion_json (const std::string &prompt, int id_slot=-1) |
| Build JSON for completion generation. | |
| virtual std::string | parse_completion_json (const json &result) |
| Parse completion result. | |
Protected Attributes | |
| bool | reasoning_enabled = false |
| Whether reasoning is enabled. | |
Additional Inherited Members | |
Static Public Member Functions inherited from LLM | |
| static bool | has_gpu_layers (const std::string &command) |
| Check if command line arguments specify GPU layers. | |
| static std::string | LLM_args_to_command (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={}) |
| Convert LLM parameters to command line arguments. | |
Public Attributes inherited from LLM | |
| int32_t | n_keep = 0 |
| Number of tokens to keep from the beginning of the context. | |
| std::string | grammar = "" |
| Grammar specification in GBNF format or JSON schema. | |
| json | completion_params |
| JSON object containing completion parameters. | |
Abstract class for LLM service providers.
Extends LLMLocal with server functionality, debugging, logging, and advanced features like LoRA management
|
protected virtual |
|
protected virtual |
|
pure virtual |
Set debug level.
| debug_level | Debug verbosity level (0 = off, 1 = LlamaLib messages, 2 and higher = llama.cpp messages and more verbose) |
Implemented in LLMService.
|
pure virtual |
Implementation debugging.
Implemented in LLMService.
|
pure virtual |
Get embedding vector size.
Implemented in LLMService.
|
inline virtual |
Enable reasoning.
| reasoning | Whether to enable reasoning |
Reimplemented in LLMService.
Definition at line 301 of file LLM.h.
|
pure virtual |
Wait for server thread to complete.
Implemented in LLMService.
|
pure virtual |
Wait for service thread to complete.
Implemented in LLMService.
|
pure virtual |
Set logging callback function.
| callback | Function to receive log messages |
Implemented in LLMService.
|
virtual |
|
pure virtual |
List available LoRA adapters.
Implemented in LLMService.
|
virtual |
|
pure virtual |
Configure LoRA weights with HTTP response support.
| data | JSON object with LoRA configuration |
Protected method used internally for server-based LoRA configuration
Implemented in LLMService.
|
protected virtual |
|
protected virtual |
|
pure virtual |
Configure SSL certificates.
| SSL_cert | SSL certificate |
| SSL_key | SSL private key |
Implemented in LLMService.
|
pure virtual |
Start the LLM service.
Implemented in LLMService.
|
pure virtual |
Start HTTP server.
| host | Host address to bind to (default: "0.0.0.0") |
| port | Port number to bind to (0 for auto-select) |
| API_key | Optional API key for authentication |
Implemented in LLMService.
|
pure virtual |
Check if service is started.
Implemented in LLMService.
|
pure virtual |
Stop the LLM service.
Implemented in LLMService.
|
pure virtual |
Stop HTTP server.
Implemented in LLMService.
|
protected |