|
| virtual int | get_next_available_slot ()=0 |
| | Get an available processing slot.
|
| |
| virtual std::string | save_slot (int id_slot, const std::string &filepath) |
| | Save slot state to file.
|
| |
| virtual std::string | load_slot (int id_slot, const std::string &filepath) |
| | Load slot state from file.
|
| |
| virtual void | cancel (int id_slot)=0 |
| | Cancel request.
|
| |
| virtual std::string | slot_json (const json &data)=0 |
| | Manage slots with HTTP response support.
|
| |
|
| virtual | ~LLM ()=default |
| | Virtual destructor.
|
| |
| virtual std::vector< int > | tokenize (const std::string &query) |
| | Tokenize text.
|
| |
| virtual std::string | tokenize_json (const json &data)=0 |
| | Tokenize input (override).
|
| |
| virtual std::string | detokenize (const std::vector< int32_t > &tokens) |
| | Convert tokens to text.
|
| |
| virtual std::string | detokenize_json (const json &data)=0 |
| | Convert tokens back to text.
|
| |
| virtual std::vector< float > | embeddings (const std::string &query) |
| | Generate embeddings.
|
| |
| virtual std::string | embeddings_json (const json &data)=0 |
| | Generate embeddings with HTTP response support.
|
| |
| virtual void | set_completion_params (json completion_params_) |
| | Set completion parameters.
|
| |
| virtual std::string | get_completion_params () |
| | Get current completion parameters.
|
| |
| virtual std::string | completion (const std::string &prompt, CharArrayFn callback=nullptr, int id_slot=-1, bool return_response_json=false) |
| | Generate completion.
|
| |
| virtual std::string | completion_json (const json &data, CharArrayFn callback, bool callbackWithJSON)=0 |
| | Generate text completion.
|
| |
| virtual void | set_grammar (std::string grammar_) |
| | Set grammar for constrained generation.
|
| |
| virtual std::string | get_grammar () |
| | Get current grammar specification.
|
| |
| virtual std::string | apply_template (const json &messages) |
| | Apply template to messages.
|
| |
| virtual std::string | apply_template_json (const json &data)=0 |
| | Apply a chat template to message data.
|
| |
|
| virtual std::string | slot (int id_slot, const std::string &action, const std::string &filepath) |
| | Perform slot operation.
|
| |
| virtual json | build_slot_json (int id_slot, const std::string &action, const std::string &filepath) |
| | Build JSON for slot operations.
|
| |
| virtual std::string | parse_slot_json (const json &result) |
| | Parse slot operation result.
|
| |
| virtual json | build_apply_template_json (const json &messages) |
| | Build JSON for template application.
|
| |
| virtual std::string | parse_apply_template_json (const json &result) |
| | Parse template application result.
|
| |
| virtual json | build_tokenize_json (const std::string &query) |
| | Build JSON for tokenization.
|
| |
| virtual std::vector< int > | parse_tokenize_json (const json &result) |
| | Parse tokenization result.
|
| |
| virtual json | build_detokenize_json (const std::vector< int32_t > &tokens) |
| | Build JSON for detokenization.
|
| |
| virtual std::string | parse_detokenize_json (const json &result) |
| | Parse detokenization result.
|
| |
| virtual json | build_embeddings_json (const std::string &query) |
| | Build JSON for embeddings generation.
|
| |
| virtual std::vector< float > | parse_embeddings_json (const json &result) |
| | Parse embeddings result.
|
| |
| virtual json | build_completion_json (const std::string &prompt, int id_slot=-1) |
| | Build JSON for completion generation.
|
| |
| virtual std::string | parse_completion_json (const json &result) |
| | Parse completion result.
|
| |
|
| static bool | has_gpu_layers (const std::string &command) |
| | Check if command line arguments specify GPU layers.
|
| |
| static std::string | LLM_args_to_command (const std::string &model_path, int num_slots=1, int num_threads=-1, int num_GPU_layers=0, bool flash_attention=false, int context_size=4096, int batch_size=2048, bool embedding_only=false, const std::vector< std::string > &lora_paths={}) |
| | Convert LLM parameters to command line arguments.
|
| |
| int32_t | n_keep = 0 |
| | Number of tokens to keep from the beginning of the context.
|
| |
| std::string | grammar = "" |
| | Grammar specification in GBNF format or JSON schema.
|
| |
| json | completion_params |
| | JSON object containing completion parameters.
|
| |
Abstract class for local LLM operations with slot management.
Extends the base LLM class with local-specific functionality, including slot management for concurrent requests and state persistence.
Definition at line 221 of file LLM.h.