|
async void | Awake () |
| The Unity Awake function that starts the LLM server.
|
|
async Task | WaitUntilReady () |
| Waits until the LLM is ready.
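For example, a minimal sketch (the LLMUnity namespace and an LLM component assigned in the Inspector are assumptions):

  using UnityEngine;
  using LLMUnity;

  public class ReadyCheck : MonoBehaviour
  {
      public LLM llm;  // assigned in the Inspector

      async void Start()
      {
          await llm.WaitUntilReady();  // suspends until the server has started
          Debug.Log("LLM is ready");
      }
  }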
|
|
void | SetModel (string path) |
| Sets the model used by the LLM. The provided model is copied to the Assets/StreamingAssets folder so that it also works in the build. Models in .gguf format are supported.
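A sketch of a typical call, given an LLM reference llm as above (the model path is a hypothetical placeholder):

  llm.SetModel("path/to/model.gguf");  // copied under Assets/StreamingAssets, so it ships with the build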
|
|
void | SetLora (string path, float weight=1) |
| Sets a LoRA model to use in the LLM. The provided model is copied to the Assets/StreamingAssets folder so that it also works in the build. Models in .gguf format are supported.
|
|
void | AddLora (string path, float weight=1) |
| Adds a LoRA model to use in the LLM. The provided model is copied to the Assets/StreamingAssets folder so that it also works in the build. Models in .gguf format are supported.
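A sketch contrasting the two calls, given an LLM reference llm as above (the adapter filenames are hypothetical):

  llm.SetLora("style.gguf");          // set a LoRA to use
  llm.AddLora("persona.gguf", 0.8f);  // add a further LoRA with weight 0.8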
|
|
void | RemoveLora (string path) |
| Removes a LoRA model from the LLM. Models in .gguf format are supported.
|
|
void | RemoveLoras () |
| Removes all LoRA models from the LLM.
|
|
void | SetLoraWeight (string path, float weight) |
| Changes the weight (scale) of a LoRA model in the LLM.
|
|
void | SetLoraWeights (Dictionary< string, float > loraToWeight) |
| Changes the weights (scales) of the LoRA models in the LLM.
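A sketch of adjusting the scales, given an LLM reference llm as above and System.Collections.Generic in scope (the filenames are hypothetical):

  llm.SetLoraWeight("style.gguf", 0.5f);
  llm.SetLoraWeights(new Dictionary<string, float>
  {
      { "style.gguf", 0.5f },
      { "persona.gguf", 1f }
  });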
|
|
void | UpdateLoras () |
|
void | SetTemplate (string templateName, bool setDirty=true) |
| Sets the chat template for the LLM.
|
|
void | SetEmbeddings (int embeddingLength, bool embeddingsOnly) |
| Sets the LLM embedding parameters.
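A sketch, given an LLM reference llm as above (the embedding length must match the model; 384 is illustrative):

  llm.SetEmbeddings(384, true);  // 384-dimensional embeddings, embeddings-only mode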
|
|
void | SetSSLCert (string path) |
| Uses an SSL certificate for the LLM server.
|
|
void | SetSSLKey (string path) |
| Uses an SSL key for the LLM server.
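A sketch covering both calls, given an LLM reference llm as above (the certificate and key paths are hypothetical):

  llm.SetSSLCert("certs/server.crt");
  llm.SetSSLKey("certs/server.key");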
|
|
string | GetTemplate () |
| Returns the chat template of the LLM.
|
|
int | Register (LLMCaller llmCaller) |
| Registers a local LLMCaller object. This binds the LLMCaller "client" to a specific slot of the LLM.
|
|
void | Update () |
| The Unity Update function. It is used to retrieve the LLM replies.
|
|
async Task< string > | Tokenize (string json) |
| Tokenises the provided query.
|
|
async Task< string > | Detokenize (string json) |
| Detokenises the provided query.
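A sketch of a round trip, given an LLM reference llm as above. The JSON payload shape is an assumption based on the llama.cpp server that the package builds on, where tokenize requests carry a "content" field; higher-level classes normally construct this JSON:

  string tokensJson = await llm.Tokenize("{\"content\": \"Hello world\"}");
  string textJson = await llm.Detokenize(tokensJson);  // back to text, as JSON
  Debug.Log(textJson);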
|
|
async Task< string > | Embeddings (string json) |
| Computes the embeddings of the provided query.
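A similarly hedged sketch, assuming the same payload convention as above:

  string embJson = await llm.Embeddings("{\"content\": \"Hello world\"}");
  Debug.Log(embJson);  // JSON reply containing the embedding vector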
|
|
void | ApplyLoras () |
| Sets the LoRA scales. This only works after the LLM service has started.
|
|
async Task< List< LoraWeightResult > > | ListLoras () |
| Gets a list of the LoRA adapters.
|
|
async Task< string > | Slot (string json) |
| Saves or restores the state of a slot.
|
|
async Task< string > | Completion (string json, Callback< string > streamCallback=null) |
| Provides the chat and completion functionality of the LLM.
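A sketch of a streamed completion. The payload shape is an assumption following the llama.cpp server completion schema; in practice higher-level classes such as LLMCharacter build this JSON:

  async void Ask(LLM llm)
  {
      string json = "{\"prompt\": \"Hello\", \"n_predict\": 32}";  // assumed schema
      string reply = await llm.Completion(json, partial => Debug.Log(partial));  // streamCallback receives partial replies
      Debug.Log("final: " + reply);
  }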
|
|
async Task | SetBasePrompt (string base_prompt) |
| Sets the base prompt used for all LLMCaller objects.
|
void | CancelRequest (int id_slot) |
| Cancels the requests in a specific slot of the LLM.
|
|
void | Destroy () |
| Stops and destroys the LLM.
|
|
void | OnDestroy () |
| The Unity OnDestroy function called when the object is destroyed. The function StopProcess is called to stop the LLM server.
|
|
|
bool | advancedOptions = false |
| toggle to show/hide advanced options in the GameObject
|
|
bool | remote = false |
| toggle to enable remote server functionality
|
|
int | port = 13333 |
| port to use for the LLM server
|
|
int | numThreads = -1 |
| number of threads to use (-1 = all)
|
|
int | numGPULayers = 0 |
| number of model layers to offload to the GPU (0 = GPU not used). Use a large number, e.g. >30, to utilise the GPU as much as possible. If the user's GPU is not supported, the LLM will fall back to the CPU
|
|
bool | debug = false |
| select to log the output of the LLM in the Unity Editor.
|
|
int | parallelPrompts = -1 |
| number of prompts that can happen in parallel (-1 = number of LLMCaller objects)
|
|
bool | dontDestroyOnLoad = true |
| select to not destroy the LLM GameObject when loading a new Scene.
|
|
int | contextSize = 8192 |
| Size of the prompt context (0 = context size of the model). This is the number of tokens the model can take as input when generating responses.
|
|
int | batchSize = 512 |
| Batch size for prompt processing.
|
|
string | basePrompt = "" |
| a base prompt shared by all LLMCaller objects
|
|
string | model = "" |
| the LLM model to use. Models in .gguf format are supported.
|
|
string | chatTemplate = ChatTemplate.DefaultTemplate |
| Chat template used for the model.
|
|
string | lora = "" |
| the paths of the LoRA models being used (relative to the Assets/StreamingAssets folder). Models in .gguf format are supported.
|
|
string | loraWeights = "" |
| the weights of the LoRA models being used.
|
|
bool | flashAttention = false |
| enable use of flash attention
|
|
string | APIKey |
| API key to use for the server (optional)
|
|
string | SSLCertPath = "" |
|
string | SSLKeyPath = "" |
|
Class implementing the LLM server.
Definition at line 18 of file LLM.cs.
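The public attributes mirror the component's Inspector settings. A minimal sketch reading them at runtime (an LLM component assigned in the Inspector is assumed; the fields are usually configured in the Inspector before play mode, since Awake starts the server):

  using UnityEngine;
  using LLMUnity;

  public class LLMInfo : MonoBehaviour
  {
      public LLM llm;  // assigned in the Inspector

      void Start()
      {
          Debug.Log($"port {llm.port}, threads {llm.numThreads}, context {llm.contextSize}");
          if (llm.numGPULayers == 0) Debug.Log("running on CPU only");
      }
  }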