|
| async void | Awake () |
| | The Unity Awake function that starts the LLM server.
|
| |
| async Task | WaitUntilReady () |
| | Waits until the LLM is ready.
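
A minimal usage sketch (the LLMUnity namespace and the component lookup are assumptions):

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class ReadyCheck : MonoBehaviour
    {
        async void Start()
        {
            // Assumes an LLM component on the same GameObject.
            LLM llm = GetComponent<LLM>();
            await llm.WaitUntilReady(); // resumes once the server has started
            Debug.Log("LLM server is ready");
        }
    }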
|
| |
| void | SetModel (string path) |
| | Sets the model used by the LLM. The provided model is copied to the Assets/StreamingAssets folder so that it also works in builds. Models must be in the .gguf format.
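
A sketch of setting the model from code (the path is hypothetical; this is normally done via the Inspector):

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class ModelSetup : MonoBehaviour
    {
        void Start()
        {
            LLM llm = GetComponent<LLM>();
            // Hypothetical path to a .gguf model; the file is copied to
            // Assets/StreamingAssets so it is also available in builds.
            llm.SetModel("path/to/model.gguf");
        }
    }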
|
| |
| void | SetLora (string path, float weight=1) |
| | Sets a LoRA model to use in the LLM. The provided model is copied to the Assets/StreamingAssets folder so that it also works in builds. Models must be in the .gguf format.
|
| |
| void | AddLora (string path, float weight=1) |
| | Adds a LoRA model to use in the LLM. The provided model is copied to the Assets/StreamingAssets folder so that it also works in builds. Models must be in the .gguf format.
|
| |
| void | RemoveLora (string path) |
| | Removes a LoRA model from the LLM. Models must be in the .gguf format.
|
| |
| void | RemoveLoras () |
| | Removes all LoRA models from the LLM.
|
| |
| void | SetLoraWeight (string path, float weight) |
| | Changes the weight (scale) of a LoRA model in the LLM.
|
| |
| void | SetLoraWeights (Dictionary< string, float > loraToWeight) |
| | Changes the weights (scales) of the LoRA models in the LLM.
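
A sketch combining the LoRA calls above (adapter paths and weights are illustrative):

    using System.Collections.Generic;
    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class LoraSetup : MonoBehaviour
    {
        void Start()
        {
            LLM llm = GetComponent<LLM>();
            llm.AddLora("loras/style.gguf", 0.8f); // hypothetical adapter path
            llm.AddLora("loras/domain.gguf");      // default weight of 1
            // Adjust a single adapter, or several at once.
            llm.SetLoraWeight("loras/style.gguf", 0.5f);
            llm.SetLoraWeights(new Dictionary<string, float> {
                { "loras/style.gguf", 0.5f },
                { "loras/domain.gguf", 1.0f }
            });
            llm.RemoveLora("loras/domain.gguf"); // drop one adapter
            // llm.RemoveLoras();                // or drop all of them
        }
    }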
|
| |
| void | UpdateLoras () |
| |
| void | SetTemplate (string templateName, bool setDirty=true) |
| | Sets the chat template for the LLM.
|
| |
| void | SetEmbeddings (int embeddingLength, bool embeddingsOnly) |
| | Sets the LLM embedding parameters.
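
A sketch of the template and embedding setup (the template name and embedding length are illustrative):

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class TemplateSetup : MonoBehaviour
    {
        void Start()
        {
            LLM llm = GetComponent<LLM>();
            llm.SetTemplate("chatml");     // assumed template name
            Debug.Log(llm.GetTemplate());  // prints the active chat template
            llm.SetEmbeddings(1024, true); // 1024-dim embeddings, embeddings-only mode
        }
    }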
|
| |
| void | SetSSLCert (string path) |
| | Uses an SSL certificate for the LLM server.
|
| |
| void | SetSSLKey (string path) |
| | Uses an SSL key for the LLM server.
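
A sketch enabling SSL for a remote server (certificate and key paths are hypothetical):

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class SecureServerSetup : MonoBehaviour
    {
        void Start()
        {
            LLM llm = GetComponent<LLM>();
            llm.remote = true;                  // expose the server remotely
            llm.SetSSLCert("certs/server.crt"); // hypothetical certificate path
            llm.SetSSLKey("certs/server.key");  // hypothetical key path
        }
    }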
|
| |
| string | GetTemplate () |
| | Returns the chat template of the LLM.
|
| |
| int | Register (LLMCaller llmCaller) |
| | Registers a local LLMCaller object, binding the LLMCaller "client" to a specific slot of the LLM.
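
Registration is normally handled by the LLMCaller itself when it connects to a local LLM; a manual sketch, assuming an LLMCaller component is present:

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class SlotBinding : MonoBehaviour
    {
        void Start()
        {
            LLM llm = GetComponent<LLM>();
            LLMCaller caller = GetComponent<LLMCaller>();
            int slot = llm.Register(caller); // binds the caller to a dedicated slot
            Debug.Log($"Caller bound to slot {slot}");
        }
    }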
|
| |
| void | Update () |
| | The Unity Update function. It is used to retrieve the LLM replies.
|
| |
| async Task< string > | Tokenize (string json) |
| | Tokenises the provided query.
|
| |
| async Task< string > | Detokenize (string json) |
| | Detokenises the provided query.
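
A sketch of a tokenise/detokenise round trip; the JSON payloads ("content" in, "tokens" out) assume the underlying llama.cpp server schema:

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class TokenRoundTrip : MonoBehaviour
    {
        async void Start()
        {
            LLM llm = GetComponent<LLM>();
            // Assumed request schema: {"content": "..."}.
            string tokens = await llm.Tokenize("{\"content\": \"Hello Unity\"}");
            Debug.Log($"Tokenize reply: {tokens}");
            // The reply ({"tokens": [...]}) doubles as the detokenise request here.
            string text = await llm.Detokenize(tokens);
            Debug.Log($"Detokenize reply: {text}");
        }
    }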
|
| |
| async Task< string > | Embeddings (string json) |
| | Computes the embeddings of the provided query.
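
A sketch of an embedding query; the request schema again assumes the llama.cpp server convention:

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class EmbeddingQuery : MonoBehaviour
    {
        async void Start()
        {
            LLM llm = GetComponent<LLM>();
            // Assumed request schema: {"content": "..."}.
            string reply = await llm.Embeddings("{\"content\": \"vectorise me\"}");
            Debug.Log($"Embeddings reply: {reply}");
        }
    }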
|
| |
| void | ApplyLoras () |
| | Sets the LoRA scales; this only works after the LLM service has started.
|
| |
| async Task< List< LoraWeightResult > > | ListLoras () |
| | Gets a list of the LoRA adapters.
|
| |
| async Task< string > | Slot (string json) |
| | Saves or restores the state of a slot.
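
A sketch of saving and restoring slot state; the field names (id_slot, action, filepath) are assumptions based on the llama.cpp slot API, so check the package source for the exact schema:

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class SlotStateDemo : MonoBehaviour
    {
        async void Start()
        {
            LLM llm = GetComponent<LLM>();
            // Assumed JSON fields; "slot0.state" is a hypothetical cache file name.
            await llm.Slot("{\"id_slot\": 0, \"action\": \"save\", \"filepath\": \"slot0.state\"}");
            await llm.Slot("{\"id_slot\": 0, \"action\": \"restore\", \"filepath\": \"slot0.state\"}");
        }
    }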
|
| |
| async Task< string > | Completion (string json, Callback< string > streamCallback=null) |
| | Provides the chat and completion functionality of the LLM.
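
A sketch of a streaming completion; the request fields ("prompt", "n_predict") assume the llama.cpp completion API:

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class CompletionDemo : MonoBehaviour
    {
        async void Start()
        {
            LLM llm = GetComponent<LLM>();
            await llm.WaitUntilReady();
            // Assumed request schema following the llama.cpp completion API.
            string json = "{\"prompt\": \"Once upon a time\", \"n_predict\": 32}";
            string result = await llm.Completion(json, chunk => Debug.Log(chunk));
            Debug.Log($"Final: {result}");
            // llm.CancelRequest(0); // would cancel a request running in slot 0
        }
    }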
|
| |
| void | CancelRequest (int id_slot) |
| | Cancels the requests in a specific slot of the LLM.
|
| |
| void | Destroy () |
| | Stops and destroys the LLM.
|
| |
| void | OnDestroy () |
| | The Unity OnDestroy function called when the object is destroyed. The function StopProcess is called to stop the LLM server.
|
| |
|
| bool | advancedOptions = false |
| | show/hide advanced options in the GameObject
|
| |
| bool | remote = false |
| | enable remote server functionality
|
| |
| int | port = 13333 |
| | port to use for the remote LLM server
|
| |
| int | numThreads = -1 |
| | number of threads to use (-1 = all)
|
| |
| int | numGPULayers = 0 |
| | number of model layers to offload to the GPU (0 = GPU not used). If the user's GPU is not supported, the LLM will fall back to the CPU
|
| |
| bool | debug = false |
| | log the output of the LLM in the Unity Editor.
|
| |
| int | parallelPrompts = -1 |
| | number of prompts that can happen in parallel (-1 = number of LLMCaller objects)
|
| |
| bool | dontDestroyOnLoad = true |
| | do not destroy the LLM GameObject when loading a new Scene.
|
| |
| int | contextSize = 8192 |
| | Size of the prompt context (0 = context size of the model). This is the number of tokens the model can take as input when generating responses.
|
| |
| int | batchSize = 512 |
| | Batch size for prompt processing.
|
| |
| string | model = "" |
| | LLM model to use (.gguf format)
|
| |
| string | chatTemplate = ChatTemplate.DefaultTemplate |
| | Chat template for the model.
|
| |
| string | lora = "" |
| | LoRA models to use (.gguf format)
|
| |
| string | loraWeights = "" |
| | the weights of the LoRA models being used.
|
| |
| bool | flashAttention = false |
| | enable use of flash attention
|
| |
| string | APIKey |
| | API key to use for the server.
|
| |
| string | SSLCertPath = ""
| | path to the SSL certificate used by the LLM server

| |
| string | SSLKeyPath = ""
| | path to the SSL key used by the LLM server
| |
Class implementing the LLM server.
Definition at line 18 of file LLM.cs.
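
A sketch pulling the public fields together when configuring the component from code; fields are normally set in the Inspector, and the inactive-GameObject trick (so that Awake, which starts the server, runs only after configuration) is an assumption:

    using UnityEngine;
    using LLMUnity; // assumed package namespace

    public class LLMConfig : MonoBehaviour
    {
        void Start()
        {
            // Keep the GameObject inactive so LLM.Awake does not fire
            // before the fields are configured.
            var go = new GameObject("LLM");
            go.SetActive(false);
            var llm = go.AddComponent<LLM>();
            llm.remote = true;       // serve remote LLMCaller clients
            llm.port = 13333;
            llm.numThreads = -1;     // use all available threads
            llm.numGPULayers = 20;   // offload 20 layers to the GPU if supported
            llm.contextSize = 8192;
            llm.batchSize = 512;
            llm.SetModel("path/to/model.gguf"); // hypothetical .gguf path
            go.SetActive(true);      // Awake now runs and starts the server
        }
    }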