4using System.Collections.Generic;
7using System.Threading.Tasks;
8using UndreamAI.LlamaLib;
20 public class LLM : MonoBehaviour
22 #region Inspector Fields
24 [Tooltip(
"Show/hide advanced options in the inspector")]
28 [Tooltip(
"Enable remote server functionality to allow external connections")]
29 [LocalRemote, SerializeField]
private bool _remote =
false;
32 [Tooltip(
"Port to use for the remote LLM server")]
33 [Remote, SerializeField]
private int _port = 13333;
36 [Tooltip(
"API key required for server access (leave empty to disable authentication)")]
37 [SerializeField]
private string _APIKey =
"";
40 [Tooltip(
"SSL certificate for the remote LLM server")]
41 [SerializeField]
private string _SSLCert =
"";
44 [Tooltip(
"SSL key for the remote LLM server")]
45 [SerializeField]
private string _SSLKey =
"";
48 [Tooltip(
"Number of threads to use for processing (-1 = use all available threads)")]
49 [
LLM, SerializeField]
private int _numThreads = -1;
52 [Tooltip(
"Number of model layers to offload to GPU (0 = CPU only). Falls back to CPU if GPU unsupported")]
53 [
LLM, SerializeField]
private int _numGPULayers = 0;
56 [Tooltip(
"Number of prompts that can be processed in parallel (-1 = auto-detect from clients)")]
57 [
LLM, SerializeField]
private int _parallelPrompts = -1;
60 [Tooltip(
"Size of the prompt context in tokens (0 = use model's default context size)")]
61 [DynamicRange(
"minContextLength",
"maxContextLength",
false), Model, SerializeField]
private int _contextSize = 8192;
64 [Tooltip(
"Batch size for prompt processing (larger = more memory, potentially faster)")]
65 [ModelAdvanced, SerializeField]
private int _batchSize = 512;
68 [Tooltip(
"LLM model file path (.gguf format)")]
69 [ModelAdvanced, SerializeField]
private string _model =
"";
72 [Tooltip(
"Enable flash attention optimization (requires compatible model)")]
73 [ModelExtras, SerializeField]
private bool _flashAttention =
false;
76 [Tooltip(
"Enable LLM reasoning ('thinking' mode)")]
77 [ModelAdvanced, SerializeField]
private bool _reasoning =
false;
80 [Tooltip(
"LORA adapter model paths (.gguf format), separated by commas")]
81 [ModelAdvanced, SerializeField]
private string _lora =
"";
84 [Tooltip(
"Weights for LORA adapters, separated by commas (default: 1.0 for each)")]
85 [ModelAdvanced, SerializeField]
private string _loraWeights =
"";
88 [Tooltip(
"Persist this LLM GameObject across scene transitions")]
93 [Tooltip(
"True if this model only supports embeddings (no text generation)")]
94 private bool _embeddingsOnly =
false;
98 [Tooltip(
"Number of dimensions in embedding vectors (0 if not an embedding model)")]
99 private int _embeddingLength = 0;
102 #region Public Properties with Validation
120 get => _numGPULayers;
126 _numGPULayers = value;
133 get => _parallelPrompts;
138 LLMUnitySetup.LogError(
"parallelPrompts must be >= -1",
true);
139 _parallelPrompts = value;
152 _contextSize = value;
172 get => _flashAttention;
176 _flashAttention = value;
200 if (value == _lora)
return;
203 UpdateLoraManagerFromStrings();
213 if (value == _loraWeights)
return;
214 _loraWeights = value;
215 UpdateLoraManagerFromStrings();
226 if (value == _remote)
return;
238 if (value == _port)
return;
239 if (value < 0 || value > 65535)
240 LLMUnitySetup.LogError(
"port must be between 0 and 65535",
true);
252 if (value == _APIKey)
return;
265 if (value == _SSLCert)
return;
277 if (value == _SSLKey)
return;
284 #region Other Public Properties
286 public bool started {
get;
private set; } =
false;
289 public bool failed {
get;
private set; } =
false;
301 [Tooltip(
"Model architecture name (e.g., llama, mistral)")]
305 [Tooltip(
"True if this model only supports embeddings (no text generation)")]
309 [Tooltip(
"Number of dimensions in embedding vectors (0 if not an embedding model)")]
313 #region Private Fields
315 public int minContextLength = 0;
316 public int maxContextLength = 0;
318 private LlamaLib llmlib =
null;
320 protected LLMService _llmService;
321 private readonly List<LLMClient> clients =
new List<LLMClient>();
323 private static readonly
object staticLock =
new object();
326 string loraWeightsPre =
"";
330 #region Unity Lifecycle
351 if (!enabled)
return;
363 await StartServiceAsync();
368 public void OnDestroy()
376 #region Initialization
377 private void ValidateParameters()
381 LLMUnitySetup.LogError(
"Both SSL certificate and key must be provided together!",
true);
385 private string GetValidatedModelPath()
387 if (
string.IsNullOrEmpty(
model))
389 LLMUnitySetup.LogError(
"No model file provided!",
true);
392 string modelPath = GetLLMManagerAssetRuntime(
model);
393 if (!File.Exists(modelPath))
395 LLMUnitySetup.LogError($
"Model file not found: {modelPath}",
true);
400 private List<string> GetValidatedLoraPaths()
403 List<string> loraPaths =
new List<string>();
405 foreach (
string loraPath
in loraManager.
GetLoras())
407 string resolvedPath = GetLLMManagerAssetRuntime(loraPath);
408 if (!File.Exists(resolvedPath))
410 LLMUnitySetup.LogError($
"LORA file not found: {resolvedPath}",
true);
412 loraPaths.Add(resolvedPath);
417 private async Task StartServiceAsync()
424 ValidateParameters();
425 string modelPath = GetValidatedModelPath();
426 List<string> loraPaths = GetValidatedLoraPaths();
429 await CreateServiceAsync(modelPath, loraPaths);
431 catch (LLMUnityException ex)
433 LLMUnitySetup.LogError(ex.Message);
439 LLMUnitySetup.LogError($
"Failed to create LLM service: {ex.Message}");
447 LLMUnitySetup.Log($
"LLM service created successfully, using {architecture}");
451 private void CreateLib()
453 if (LLMUnitySetup.DebugMode <= LLMUnitySetup.DebugModeType.All)
456 if (LLMUnitySetup.DebugMode == LLMUnitySetup.DebugModeType.Debug) debugLevel = 5;
457 LlamaLib.Debug(debugLevel);
459 IL2CPP_Logging.LoggingCallback(LLMUnitySetup.Log);
461 LlamaLib.LoggingCallback(LLMUnitySetup.Log);
465 llmlib =
new LlamaLib(useGPU);
471 private void SetupServer()
475 if (!
string.IsNullOrEmpty(
SSLCert) && !
string.IsNullOrEmpty(
SSLKey))
477 LLMUnitySetup.Log(
"Enabling SSL for server");
486 private void RestartServer()
493 private async Task CreateServiceAsync(
string modelPath, List<string> loraPaths)
495 int numSlots = GetNumClients();
498 if (Application.platform == RuntimePlatform.Android &&
numThreads <= 0)
500 effectiveThreads = LLMUnitySetup.AndroidGetNumBigCores();
503 string processorType = SystemInfo.processorType;
508 IntPtr llmPtr = LLMService.CreateLLM(
509 llmlib, modelPath, numSlots, effectiveThreads,
numGPULayers,
514 string serverString =
"llamalib_**architecture**_server";
515 if (Application.platform == RuntimePlatform.WindowsEditor || Application.platform == RuntimePlatform.WindowsPlayer || Application.platform == RuntimePlatform.WindowsServer)
516 serverString =
"llamalib_win-x64_server.exe";
517 else if (Application.platform == RuntimePlatform.OSXEditor || Application.platform == RuntimePlatform.OSXPlayer || Application.platform == RuntimePlatform.OSXServer)
518 serverString = processorType.Contains(
"Intel") ?
"llamalib_osx-x64_server" :
"llamalib_osx-arm64_server";
519 else if (Application.platform == RuntimePlatform.LinuxEditor || Application.platform == RuntimePlatform.LinuxPlayer || Application.platform == RuntimePlatform.LinuxServer)
520 serverString =
"llamalib_linux-x64_server";
521 LLMUnitySetup.Log($
"Deploy server command: {serverString} {llmService.Command}");
537 #region Public Methods
562 if (downloadProgressCallback !=
null)
564 LLMManager.downloadProgressCallbacks.Add(downloadProgressCallback);
581 if (
model == path)
return;
584 _model = GetLLMManagerAsset(path);
585 if (
string.IsNullOrEmpty(
model))
return;
589 maxContextLength = modelEntry.contextLength;
595 SetEmbeddings(modelEntry.embeddingLength, modelEntry.embeddingOnly);
597 if (
contextSize == 0 && modelEntry.contextLength > 32768)
599 LLMUnitySetup.LogWarning($
"Model {path} has large context size ({modelEntry.contextLength}). Consider setting contextSize to ≤32768 to avoid excessive memory usage.");
603 if (!EditorApplication.isPlaying) EditorUtility.SetDirty(
this);
627 if (!EditorApplication.isPlaying) EditorUtility.SetDirty(
this);
638 if (llmClient ==
null)
643 clients.Add(llmClient);
658 #region LORA Management
664 public void SetLora(
string path,
float weight = 1f)
676 public void AddLora(
string path,
float weight = 1f)
679 loraManager.
Add(path, weight);
722 if (loraToWeight ==
null)
727 foreach (var entry
in loraToWeight)
729 loraManager.
SetWeight(entry.Key, entry.Value);
735 private void UpdateLoras()
737 (_lora, _loraWeights) = loraManager.ToStrings();
739 if (!EditorApplication.isPlaying) EditorUtility.SetDirty(
this);
743 private void UpdateLoraManagerFromStrings()
748 private void ApplyLoras()
751 var loras =
new List<LoraIdScale>();
754 for (
int i = 0; i < weights.Length; i++)
756 loras.Add(
new LoraIdScale(i, weights[i]));
767 #region SSL Configuration
774 SSLCert = ReadFileContents(path);
783 SSLKey = ReadFileContents(path);
786 private string ReadFileContents(
string path)
788 if (
string.IsNullOrEmpty(path))
return "";
790 if (!File.Exists(path))
796 return File.ReadAllText(path);
801 #region Helper Methods
802 private int GetNumClients()
807 private void AssertStarted()
810 if (
failed) error =
"LLM service couldn't be created";
811 else if (!
started) error =
"LLM service not started";
812 if (error !=
null) LLMUnitySetup.LogError(error,
true);
815 private void AssertNotStarted()
817 if (
started) LLMUnitySetup.LogError(
"This method can't be called when the LLM has started",
true);
836 LLMUnitySetup.LogError($
"Error during LLM cleanup: {ex.Message}");
843 #region Static Asset Management
845 public static string GetLLMManagerAsset(
string path)
848 if (!EditorApplication.isPlaying)
return GetLLMManagerAssetEditor(path);
850 return GetLLMManagerAssetRuntime(path);
853 public static string GetLLMManagerAssetEditor(
string path)
855 if (
string.IsNullOrEmpty(path))
return path;
858 ModelEntry modelEntry = LLMManager.Get(path);
859 if (modelEntry !=
null)
return modelEntry.filename;
862 string assetPath = LLMUnitySetup.GetAssetPath(path);
863 string basePath = LLMUnitySetup.GetAssetPath();
865 if (File.Exists(assetPath) && LLMUnitySetup.IsSubPath(assetPath, basePath))
867 return LLMUnitySetup.RelativePath(assetPath, basePath);
871 if (File.Exists(assetPath))
873 string errorMessage = $
"The model {path} was loaded locally. You can include it in the build in one of these ways:";
874 errorMessage += $
"\n-Copy the model inside the StreamingAssets folder and use its StreamingAssets path";
875 errorMessage += $
"\n-Load the model with the model manager inside the LLM GameObject and use its filename";
876 LLMUnitySetup.LogWarning(errorMessage);
880 LLMUnitySetup.LogError($
"Model file not found: {path}");
886 public static string GetLLMManagerAssetRuntime(
string path)
888 if (
string.IsNullOrEmpty(path))
return path;
891 string managerPath = LLMManager.GetAssetPath(path);
892 if (!
string.IsNullOrEmpty(managerPath) && File.Exists(managerPath))
898 string assetPath = LLMUnitySetup.GetAssetPath(path);
899 if (File.Exists(assetPath))
return assetPath;
902 string downloadPath = LLMUnitySetup.GetDownloadAssetPath(path);
903 if (File.Exists(downloadPath))
return downloadPath;
Unity MonoBehaviour base class for LLM client functionality. Handles both local and remote LLM connec...
Class implementing the LLM model manager.
static void Unregister(LLM llm)
Removes an LLM from the model manager.
static ModelEntry Get(string path)
Gets the model entry for a model path.
static Task< bool > Setup()
Sets up the models.
static void Register(LLM llm)
Registers an LLM with the model manager.
Class implementing helper functions for setup and process management.
Unity MonoBehaviour component that manages a local LLM server instance. Handles model loading,...
int numGPULayers
Number of model layers to offload to GPU (0 = CPU only)
void SetLoraWeights(Dictionary< string, float > loraToWeight)
Changes the weights of multiple LORA adapters.
static async Task< bool > WaitUntilModelSetup(Action< float > downloadProgressCallback=null)
Waits asynchronously until model setup is complete.
void SetLoraWeight(string path, float weight)
Changes the weight of a specific LORA adapter.
LLMService llmService
The underlying LLM service instance.
int parallelPrompts
Number of prompts that can be processed in parallel (-1 = auto-detect from clients)
string architecture
Model architecture name (e.g., llama, mistral)
async void Awake()
Unity Awake method that initializes the LLM server. Sets up the model, starts the service,...
void SetReasoning(bool reasoning)
Enables LLM reasoning ("thinking" mode)
bool reasoning
Enable LLM reasoning ('thinking' mode)
bool advancedOptions
Show/hide advanced options in the inspector.
void RemoveLora(string path)
Removes a specific LORA adapter.
bool embeddingsOnly
True if this model only supports embeddings (no text generation)
static bool modelSetupFailed
True if model setup failed during initialization.
string lora
LORA adapter model paths (.gguf format), separated by commas.
int contextSize
Size of the prompt context in tokens (0 = use model's default context size)
void SetSSLCertFromFile(string path)
Sets the SSL certificate for secure server connections.
int numThreads
Number of threads to use for processing (-1 = use all available threads)
bool started
True if the LLM server has started and is ready to receive requests.
void SetModel(string path)
Sets the model file to use. Automatically configures context size and embedding settings.
int port
Port to use for the remote LLM server.
bool remote
Enable remote server functionality to allow external connections.
void SetLora(string path, float weight=1f)
Sets a single LORA adapter, replacing any existing ones.
void RemoveLoras()
Removes all LORA adapters.
string SSLKey
SSL key for the remote LLM server.
bool dontDestroyOnLoad
Persist this LLM GameObject across scene transitions.
void SetEmbeddings(int embeddingLength, bool embeddingsOnly)
Configures the LLM for embedding generation.
string model
LLM model file path (.gguf format)
string APIKey
API key required for server access (leave empty to disable authentication)
int embeddingLength
Number of dimensions in embedding vectors (0 if not an embedding model)
void Register(LLMClient llmClient)
Registers an LLMClient for slot management.
int batchSize
Batch size for prompt processing (larger = more memory, potentially faster)
bool flashAttention
Enable flash attention optimization (requires compatible model)
static bool modelSetupComplete
True if model setup completed (successfully or not)
void AddLora(string path, float weight=1f)
Adds a LORA adapter to the existing set.
void SetSSLKeyFromFile(string path)
Sets the SSL private key for secure server connections.
async Task WaitUntilReady()
Waits asynchronously until the LLM is ready to accept requests.
void Destroy()
Stops and cleans up the LLM service.
string loraWeights
Weights for LORA adapters, separated by commas (default: 1.0 for each)
bool failed
True if the LLM server failed to start.
List< LoraIdScalePath > ListLoras()
Gets a list of loaded LORA adapters.
string SSLCert
SSL certificate for the remote LLM server.
Class representing the LORA manager allowing to convert and retrieve LORA assets to string (for seria...
float[] GetWeights()
Gets the weights of the LORAs in the manager.
void Add(string path, float weight=1)
Adds a LORA with the defined weight.
void Remove(string path)
Removes a LORA based on its path.
void SetWeight(string path, float weight)
Modifies the weight of a LORA.
void FromStrings(string loraString, string loraWeightsString)
Converts strings with the lora paths and weights to entries in the LORA manager.
string[] GetLoras()
Gets the paths of the LORAs in the manager.
void Clear()
Clears the LORA assets.
Class implementing a LLM model entry.