4using System.Collections.Generic;
7using System.Threading.Tasks;
8using UndreamAI.LlamaLib;
20 public class LLM : MonoBehaviour
22 #region Inspector Fields
24 [Tooltip(
"Show/hide advanced options in the inspector")]
28 [Tooltip(
"Enable remote server functionality to allow external connections")]
29 [LocalRemote, SerializeField]
private bool _remote =
false;
32 [Tooltip(
"Port to use for the remote LLM server")]
33 [Remote, SerializeField]
private int _port = 13333;
36 [Tooltip(
"API key required for server access (leave empty to disable authentication)")]
37 [SerializeField]
private string _APIKey =
"";
40 [Tooltip(
"SSL certificate for the remote LLM server")]
41 [SerializeField]
private string _SSLCert =
"";
44 [Tooltip(
"SSL key for the remote LLM server")]
45 [SerializeField]
private string _SSLKey =
"";
48 [Tooltip(
"Number of threads to use for processing (-1 = use all available threads)")]
49 [
LLM, SerializeField]
private int _numThreads = -1;
52 [Tooltip(
"Number of model layers to offload to GPU (0 = CPU only). Falls back to CPU if GPU unsupported")]
53 [
LLM, SerializeField]
private int _numGPULayers = 0;
56 [Tooltip(
"Number of prompts that can be processed in parallel (-1 = auto-detect from clients)")]
57 [
LLM, SerializeField]
private int _parallelPrompts = -1;
60 [Tooltip(
"Size of the prompt context in tokens (0 = use model's default context size)")]
61 [DynamicRange(
"minContextLength",
"maxContextLength",
false), Model, SerializeField]
private int _contextSize = 8192;
64 [Tooltip(
"Batch size for prompt processing (larger = more memory, potentially faster)")]
65 [ModelAdvanced, SerializeField]
private int _batchSize = 512;
68 [Tooltip(
"LLM model file path (.gguf format)")]
69 [ModelAdvanced, SerializeField]
private string _model =
"";
72 [Tooltip(
"Enable flash attention optimization (requires compatible model)")]
73 [ModelExtras, SerializeField]
private bool _flashAttention =
false;
76 [Tooltip(
"Enable LLM reasoning ('thinking' mode)")]
77 [ModelAdvanced, SerializeField]
private bool _reasoning =
false;
80 [Tooltip(
"LORA adapter model paths (.gguf format), separated by commas")]
81 [ModelAdvanced, SerializeField]
private string _lora =
"";
84 [Tooltip(
"Weights for LORA adapters, separated by commas (default: 1.0 for each)")]
85 [ModelAdvanced, SerializeField]
private string _loraWeights =
"";
88 [Tooltip(
"Persist this LLM GameObject across scene transitions")]
93 [Tooltip(
"True if this model only supports embeddings (no text generation)")]
94 private bool _embeddingsOnly =
false;
98 [Tooltip(
"Number of dimensions in embedding vectors (0 if not an embedding model)")]
99 private int _embeddingLength = 0;
102 #region Public Properties with Validation
120 get => _numGPULayers;
126 _numGPULayers = value;
133 get => _parallelPrompts;
138 LLMUnitySetup.LogError(
"parallelPrompts must be >= -1",
true);
139 _parallelPrompts = value;
152 _contextSize = value;
172 get => _flashAttention;
176 _flashAttention = value;
200 if (value == _lora)
return;
203 UpdateLoraManagerFromStrings();
213 if (value == _loraWeights)
return;
214 _loraWeights = value;
215 UpdateLoraManagerFromStrings();
226 if (value == _remote)
return;
238 if (value == _port)
return;
239 if (value < 0 || value > 65535)
240 LLMUnitySetup.LogError(
"port must be between 0 and 65535",
true);
252 if (value == _APIKey)
return;
265 if (value == _SSLCert)
return;
277 if (value == _SSLKey)
return;
284 #region Other Public Properties
286 public bool started {
get;
private set; } =
false;
289 public bool failed {
get;
private set; } =
false;
301 [Tooltip(
"Model architecture name (e.g., llama, mistral)")]
305 [Tooltip(
"True if this model only supports embeddings (no text generation)")]
309 [Tooltip(
"Number of dimensions in embedding vectors (0 if not an embedding model)")]
313 #region Private Fields
315 public int minContextLength = 0;
316 public int maxContextLength = 0;
318 private LlamaLib llmlib =
null;
320 protected LLMService _llmService;
321 private readonly List<LLMClient> clients =
new List<LLMClient>();
323 private static readonly
object staticLock =
new object();
326 string loraWeightsPre =
"";
330 #region Unity Lifecycle
351 if (!enabled)
return;
363 await StartServiceAsync();
368 public void OnDestroy()
376 #region Initialization
377 private void ValidateParameters()
381 LLMUnitySetup.LogError(
"Both SSL certificate and key must be provided together!",
true);
385 private string GetValidatedModelPath()
387 if (
string.IsNullOrEmpty(
model))
389 LLMUnitySetup.LogError(
"No model file provided!",
true);
392 string modelPath = GetLLMManagerAssetRuntime(
model);
393 if (!File.Exists(modelPath))
395 LLMUnitySetup.LogError($
"Model file not found: {modelPath}",
true);
400 private List<string> GetValidatedLoraPaths()
403 List<string> loraPaths =
new List<string>();
405 foreach (
string loraPath
in loraManager.
GetLoras())
407 string resolvedPath = GetLLMManagerAssetRuntime(loraPath);
408 if (!File.Exists(resolvedPath))
410 LLMUnitySetup.LogError($
"LORA file not found: {resolvedPath}",
true);
412 loraPaths.Add(resolvedPath);
417 private async Task StartServiceAsync()
424 ValidateParameters();
425 string modelPath = GetValidatedModelPath();
426 List<string> loraPaths = GetValidatedLoraPaths();
429 await CreateServiceAsync(modelPath, loraPaths);
431 catch (LLMUnityException ex)
433 LLMUnitySetup.LogError(ex.Message);
439 LLMUnitySetup.LogError($
"Failed to create LLM service: {ex.Message}");
447 LLMUnitySetup.Log($
"LLM service created successfully, using {architecture}");
451 private void CreateLib()
453 if (LLMUnitySetup.DebugMode <= LLMUnitySetup.DebugModeType.All)
455 LlamaLib.Debug(LLMUnitySetup.DebugModeType.All - LLMUnitySetup.DebugMode + 1);
457 IL2CPP_Logging.LoggingCallback(LLMUnitySetup.Log);
459 LlamaLib.LoggingCallback(LLMUnitySetup.Log);
463 llmlib =
new LlamaLib(useGPU);
469 private void SetupServer()
473 if (!
string.IsNullOrEmpty(
SSLCert) && !
string.IsNullOrEmpty(
SSLKey))
475 LLMUnitySetup.Log(
"Enabling SSL for server");
484 private void RestartServer()
491 private async Task CreateServiceAsync(
string modelPath, List<string> loraPaths)
493 int numSlots = GetNumClients();
496 if (Application.platform == RuntimePlatform.Android &&
numThreads <= 0)
498 effectiveThreads = LLMUnitySetup.AndroidGetNumBigCores();
501 string processorType = SystemInfo.processorType;
506 IntPtr llmPtr = LLMService.CreateLLM(
507 llmlib, modelPath, numSlots, effectiveThreads,
numGPULayers,
512 string serverString =
"llamalib_**architecture**_server";
513 if (Application.platform == RuntimePlatform.WindowsEditor || Application.platform == RuntimePlatform.WindowsPlayer || Application.platform == RuntimePlatform.WindowsServer)
514 serverString =
"llamalib_win-x64_server.exe";
515 else if (Application.platform == RuntimePlatform.OSXEditor || Application.platform == RuntimePlatform.OSXPlayer || Application.platform == RuntimePlatform.OSXServer)
516 serverString = processorType.Contains(
"Intel") ?
"llamalib_osx-x64_server" :
"llamalib_osx-arm64_server";
517 else if (Application.platform == RuntimePlatform.LinuxEditor || Application.platform == RuntimePlatform.LinuxPlayer || Application.platform == RuntimePlatform.LinuxServer)
518 serverString =
"llamalib_linux-x64_server";
519 LLMUnitySetup.Log($
"Deploy server command: {serverString} {llmService.Command}");
535 #region Public Methods
560 if (downloadProgressCallback !=
null)
562 LLMManager.downloadProgressCallbacks.Add(downloadProgressCallback);
579 if (
model == path)
return;
582 _model = GetLLMManagerAsset(path);
583 if (
string.IsNullOrEmpty(
model))
return;
587 maxContextLength = modelEntry.contextLength;
593 SetEmbeddings(modelEntry.embeddingLength, modelEntry.embeddingOnly);
595 if (
contextSize == 0 && modelEntry.contextLength > 32768)
597 LLMUnitySetup.LogWarning($
"Model {path} has large context size ({modelEntry.contextLength}). Consider setting contextSize to ≤32768 to avoid excessive memory usage.");
601 if (!EditorApplication.isPlaying) EditorUtility.SetDirty(
this);
625 if (!EditorApplication.isPlaying) EditorUtility.SetDirty(
this);
636 if (llmClient ==
null)
641 clients.Add(llmClient);
656 #region LORA Management
662 public void SetLora(
string path,
float weight = 1f)
674 public void AddLora(
string path,
float weight = 1f)
677 loraManager.
Add(path, weight);
720 if (loraToWeight ==
null)
725 foreach (var entry
in loraToWeight)
727 loraManager.
SetWeight(entry.Key, entry.Value);
733 private void UpdateLoras()
735 (_lora, _loraWeights) = loraManager.ToStrings();
737 if (!EditorApplication.isPlaying) EditorUtility.SetDirty(
this);
741 private void UpdateLoraManagerFromStrings()
746 private void ApplyLoras()
749 var loras =
new List<LoraIdScale>();
752 for (
int i = 0; i < weights.Length; i++)
754 loras.Add(
new LoraIdScale(i, weights[i]));
765 #region SSL Configuration
772 SSLCert = ReadFileContents(path);
781 SSLKey = ReadFileContents(path);
784 private string ReadFileContents(
string path)
786 if (
string.IsNullOrEmpty(path))
return "";
788 if (!File.Exists(path))
794 return File.ReadAllText(path);
799 #region Helper Methods
800 private int GetNumClients()
805 private void AssertStarted()
808 if (
failed) error =
"LLM service couldn't be created";
809 else if (!
started) error =
"LLM service not started";
810 if (error !=
null) LLMUnitySetup.LogError(error,
true);
813 private void AssertNotStarted()
815 if (
started) LLMUnitySetup.LogError(
"This method can't be called when the LLM has started",
true);
834 LLMUnitySetup.LogError($
"Error during LLM cleanup: {ex.Message}");
841 #region Static Asset Management
843 public static string GetLLMManagerAsset(
string path)
846 if (!EditorApplication.isPlaying)
return GetLLMManagerAssetEditor(path);
848 return GetLLMManagerAssetRuntime(path);
851 public static string GetLLMManagerAssetEditor(
string path)
853 if (
string.IsNullOrEmpty(path))
return path;
856 ModelEntry modelEntry = LLMManager.Get(path);
857 if (modelEntry !=
null)
return modelEntry.filename;
860 string assetPath = LLMUnitySetup.GetAssetPath(path);
861 string basePath = LLMUnitySetup.GetAssetPath();
863 if (File.Exists(assetPath) && LLMUnitySetup.IsSubPath(assetPath, basePath))
865 return LLMUnitySetup.RelativePath(assetPath, basePath);
869 if (File.Exists(assetPath))
871 string errorMessage = $
"The model {path} was loaded locally. You can include it in the build in one of these ways:";
872 errorMessage += $
"\n-Copy the model inside the StreamingAssets folder and use its StreamingAssets path";
873 errorMessage += $
"\n-Load the model with the model manager inside the LLM GameObject and use its filename";
874 LLMUnitySetup.LogWarning(errorMessage);
878 LLMUnitySetup.LogError($
"Model file not found: {path}");
884 public static string GetLLMManagerAssetRuntime(
string path)
886 if (
string.IsNullOrEmpty(path))
return path;
889 string managerPath = LLMManager.GetAssetPath(path);
890 if (!
string.IsNullOrEmpty(managerPath) && File.Exists(managerPath))
896 string assetPath = LLMUnitySetup.GetAssetPath(path);
897 if (File.Exists(assetPath))
return assetPath;
900 string downloadPath = LLMUnitySetup.GetDownloadAssetPath(path);
901 if (File.Exists(downloadPath))
return downloadPath;
Unity MonoBehaviour base class for LLM client functionality. Handles both local and remote LLM connections.
Class implementing the LLM model manager.
static void Unregister(LLM llm)
Removes an LLM from the model manager.
static ModelEntry Get(string path)
Gets the model entry for a model path.
static Task< bool > Setup()
Setup of the models.
static void Register(LLM llm)
Registers an LLM to the model manager.
Class implementing helper functions for setup and process management.
Unity MonoBehaviour component that manages a local LLM server instance. Handles model loading and server lifecycle.
int numGPULayers
Number of model layers to offload to GPU (0 = CPU only)
void SetLoraWeights(Dictionary< string, float > loraToWeight)
Changes the weights of multiple LORA adapters.
static async Task< bool > WaitUntilModelSetup(Action< float > downloadProgressCallback=null)
Waits asynchronously until model setup is complete.
void SetLoraWeight(string path, float weight)
Changes the weight of a specific LORA adapter.
LLMService llmService
The underlying LLM service instance.
int parallelPrompts
Number of prompts that can be processed in parallel (-1 = auto-detect from clients)
string architecture
Model architecture name (e.g., llama, mistral)
async void Awake()
Unity Awake method that initializes the LLM server. Sets up the model and starts the service.
void SetReasoning(bool reasoning)
Enable LLM reasoning ("thinking" mode)
bool reasoning
Enable LLM reasoning ('thinking' mode)
bool advancedOptions
Show/hide advanced options in the inspector.
void RemoveLora(string path)
Removes a specific LORA adapter.
bool embeddingsOnly
True if this model only supports embeddings (no text generation)
static bool modelSetupFailed
True if model setup failed during initialization.
string lora
LORA adapter model paths (.gguf format), separated by commas.
int contextSize
Size of the prompt context in tokens (0 = use model's default context size)
void SetSSLCertFromFile(string path)
Sets the SSL certificate for secure server connections.
int numThreads
Number of threads to use for processing (-1 = use all available threads)
bool started
True if the LLM server has started and is ready to receive requests.
void SetModel(string path)
Sets the model file to use. Automatically configures context size and embedding settings.
int port
Port to use for the remote LLM server.
bool remote
Enable remote server functionality to allow external connections.
void SetLora(string path, float weight=1f)
Sets a single LORA adapter, replacing any existing ones.
void RemoveLoras()
Removes all LORA adapters.
string SSLKey
SSL key for the remote LLM server.
bool dontDestroyOnLoad
Persist this LLM GameObject across scene transitions.
void SetEmbeddings(int embeddingLength, bool embeddingsOnly)
Configure the LLM for embedding generation.
string model
LLM model file path (.gguf format)
string APIKey
API key required for server access (leave empty to disable authentication)
int embeddingLength
Number of dimensions in embedding vectors (0 if not an embedding model)
void Register(LLMClient llmClient)
Registers an LLMClient for slot management.
int batchSize
Batch size for prompt processing (larger = more memory, potentially faster)
bool flashAttention
Enable flash attention optimization (requires compatible model)
static bool modelSetupComplete
True if model setup completed (successfully or not)
void AddLora(string path, float weight=1f)
Adds a LORA adapter to the existing set.
void SetSSLKeyFromFile(string path)
Sets the SSL private key for secure server connections.
async Task WaitUntilReady()
Waits asynchronously until the LLM is ready to accept requests.
void Destroy()
Stops and cleans up the LLM service.
string loraWeights
Weights for LORA adapters, separated by commas (default: 1.0 for each)
bool failed
True if the LLM server failed to start.
List< LoraIdScalePath > ListLoras()
Gets a list of loaded LORA adapters.
string SSLCert
SSL certificate for the remote LLM server.
Class representing the LORA manager, allowing conversion and retrieval of LORA assets to and from strings (for serialization).
float[] GetWeights()
Gets the weights of the LORAs in the manager.
void Add(string path, float weight=1)
Adds a LORA with the defined weight.
void Remove(string path)
Removes a LORA based on its path.
void SetWeight(string path, float weight)
Modifies the weight of a LORA.
void FromStrings(string loraString, string loraWeightsString)
Converts strings with the lora paths and weights to entries in the LORA manager.
string[] GetLoras()
Gets the paths of the LORAs in the manager.
void Clear()
Clears the LORA assets.
Class implementing an LLM model entry.