LLM for Unity  v2.4.2
Create characters in Unity with LLMs!
Loading...
Searching...
No Matches
LLM.cs
Go to the documentation of this file.
1
3using System;
4using System.Collections.Generic;
5using System.IO;
6using System.Threading;
7using System.Threading.Tasks;
8using UnityEditor;
9using UnityEngine;
10
11namespace LLMUnity
12{
13 [DefaultExecutionOrder(-1)]
18 public class LLM : MonoBehaviour
19 {
21 [Tooltip("show/hide advanced options in the GameObject")]
22 [HideInInspector] public bool advancedOptions = false;
24 [Tooltip("enable remote server functionality")]
25 [LocalRemote] public bool remote = false;
27 [Tooltip("port to use for the remote LLM server")]
28 [Remote] public int port = 13333;
30 [Tooltip("number of threads to use (-1 = all)")]
31 [LLM] public int numThreads = -1;
34 [Tooltip("number of model layers to offload to the GPU (0 = GPU not used). If the user's GPU is not supported, the LLM will fall back to the CPU")]
35 [LLM] public int numGPULayers = 0;
37 [Tooltip("log the output of the LLM in the Unity Editor.")]
38 [LLM] public bool debug = false;
40 [Tooltip("number of prompts that can happen in parallel (-1 = number of LLMCaller objects)")]
41 [LLMAdvanced] public int parallelPrompts = -1;
43 [Tooltip("do not destroy the LLM GameObject when loading a new Scene.")]
44 [LLMAdvanced] public bool dontDestroyOnLoad = true;
47 [Tooltip("Size of the prompt context (0 = context size of the model). This is the number of tokens the model can take as input when generating responses.")]
48 [DynamicRange("minContextLength", "maxContextLength", false), Model] public int contextSize = 8192;
50 [Tooltip("Batch size for prompt processing.")]
51 [ModelAdvanced] public int batchSize = 512;
53 public bool started { get; protected set; } = false;
55 public bool failed { get; protected set; } = false;
57 public static bool modelSetupFailed { get; protected set; } = false;
59 public static bool modelSetupComplete { get; protected set; } = false;
61 [Tooltip("LLM model to use (.gguf format)")]
62 [ModelAdvanced] public string model = "";
64 [Tooltip("Chat template for the model")]
65 [ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;
67 [Tooltip("LORA models to use (.gguf format)")]
68 [ModelAdvanced] public string lora = "";
70 [Tooltip("the weights of the LORA models being used.")]
71 [ModelAdvanced] public string loraWeights = "";
73 [Tooltip("enable use of flash attention")]
74 [ModelExtras] public bool flashAttention = false;
76 [Tooltip("API key to use for the server")]
77 public string APIKey;
78
79 // SSL certificate
80 [SerializeField]
81 private string SSLCert = "";
82 public string SSLCertPath = "";
83 // SSL key
84 [SerializeField]
85 private string SSLKey = "";
86 public string SSLKeyPath = "";
87
89 public int minContextLength = 0;
90 public int maxContextLength = 0;
91
        IntPtr LLMObject = IntPtr.Zero;                                 // handle to the native LLM object (IntPtr.Zero when not created)
        List<LLMCaller> clients = new List<LLMCaller>();                // registered LLMCaller "clients", index determines slot
        LLMLib llmlib;                                                  // native library wrapper; null until started / after Destroy
        StreamWrapper logStreamWrapper = null;                          // wrapper forwarding native log output to the Unity console
        Thread llmThread = null;                                        // background thread running the native LLM loop
        List<StreamWrapper> streamWrappers = new List<StreamWrapper>(); // active streaming-reply wrappers, pumped in Update()
        public LLMManager llmManager = new LLMManager();
        private readonly object startLock = new object();               // guards llmlib during startup/destruction
        static readonly object staticLock = new object();               // serialises native init/destroy across all LLM instances
        public LoraManager loraManager = new LoraManager();
        string loraPre = "";                                            // snapshot of lora for OnValidate change detection
        string loraWeightsPre = "";                                     // snapshot of loraWeights for OnValidate change detection
        public bool embeddingsOnly = false;                             // run the model in embedding-only mode
        public int embeddingLength = 0;                                 // embedding vector length reported by the model
106
108
        /// <summary>
        /// Registers this LLM instance with the LLMManager so its model can be set up.
        /// </summary>
        public LLM()
        {
            LLMManager.Register(this);
        }
113
114 void OnValidate()
115 {
116 if (lora != loraPre || loraWeights != loraWeightsPre)
117 {
118 loraManager.FromStrings(lora, loraWeights);
119 (loraPre, loraWeightsPre) = (lora, loraWeights);
120 }
121 }
122
126 public async void Awake()
127 {
128 if (!enabled) return;
129#if !UNITY_EDITOR
131#endif
132 modelSetupComplete = true;
134 {
135 failed = true;
136 return;
137 }
138 string arguments = GetLlamaccpArguments();
139 if (arguments == null)
140 {
141 failed = true;
142 return;
143 }
144 await Task.Run(() => StartLLMServer(arguments));
145 if (!started) return;
146 if (dontDestroyOnLoad) DontDestroyOnLoad(transform.root.gameObject);
147 }
148
152 public async Task WaitUntilReady()
153 {
154 while (!started) await Task.Yield();
155 }
156
161 public static async Task<bool> WaitUntilModelSetup(Callback<float> downloadProgressCallback = null)
162 {
163 if (downloadProgressCallback != null) LLMManager.downloadProgressCallbacks.Add(downloadProgressCallback);
164 while (!modelSetupComplete) await Task.Yield();
165 return !modelSetupFailed;
166 }
167
        /// <summary>
        /// Resolves a model path to the asset location appropriate for the current mode
        /// (editor edit-mode vs. play-mode / runtime).
        /// </summary>
        /// <param name="path">model path or filename to resolve</param>
        /// <returns>the resolved asset path</returns>
        public static string GetLLMManagerAsset(string path)
        {
#if UNITY_EDITOR
            // In edit mode resolve against the editor / StreamingAssets layout.
            if (!EditorApplication.isPlaying) return GetLLMManagerAssetEditor(path);
#endif
            return GetLLMManagerAssetRuntime(path);
        }
176
177 public static string GetLLMManagerAssetEditor(string path)
178 {
179 // empty
180 if (string.IsNullOrEmpty(path)) return path;
181 // LLMManager - return location the file will be stored in StreamingAssets
182 ModelEntry modelEntry = LLMManager.Get(path);
183 if (modelEntry != null) return modelEntry.filename;
184 // StreamingAssets - return relative location within StreamingAssets
185 string assetPath = LLMUnitySetup.GetAssetPath(path); // Note: this will return the full path if a full path is passed
186 string basePath = LLMUnitySetup.GetAssetPath();
187 if (File.Exists(assetPath))
188 {
189 if (LLMUnitySetup.IsSubPath(assetPath, basePath)) return LLMUnitySetup.RelativePath(assetPath, basePath);
190 }
191 // full path
192 if (!File.Exists(assetPath))
193 {
194 LLMUnitySetup.LogError($"Model {path} was not found.");
195 }
196 else
197 {
198 string errorMessage = $"The model {path} was loaded locally. You can include it in the build in one of these ways:";
199 errorMessage += $"\n-Copy the model inside the StreamingAssets folder and use its StreamingAssets path";
200 errorMessage += $"\n-Load the model with the model manager inside the LLM GameObject and use its filename";
201 LLMUnitySetup.LogWarning(errorMessage);
202 }
203 return path;
204 }
205
206 public static string GetLLMManagerAssetRuntime(string path)
207 {
208 // empty
209 if (string.IsNullOrEmpty(path)) return path;
210 // LLMManager
211 string managerPath = LLMManager.GetAssetPath(path);
212 if (!string.IsNullOrEmpty(managerPath) && File.Exists(managerPath)) return managerPath;
213 // StreamingAssets
214 string assetPath = LLMUnitySetup.GetAssetPath(path);
215 if (File.Exists(assetPath)) return assetPath;
216 // download path
217 assetPath = LLMUnitySetup.GetDownloadAssetPath(path);
218 if (File.Exists(assetPath)) return assetPath;
219 // give up
220 return path;
221 }
222
224
        /// <summary>
        /// Sets the model used by the LLM. The model is resolved via the LLM manager
        /// and its chat template, context length and embedding settings are applied.
        /// </summary>
        /// <param name="path">path or filename of the model (.gguf format)</param>
        public void SetModel(string path)
        {
            model = GetLLMManagerAsset(path);
            if (!string.IsNullOrEmpty(model))
            {
                ModelEntry modelEntry = LLMManager.Get(model);
                // Not managed by LLMManager: inspect the file directly.
                if (modelEntry == null) modelEntry = new ModelEntry(GetLLMManagerAssetRuntime(model));
                SetTemplate(modelEntry.chatTemplate);

                maxContextLength = modelEntry.contextLength;
                // NOTE(review): if modelEntry.contextLength is 0 this clamps contextSize to 0
                // ("use model default") — confirm that is the intended behavior.
                if (contextSize > maxContextLength) contextSize = maxContextLength;
                SetEmbeddings(modelEntry.embeddingLength, modelEntry.embeddingOnly);
                if (contextSize == 0 && modelEntry.contextLength > 32768)
                {
                    LLMUnitySetup.LogWarning($"The model {path} has very large context size ({modelEntry.contextLength}), consider setting it to a smaller value (<=32768) to avoid filling up the RAM");
                }
            }
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }
252
        /// <summary>
        /// Sets a single LORA model to use in the LLM, replacing any existing LORAs.
        /// Can only be called before the LLM service has started.
        /// </summary>
        /// <param name="path">path of the LORA model (.gguf format)</param>
        /// <param name="weight">weight (scale) of the LORA model</param>
        public void SetLora(string path, float weight = 1)
        {
            AssertNotStarted();
            loraManager.Clear();
            AddLora(path, weight);
        }
265
        /// <summary>
        /// Adds a LORA model to use in the LLM.
        /// Can only be called before the LLM service has started.
        /// </summary>
        /// <param name="path">path of the LORA model (.gguf format)</param>
        /// <param name="weight">weight (scale) of the LORA model</param>
        public void AddLora(string path, float weight = 1)
        {
            AssertNotStarted();
            loraManager.Add(path, weight);
            UpdateLoras();
        }
278
        /// <summary>
        /// Removes a LORA model from the LLM.
        /// Can only be called before the LLM service has started.
        /// </summary>
        /// <param name="path">path of the LORA model to remove</param>
        public void RemoveLora(string path)
        {
            AssertNotStarted();
            loraManager.Remove(path);
            UpdateLoras();
        }
290
        /// <summary>
        /// Removes all LORA models from the LLM.
        /// Can only be called before the LLM service has started.
        /// </summary>
        public void RemoveLoras()
        {
            AssertNotStarted();
            loraManager.Clear();
            UpdateLoras();
        }
300
        /// <summary>
        /// Changes the weight (scale) of a LORA model.
        /// Unlike add/remove, this may be called while the service is running;
        /// the new weight is then applied to the native service immediately.
        /// </summary>
        /// <param name="path">path of the LORA model</param>
        /// <param name="weight">new weight (scale)</param>
        public void SetLoraWeight(string path, float weight)
        {
            loraManager.SetWeight(path, weight);
            UpdateLoras();
            if (started) ApplyLoras();
        }
312
317 public void SetLoraWeights(Dictionary<string, float> loraToWeight)
318 {
319 foreach (KeyValuePair<string, float> entry in loraToWeight) loraManager.SetWeight(entry.Key, entry.Value);
320 UpdateLoras();
321 if (started) ApplyLoras();
322 }
323
        /// <summary>
        /// Serialises the LORA manager state back into the lora / loraWeights strings
        /// and refreshes the change-detection snapshots so OnValidate does not re-parse.
        /// </summary>
        public void UpdateLoras()
        {
            (lora, loraWeights) = loraManager.ToStrings();
            (loraPre, loraWeightsPre) = (lora, loraWeights);
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }
332
        /// <summary>
        /// Sets the chat template of the LLM and forwards it to the native service if started.
        /// </summary>
        /// <param name="templateName">name of the chat template</param>
        /// <param name="setDirty">whether to mark the object dirty in the editor</param>
        public void SetTemplate(string templateName, bool setDirty = true)
        {
            chatTemplate = templateName;
            if (started) llmlib?.LLM_SetTemplate(LLMObject, chatTemplate);
#if UNITY_EDITOR
            if (setDirty && !EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }
345
        /// <summary>
        /// Sets the LLM embedding parameters.
        /// </summary>
        /// <param name="embeddingLength">embedding vector length of the model</param>
        /// <param name="embeddingsOnly">whether to run the model in embedding-only mode</param>
        public void SetEmbeddings(int embeddingLength, bool embeddingsOnly)
        {
            this.embeddingsOnly = embeddingsOnly;
            this.embeddingLength = embeddingLength;
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }
359
361
362 string ReadFileContents(string path)
363 {
364 if (String.IsNullOrEmpty(path)) return "";
365 else if (!File.Exists(path))
366 {
367 LLMUnitySetup.LogError($"File {path} not found!");
368 return "";
369 }
370 return File.ReadAllText(path);
371 }
372
374
        /// <summary>
        /// Uses a SSL certificate for the LLM server: stores the path and caches the
        /// certificate contents for serialization.
        /// </summary>
        /// <param name="path">path of the SSL certificate file</param>
        public void SetSSLCert(string path)
        {
            SSLCertPath = path;
            SSLCert = ReadFileContents(path);
        }
384
        /// <summary>
        /// Uses a SSL key for the LLM server: stores the path and caches the
        /// key contents for serialization.
        /// </summary>
        /// <param name="path">path of the SSL key file</param>
        public void SetSSLKey(string path)
        {
            SSLKeyPath = path;
            SSLKey = ReadFileContents(path);
        }
394
399 public string GetTemplate()
400 {
401 return chatTemplate;
402 }
403
        /// <summary>
        /// Builds the llama.cpp server argument string from the current settings.
        /// Returns null (after logging an error) if the configuration is invalid:
        /// mismatched SSL cert/key, missing model, or a missing LORA file.
        /// </summary>
        /// <returns>argument string for the native server, or null on error</returns>
        protected virtual string GetLlamaccpArguments()
        {
            // SSL requires both the certificate and the key.
            if ((SSLCert != "" && SSLKey == "") || (SSLCert == "" && SSLKey != ""))
            {
                LLMUnitySetup.LogError($"Both SSL certificate and key need to be provided!");
                return null;
            }

            if (model == "")
            {
                LLMUnitySetup.LogError("No model file provided!");
                return null;
            }
            string modelPath = GetLLMManagerAssetRuntime(model);
            if (!File.Exists(modelPath))
            {
                LLMUnitySetup.LogError($"File {modelPath} not found!");
                return null;
            }

            // Resolve every LORA path; abort if any is missing.
            loraManager.FromStrings(lora, loraWeights);
            string loraArgument = "";
            foreach (string lora in loraManager.GetLoras()) // NOTE: this local shadows the 'lora' field
            {
                string loraPath = GetLLMManagerAssetRuntime(lora);
                if (!File.Exists(loraPath))
                {
                    LLMUnitySetup.LogError($"File {loraPath} not found!");
                    return null;
                }
                loraArgument += $" --lora \"{loraPath}\"";
            }

            // On Android, -1 / "all threads" is replaced by the number of big cores.
            int numThreadsToUse = numThreads;
            if (Application.platform == RuntimePlatform.Android && numThreads <= 0) numThreadsToUse = LLMUnitySetup.AndroidGetNumBigCores();

            int slots = GetNumClients();
            string arguments = $"-m \"{modelPath}\" -c {contextSize} -b {batchSize} --log-disable -np {slots}";
            if (embeddingsOnly) arguments += " --embedding";
            if (numThreadsToUse > 0) arguments += $" -t {numThreadsToUse}";
            arguments += loraArgument;
            arguments += $" -ngl {numGPULayers}";
            if (LLMUnitySetup.FullLlamaLib && flashAttention) arguments += $" --flash-attn";
            if (remote)
            {
                arguments += $" --port {port} --host 0.0.0.0";
                if (!String.IsNullOrEmpty(APIKey)) arguments += $" --api-key {APIKey}";
            }

            // Log the equivalent command line for running the standalone server manually.
            string serverCommand;
            if (Application.platform == RuntimePlatform.WindowsEditor || Application.platform == RuntimePlatform.WindowsPlayer) serverCommand = "undreamai_server.exe";
            else serverCommand = "./undreamai_server";
            serverCommand += " " + arguments;
            serverCommand += $" --template \"{chatTemplate}\"";
            if (remote && SSLCert != "" && SSLKey != "") serverCommand += $" --ssl-cert-file {SSLCertPath} --ssl-key-file {SSLKeyPath}";
            LLMUnitySetup.Log($"Deploy server command: {serverCommand}");
            return arguments;
        }
464
        /// <summary>
        /// Routes native library log output to the Unity console (as warnings)
        /// through a stream wrapper that clears itself on every update.
        /// </summary>
        private void SetupLogging()
        {
            logStreamWrapper = ConstructStreamWrapper(LLMUnitySetup.LogWarning, true);
            llmlib?.Logging(logStreamWrapper.GetStringWrapper());
        }
470
        /// <summary>
        /// Stops routing native log output and releases the log stream wrapper.
        /// Safe to call when logging was never set up.
        /// </summary>
        private void StopLogging()
        {
            if (logStreamWrapper == null) return;
            llmlib?.StopLogging();
            DestroyStreamWrapper(logStreamWrapper);
        }
477
        /// <summary>
        /// Tries each candidate native library architecture until the service is
        /// created, then starts it. Sets failed=true if no architecture works.
        /// Runs on a background thread (invoked via Task.Run from Awake).
        /// </summary>
        /// <param name="arguments">server argument string from GetLlamaccpArguments</param>
        private void StartLLMServer(string arguments)
        {
            started = false;
            failed = false;
            bool useGPU = numGPULayers > 0;

            foreach (string arch in LLMLib.PossibleArchitectures(useGPU))
            {
                string error;
                try
                {
                    InitLib(arch);
                    InitService(arguments);
                    LLMUnitySetup.Log($"Using architecture: {arch}");
                    break;
                }
                catch (LLMException e)
                {
                    error = e.Message;
                    // Tear down the partially-initialised library before trying the next arch.
                    Destroy();
                }
                catch (DestroyException)
                {
                    // The LLM was destroyed while starting up; stop trying.
                    break;
                }
                catch (Exception e)
                {
                    error = $"{e.GetType()}: {e.Message}";
                }
                LLMUnitySetup.Log($"Tried architecture: {arch}, error: " + error);
            }
            if (llmlib == null)
            {
                LLMUnitySetup.LogError("LLM service couldn't be created");
                failed = true;
                return;
            }
            CallWithLock(StartService);
            LLMUnitySetup.Log("LLM service created");
        }
518
        /// <summary>
        /// Loads the native library for the given architecture and verifies its status.
        /// </summary>
        /// <param name="arch">native library architecture identifier</param>
        private void InitLib(string arch)
        {
            llmlib = new LLMLib(arch);
            CheckLLMStatus(false);
        }
524
        /// <summary>
        /// Runs fn under the start lock, throwing DestroyException if the native
        /// library has been destroyed in the meantime (so startup aborts cleanly).
        /// </summary>
        /// <param name="fn">the action to run</param>
        void CallWithLock(EmptyCallback fn)
        {
            lock (startLock)
            {
                if (llmlib == null) throw new DestroyException();
                fn();
            }
        }
533
        /// <summary>
        /// Constructs the native LLM object, applies the chat template and (for remote
        /// use) SSL and server startup. Each step runs under the start lock so a
        /// concurrent Destroy aborts the sequence via DestroyException.
        /// </summary>
        /// <param name="arguments">server argument string from GetLlamaccpArguments</param>
        private void InitService(string arguments)
        {
            // staticLock serialises native construction across all LLM instances.
            lock (staticLock)
            {
                if (debug) CallWithLock(SetupLogging);
                CallWithLock(() => { LLMObject = llmlib.LLM_Construct(arguments); });
                CallWithLock(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
                if (remote)
                {
                    if (SSLCert != "" && SSLKey != "")
                    {
                        LLMUnitySetup.Log("Using SSL");
                        CallWithLock(() => llmlib.LLM_SetSSL(LLMObject, SSLCert, SSLKey));
                    }
                    CallWithLock(() => llmlib.LLM_StartServer(LLMObject));
                }
                CallWithLock(() => CheckLLMStatus(false));
            }
        }
553
554 private void StartService()
555 {
556 llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));
557 llmThread.Start();
558 while (!llmlib.LLM_Started(LLMObject)) {}
559 ApplyLoras();
560 started = true;
561 }
562
569 public int Register(LLMCaller llmCaller)
570 {
571 clients.Add(llmCaller);
572 int index = clients.IndexOf(llmCaller);
573 if (parallelPrompts != -1) return index % parallelPrompts;
574 return index;
575 }
576
577 protected int GetNumClients()
578 {
579 return Math.Max(parallelPrompts == -1 ? clients.Count : parallelPrompts, 1);
580 }
581
        // Native callback signatures: status query, reply without input, and reply
        // taking a json request string.
        public delegate void LLMStatusCallback(IntPtr LLMObject, IntPtr stringWrapper);
        public delegate void LLMNoInputReplyCallback(IntPtr LLMObject, IntPtr stringWrapper);
        public delegate void LLMReplyCallback(IntPtr LLMObject, string json_data, IntPtr stringWrapper);
587
        /// <summary>
        /// Creates a StreamWrapper for streaming native replies and tracks it so
        /// Update() pumps it on the main thread.
        /// </summary>
        /// <param name="streamCallback">callback receiving streamed text</param>
        /// <param name="clearOnUpdate">whether the wrapper clears its buffer on each update</param>
        StreamWrapper ConstructStreamWrapper(Callback<string> streamCallback = null, bool clearOnUpdate = false)
        {
            StreamWrapper streamWrapper = new StreamWrapper(llmlib, streamCallback, clearOnUpdate);
            streamWrappers.Add(streamWrapper);
            return streamWrapper;
        }
594
        /// <summary>
        /// Stops tracking a StreamWrapper and releases its native resources.
        /// </summary>
        /// <param name="streamWrapper">the wrapper to destroy</param>
        void DestroyStreamWrapper(StreamWrapper streamWrapper)
        {
            streamWrappers.Remove(streamWrapper);
            streamWrapper.Destroy();
        }
600
603 public void Update()
604 {
605 foreach (StreamWrapper streamWrapper in streamWrappers) streamWrapper.Update();
606 }
607
608 void AssertStarted()
609 {
610 string error = null;
611 if (failed) error = "LLM service couldn't be created";
612 else if (!started) error = "LLM service not started";
613 if (error != null)
614 {
615 LLMUnitySetup.LogError(error);
616 throw new Exception(error);
617 }
618 }
619
620 void AssertNotStarted()
621 {
622 if (started)
623 {
624 string error = "This method can't be called when the LLM has started";
625 LLMUnitySetup.LogError(error);
626 throw new Exception(error);
627 }
628 }
629
        /// <summary>
        /// Queries the native LLM status. A positive status is an error (throws
        /// LLMException), a negative status is logged as a warning, zero is OK.
        /// No-op when the native library has been destroyed.
        /// </summary>
        /// <param name="log">whether to log the error/warning message</param>
        void CheckLLMStatus(bool log = true)
        {
            if (llmlib == null) { return; }
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            int status = llmlib.LLM_Status(LLMObject, stringWrapper);
            string result = llmlib.GetStringWrapperResult(stringWrapper);
            llmlib.StringWrapper_Delete(stringWrapper);
            string message = $"LLM {status}: {result}";
            if (status > 0)
            {
                if (log) LLMUnitySetup.LogError(message);
                throw new LLMException(message, status);
            }
            else if (status < 0)
            {
                if (log) LLMUnitySetup.LogWarning(message);
            }
        }
648
        /// <summary>
        /// Runs a no-input native callback on a worker thread and returns its string result.
        /// </summary>
        /// <param name="callback">native call to invoke with the result wrapper</param>
        /// <returns>the native call's string result</returns>
        async Task<string> LLMNoInputReply(LLMNoInputReplyCallback callback)
        {
            AssertStarted();
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            await Task.Run(() => callback(LLMObject, stringWrapper));
            // NOTE(review): llmlib?. guards against the LLM being destroyed during the await;
            // in that case the stringWrapper native allocation is not deleted — verify.
            string result = llmlib?.GetStringWrapperResult(stringWrapper);
            llmlib?.StringWrapper_Delete(stringWrapper);
            CheckLLMStatus();
            return result;
        }
659
        /// <summary>
        /// Runs a json-input native callback on a worker thread and returns its string result.
        /// </summary>
        /// <param name="callback">native call to invoke with the request and result wrapper</param>
        /// <param name="json">json request passed to the native call</param>
        /// <returns>the native call's string result</returns>
        async Task<string> LLMReply(LLMReplyCallback callback, string json)
        {
            AssertStarted();
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            await Task.Run(() => callback(LLMObject, json, stringWrapper));
            // NOTE(review): llmlib?. guards against the LLM being destroyed during the await;
            // in that case the stringWrapper native allocation is not deleted — verify.
            string result = llmlib?.GetStringWrapperResult(stringWrapper);
            llmlib?.StringWrapper_Delete(stringWrapper);
            CheckLLMStatus();
            return result;
        }
670
676 public async Task<string> Tokenize(string json)
677 {
678 AssertStarted();
679 LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
680 {
681 llmlib.LLM_Tokenize(LLMObject, jsonData, strWrapper);
682 };
683 return await LLMReply(callback, json);
684 }
685
691 public async Task<string> Detokenize(string json)
692 {
693 AssertStarted();
694 LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
695 {
696 llmlib.LLM_Detokenize(LLMObject, jsonData, strWrapper);
697 };
698 return await LLMReply(callback, json);
699 }
700
706 public async Task<string> Embeddings(string json)
707 {
708 AssertStarted();
709 LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
710 {
711 llmlib.LLM_Embeddings(LLMObject, jsonData, strWrapper);
712 };
713 return await LLMReply(callback, json);
714 }
715
        /// <summary>
        /// Sets the lora scale, only works after the LLM service has started.
        /// No-op when no LORAs are loaded.
        /// </summary>
        public void ApplyLoras()
        {
            LoraWeightRequestList loraWeightRequest = new LoraWeightRequestList();
            loraWeightRequest.loraWeights = new List<LoraWeightRequest>();
            float[] weights = loraManager.GetWeights();
            if (weights.Length == 0) return;
            for (int i = 0; i < weights.Length; i++)
            {
                loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = weights[i] });
            }

            // JsonUtility cannot serialise a bare array, so serialise the wrapper object
            // and cut out the inner [...] list that the native API expects.
            string json = JsonUtility.ToJson(loraWeightRequest);
            int startIndex = json.IndexOf("[");
            int endIndex = json.LastIndexOf("]") + 1;
            json = json.Substring(startIndex, endIndex - startIndex);

            // The native call requires a result wrapper even though the result is unused.
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            llmlib.LLM_LoraWeight(LLMObject, json, stringWrapper);
            llmlib.StringWrapper_Delete(stringWrapper);
        }
740
        /// <summary>
        /// Gets a list of the LORA adapters currently loaded in the native service.
        /// </summary>
        /// <returns>the LORA list, or null if the service returned no data</returns>
        public async Task<List<LoraWeightResult>> ListLoras()
        {
            AssertStarted();
            LLMNoInputReplyCallback callback = (IntPtr LLMObject, IntPtr strWrapper) =>
            {
                llmlib.LLM_LoraList(LLMObject, strWrapper);
            };
            string json = await LLMNoInputReply(callback);
            if (String.IsNullOrEmpty(json)) return null;
            // The native API returns a bare array; wrap it so JsonUtility can parse it.
            LoraWeightResultList loraRequest = JsonUtility.FromJson<LoraWeightResultList>("{\"loraWeights\": " + json + "}");
            return loraRequest.loraWeights;
        }
757
763 public async Task<string> Slot(string json)
764 {
765 AssertStarted();
766 LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
767 {
768 llmlib.LLM_Slot(LLMObject, jsonData, strWrapper);
769 };
770 return await LLMReply(callback, json);
771 }
772
779 public async Task<string> Completion(string json, Callback<string> streamCallback = null)
780 {
781 AssertStarted();
782 if (streamCallback == null) streamCallback = (string s) => {};
783 StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);
784 await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));
785 if (!started) return null;
786 streamWrapper.Update();
787 string result = streamWrapper.GetString();
788 DestroyStreamWrapper(streamWrapper);
789 CheckLLMStatus();
790 return result;
791 }
792
        /// <summary>
        /// Allows to cancel the requests in a specific slot of the LLM.
        /// </summary>
        /// <param name="id_slot">the slot id whose requests are cancelled</param>
        public void CancelRequest(int id_slot)
        {
            AssertStarted();
            llmlib?.LLM_Cancel(LLMObject, id_slot);
            CheckLLMStatus();
        }
803
        /// <summary>
        /// Stops and destroys the LLM: stops the native service, joins its thread,
        /// and releases the native library. Safe to call repeatedly.
        /// Lock order (staticLock then startLock) matches InitService/CallWithLock,
        /// and clearing llmlib makes any in-flight CallWithLock throw DestroyException.
        /// </summary>
        public void Destroy()
        {
            lock (staticLock)
            lock (startLock)
            {
                try
                {
                    if (llmlib != null)
                    {
                        if (LLMObject != IntPtr.Zero)
                        {
                            llmlib.LLM_Stop(LLMObject);
                            if (remote) llmlib.LLM_StopServer(LLMObject);
                            StopLogging();
                            // Wait for the native loop thread to exit before deleting the object.
                            llmThread?.Join();
                            llmlib.LLM_Delete(LLMObject);
                            LLMObject = IntPtr.Zero;
                        }
                        llmlib.Destroy();
                        llmlib = null;
                    }
                    started = false;
                    failed = false;
                }
                catch (Exception e)
                {
                    // Destruction must not throw (called from OnDestroy); log instead.
                    LLMUnitySetup.LogError(e.Message);
                }
            }
        }
837
        /// <summary>
        /// The Unity OnDestroy function: tears down the LLM service when the object is destroyed.
        /// </summary>
        public void OnDestroy()
        {
            Destroy();
        }
847 }
848}
Class implementing the skeleton of a chat template.
static string DefaultTemplate
the default template used when it can't be determined ("chatml")
Class implementing calling of LLM functions (local and remote).
Definition LLMCaller.cs:17
Class implementing the LLM model manager.
static void Unregister(LLM llm)
Removes a LLM from the model manager.
static ModelEntry Get(string path)
Gets the model entry for a model path.
static Task< bool > Setup()
Setup of the models.
static void Register(LLM llm)
Registers a LLM to the model manager.
Class implementing helper functions for setup and process management.
Class implementing the LLM server.
Definition LLM.cs:19
int numGPULayers
number of model layers to offload to the GPU (0 = GPU not used). If the user's GPU is not supported,...
Definition LLM.cs:35
void ApplyLoras()
Sets the lora scale, only works after the LLM service has started.
Definition LLM.cs:720
async Task< string > Slot(string json)
Allows to save / restore the state of a slot.
Definition LLM.cs:763
void SetLoraWeights(Dictionary< string, float > loraToWeight)
Allows to change the weights (scale) of the LORA models in the LLM.
Definition LLM.cs:317
async Task< List< LoraWeightResult > > ListLoras()
Gets a list of the lora adapters.
Definition LLM.cs:745
static async Task< bool > WaitUntilModelSetup(Callback< float > downloadProgressCallback=null)
Allows to wait until the LLM models are downloaded and ready.
Definition LLM.cs:161
string GetTemplate()
Returns the chat template of the LLM.
Definition LLM.cs:399
void SetLoraWeight(string path, float weight)
Allows to change the weight (scale) of a LORA model in the LLM.
Definition LLM.cs:306
void CancelRequest(int id_slot)
Allows to cancel the requests in a specific slot of the LLM.
Definition LLM.cs:797
int parallelPrompts
number of prompts that can happen in parallel (-1 = number of LLMCaller objects)
Definition LLM.cs:41
bool debug
log the output of the LLM in the Unity Editor.
Definition LLM.cs:38
async void Awake()
The Unity Awake function that starts the LLM server.
Definition LLM.cs:126
async Task< string > Detokenize(string json)
Detokenises the provided query.
Definition LLM.cs:691
void OnDestroy()
The Unity OnDestroy function called when the object is destroyed. The function StopProcess is called...
Definition LLM.cs:842
void SetLora(string path, float weight=1)
Allows to set a LORA model to use in the LLM. The model provided is copied to the Assets/StreamingAss...
Definition LLM.cs:259
void AddLora(string path, float weight=1)
Allows to add a LORA model to use in the LLM. The model provided is copied to the Assets/StreamingAss...
Definition LLM.cs:272
bool advancedOptions
show/hide advanced options in the GameObject
Definition LLM.cs:22
void RemoveLora(string path)
Allows to remove a LORA model from the LLM. Models supported are in .gguf format.
Definition LLM.cs:284
static bool modelSetupFailed
Boolean set to true if the models were not downloaded successfully.
Definition LLM.cs:57
string lora
LORA models to use (.gguf format)
Definition LLM.cs:68
int contextSize
Size of the prompt context (0 = context size of the model). This is the number of tokens the model ca...
Definition LLM.cs:48
int numThreads
number of threads to use (-1 = all)
Definition LLM.cs:31
bool started
Boolean set to true if the server has started and is ready to receive requests, false otherwise.
Definition LLM.cs:53
void SetModel(string path)
Allows to set the model used by the LLM. The model provided is copied to the Assets/StreamingAssets f...
Definition LLM.cs:231
int port
port to use for the remote LLM server
Definition LLM.cs:28
bool remote
enable remote server functionality
Definition LLM.cs:25
void SetSSLCert(string path)
Use a SSL certificate for the LLM server.
Definition LLM.cs:379
void RemoveLoras()
Allows to remove all LORA models from the LLM.
Definition LLM.cs:294
bool dontDestroyOnLoad
do not destroy the LLM GameObject when loading a new Scene.
Definition LLM.cs:44
void SetEmbeddings(int embeddingLength, bool embeddingsOnly)
Set LLM Embedding parameters.
Definition LLM.cs:351
string model
LLM model to use (.gguf format)
Definition LLM.cs:62
string APIKey
API key to use for the server.
Definition LLM.cs:77
async Task< string > Tokenize(string json)
Tokenises the provided query.
Definition LLM.cs:676
int batchSize
Batch size for prompt processing.
Definition LLM.cs:51
void SetSSLKey(string path)
Use a SSL key for the LLM server.
Definition LLM.cs:389
string chatTemplate
Chat template for the model.
Definition LLM.cs:65
bool flashAttention
enable use of flash attention
Definition LLM.cs:74
async Task< string > Completion(string json, Callback< string > streamCallback=null)
Allows to use the chat and completion functionality of the LLM.
Definition LLM.cs:779
static bool modelSetupComplete
Boolean set to true once the LLM model setup (download/copy) has completed, whether or not it succeeded.
Definition LLM.cs:59
async Task WaitUntilReady()
Allows to wait until the LLM is ready.
Definition LLM.cs:152
void SetTemplate(string templateName, bool setDirty=true)
Set the chat template for the LLM.
Definition LLM.cs:337
void Destroy()
Stops and destroys the LLM.
Definition LLM.cs:807
void Update()
The Unity Update function. It is used to retrieve the LLM replies.
Definition LLM.cs:603
string loraWeights
the weights of the LORA models being used.
Definition LLM.cs:71
int Register(LLMCaller llmCaller)
Registers a local LLMCaller object. This allows to bind the LLMCaller "client" to a specific slot of ...
Definition LLM.cs:569
bool failed
Boolean set to true if the server has failed to start.
Definition LLM.cs:55
async Task< string > Embeddings(string json)
Computes the embeddings of the provided query.
Definition LLM.cs:706
Class representing the LORA manager allowing to convert and retrieve LORA assets to string (for seria...
Definition LLMUtils.cs:56
float[] GetWeights()
Gets the weights of the LORAs in the manager.
Definition LLMUtils.cs:185
void Add(string path, float weight=1)
Adds a LORA with the defined weight.
Definition LLMUtils.cs:99
void Remove(string path)
Removes a LORA based on its path.
Definition LLMUtils.cs:109
void SetWeight(string path, float weight)
Modifies the weight of a LORA.
Definition LLMUtils.cs:120
void FromStrings(string loraString, string loraWeightsString)
Converts strings with the lora paths and weights to entries in the LORA manager.
Definition LLMUtils.cs:136
string[] GetLoras()
Gets the paths of the LORAs in the manager.
Definition LLMUtils.cs:196
void Clear()
Clears the LORA assets.
Definition LLMUtils.cs:63
Class implementing a LLM model entry.
Definition LLMManager.cs:18