LLM for Unity v2.5.0
Create characters in Unity with LLMs!
LLM.cs
Go to the documentation of this file.
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using UnityEditor;
using UnityEngine;

namespace LLMUnity
{
    /// <summary>
    /// Class implementing the LLM server.
    /// </summary>
    [DefaultExecutionOrder(-1)]
    public class LLM : MonoBehaviour
    {
        [Tooltip("show/hide advanced options in the GameObject")]
        [HideInInspector] public bool advancedOptions = false;
        [Tooltip("enable remote server functionality")]
        [LocalRemote] public bool remote = false;
        [Tooltip("port to use for the remote LLM server")]
        [Remote] public int port = 13333;
        [Tooltip("number of threads to use (-1 = all)")]
        [LLM] public int numThreads = -1;
        [Tooltip("number of model layers to offload to the GPU (0 = GPU not used). If the user's GPU is not supported, the LLM will fall back to the CPU")]
        [LLM] public int numGPULayers = 0;
        [Tooltip("log the output of the LLM in the Unity Editor")]
        [LLM] public bool debug = false;
        [Tooltip("number of prompts that can happen in parallel (-1 = number of LLMCaller objects)")]
        [LLMAdvanced] public int parallelPrompts = -1;
        [Tooltip("do not destroy the LLM GameObject when loading a new Scene")]
        [LLMAdvanced] public bool dontDestroyOnLoad = true;
        [Tooltip("Size of the prompt context (0 = context size of the model). This is the number of tokens the model can take as input when generating responses.")]
        [DynamicRange("minContextLength", "maxContextLength", false), Model] public int contextSize = 8192;
        [Tooltip("Batch size for prompt processing.")]
        [ModelAdvanced] public int batchSize = 512;
        /// <summary> Boolean set to true if the server has started and is ready to receive requests, false otherwise. </summary>
        public bool started { get; protected set; } = false;
        /// <summary> Boolean set to true if the server has failed to start. </summary>
        public bool failed { get; protected set; } = false;
        /// <summary> Boolean set to true if the models were not downloaded successfully. </summary>
        public static bool modelSetupFailed { get; protected set; } = false;
        /// <summary> Boolean set to true once the model setup has completed (whether it succeeded or failed). </summary>
        public static bool modelSetupComplete { get; protected set; } = false;
        [Tooltip("LLM model to use (.gguf format)")]
        [ModelAdvanced] public string model = "";
        [Tooltip("Chat template for the model")]
        [ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;
        [Tooltip("LORA models to use (.gguf format)")]
        [ModelAdvanced] public string lora = "";
        [Tooltip("the weights of the LORA models being used")]
        [ModelAdvanced] public string loraWeights = "";
        [Tooltip("enable use of flash attention")]
        [ModelExtras] public bool flashAttention = false;
        [Tooltip("API key to use for the server")]
        public string APIKey;

        // SSL certificate
        [SerializeField]
        private string SSLCert = "";
        public string SSLCertPath = "";
        // SSL key
        [SerializeField]
        private string SSLKey = "";
        public string SSLKeyPath = "";

        public int minContextLength = 0;
        public int maxContextLength = 0;
        public string architecture => llmlib.architecture;

        IntPtr LLMObject = IntPtr.Zero;
        List<LLMCaller> clients = new List<LLMCaller>();
        LLMLib llmlib;
        StreamWrapper logStreamWrapper = null;
        Thread llmThread = null;
        List<StreamWrapper> streamWrappers = new List<StreamWrapper>();
        public LLMManager llmManager = new LLMManager();
        private readonly object startLock = new object();
        static readonly object staticLock = new object();
        public LoraManager loraManager = new LoraManager();
        string loraPre = "";
        string loraWeightsPre = "";
        public bool embeddingsOnly = false;
        public int embeddingLength = 0;

        public LLM()
        {
            LLMManager.Register(this);
        }

        void OnValidate()
        {
            if (lora != loraPre || loraWeights != loraWeightsPre)
            {
                loraManager.FromStrings(lora, loraWeights);
                (loraPre, loraWeightsPre) = (lora, loraWeights);
            }
        }

        /// <summary>
        /// The Unity Awake function that starts the LLM server.
        /// </summary>
        public async void Awake()
        {
            if (!enabled) return;
#if !UNITY_EDITOR
            // Reconstructed line (missing from the listing): in builds the models are set
            // up first; assumed to await the LLMManager setup and record whether it failed.
            modelSetupFailed = !await LLMManager.Setup();
#endif
            modelSetupComplete = true;
            if (modelSetupFailed)
            {
                failed = true;
                return;
            }
            string arguments = GetLlamaccpArguments();
            if (arguments == null)
            {
                failed = true;
                return;
            }
            await Task.Run(() => StartLLMServer(arguments));
            if (!started) return;
            if (dontDestroyOnLoad) DontDestroyOnLoad(transform.root.gameObject);
        }

        /// <summary>
        /// Allows to wait until the LLM is ready.
        /// </summary>
        public async Task WaitUntilReady()
        {
            while (!started) await Task.Yield();
        }

        /// <summary>
        /// Allows to wait until the LLM models are downloaded and ready.
        /// </summary>
        public static async Task<bool> WaitUntilModelSetup(Callback<float> downloadProgressCallback = null)
        {
            if (downloadProgressCallback != null) LLMManager.downloadProgressCallbacks.Add(downloadProgressCallback);
            while (!modelSetupComplete) await Task.Yield();
            return !modelSetupFailed;
        }

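        // Example (illustrative): a typical startup flow from another MonoBehaviour; the
        // class name and field wiring below are assumptions for this sketch, not part of the API.
        //
        //   public class MyStarter : MonoBehaviour
        //   {
        //       public LLM llm;  // assigned in the Inspector
        //
        //       async void Start()
        //       {
        //           if (!await LLM.WaitUntilModelSetup()) { Debug.LogError("Model setup failed"); return; }
        //           await llm.WaitUntilReady();
        //           // the LLM server is now started and can serve requests
        //       }
        //   }
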
        public static string GetLLMManagerAsset(string path)
        {
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) return GetLLMManagerAssetEditor(path);
#endif
            return GetLLMManagerAssetRuntime(path);
        }

        public static string GetLLMManagerAssetEditor(string path)
        {
            // empty path
            if (string.IsNullOrEmpty(path)) return path;
            // LLMManager asset - return the location the file will be stored in StreamingAssets
            ModelEntry modelEntry = LLMManager.Get(path);
            if (modelEntry != null) return modelEntry.filename;
            // StreamingAssets asset - return the relative location within StreamingAssets
            string assetPath = LLMUnitySetup.GetAssetPath(path); // Note: this will return the full path if a full path is passed
            string basePath = LLMUnitySetup.GetAssetPath();
            if (File.Exists(assetPath))
            {
                if (LLMUnitySetup.IsSubPath(assetPath, basePath)) return LLMUnitySetup.RelativePath(assetPath, basePath);
            }
            // full path
            if (!File.Exists(assetPath))
            {
                LLMUnitySetup.LogError($"Model {path} was not found.");
            }
            else
            {
                string errorMessage = $"The model {path} was loaded locally. You can include it in the build in one of these ways:";
                errorMessage += "\n- Copy the model inside the StreamingAssets folder and use its StreamingAssets path";
                errorMessage += "\n- Load the model with the model manager inside the LLM GameObject and use its filename";
                LLMUnitySetup.LogWarning(errorMessage);
            }
            return path;
        }

        public static string GetLLMManagerAssetRuntime(string path)
        {
            // empty path
            if (string.IsNullOrEmpty(path)) return path;
            // LLMManager asset
            string managerPath = LLMManager.GetAssetPath(path);
            if (!string.IsNullOrEmpty(managerPath) && File.Exists(managerPath)) return managerPath;
            // StreamingAssets asset
            string assetPath = LLMUnitySetup.GetAssetPath(path);
            if (File.Exists(assetPath)) return assetPath;
            // download path
            assetPath = LLMUnitySetup.GetDownloadAssetPath(path);
            if (File.Exists(assetPath)) return assetPath;
            // give up
            return path;
        }

        /// <summary>
        /// Allows to set the model used by the LLM.
        /// The model provided is copied to the Assets/StreamingAssets folder.
        /// Models supported are in .gguf format.
        /// </summary>
        public void SetModel(string path)
        {
            model = GetLLMManagerAsset(path);
            if (!string.IsNullOrEmpty(model))
            {
                ModelEntry modelEntry = LLMManager.Get(model);
                if (modelEntry == null) modelEntry = new ModelEntry(GetLLMManagerAssetRuntime(model));
                SetTemplate(modelEntry.chatTemplate);

                maxContextLength = modelEntry.contextLength;
                if (contextSize > maxContextLength) contextSize = maxContextLength;
                SetEmbeddings(modelEntry.embeddingLength, modelEntry.embeddingOnly);
                if (contextSize == 0 && modelEntry.contextLength > 32768)
                {
                    LLMUnitySetup.LogWarning($"The model {path} has a very large context size ({modelEntry.contextLength}); consider setting it to a smaller value (<=32768) to avoid filling up the RAM");
                }
            }
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }

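        // Example (illustrative): point the LLM to a model; the filename below is
        // hypothetical and would refer to a model registered in the LLMManager or
        // placed under StreamingAssets.
        //
        //   llm.SetModel("llama-3.2-1b-q4.gguf");
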
        /// <summary>
        /// Allows to set a LORA model to use in the LLM, replacing any LORAs set before.
        /// Models supported are in .gguf format.
        /// </summary>
        public void SetLora(string path, float weight = 1)
        {
            AssertNotStarted();
            loraManager.Clear();
            AddLora(path, weight);
        }

        /// <summary>
        /// Allows to add a LORA model to use in the LLM.
        /// Models supported are in .gguf format.
        /// </summary>
        public void AddLora(string path, float weight = 1)
        {
            AssertNotStarted();
            loraManager.Add(path, weight);
            UpdateLoras();
        }

        /// <summary>
        /// Allows to remove a LORA model from the LLM.
        /// Models supported are in .gguf format.
        /// </summary>
        public void RemoveLora(string path)
        {
            AssertNotStarted();
            loraManager.Remove(path);
            UpdateLoras();
        }

        /// <summary>
        /// Allows to remove all LORA models from the LLM.
        /// </summary>
        public void RemoveLoras()
        {
            AssertNotStarted();
            loraManager.Clear();
            UpdateLoras();
        }

        /// <summary>
        /// Allows to change the weight (scale) of a LORA model in the LLM.
        /// </summary>
        public void SetLoraWeight(string path, float weight)
        {
            loraManager.SetWeight(path, weight);
            UpdateLoras();
            if (started) ApplyLoras();
        }

        /// <summary>
        /// Allows to change the weights (scale) of the LORA models in the LLM.
        /// </summary>
        public void SetLoraWeights(Dictionary<string, float> loraToWeight)
        {
            foreach (KeyValuePair<string, float> entry in loraToWeight) loraManager.SetWeight(entry.Key, entry.Value);
            UpdateLoras();
            if (started) ApplyLoras();
        }

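        // Example (illustrative): managing LORA adapters; the file name is hypothetical.
        //
        //   llm.AddLora("style-adapter.gguf", 0.8f);        // before the service starts
        //   llm.SetLoraWeight("style-adapter.gguf", 0.5f);  // also works after startup
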
        public void UpdateLoras()
        {
            (lora, loraWeights) = loraManager.ToStrings();
            (loraPre, loraWeightsPre) = (lora, loraWeights);
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }

        /// <summary>
        /// Set the chat template for the LLM.
        /// </summary>
        public void SetTemplate(string templateName, bool setDirty = true)
        {
            chatTemplate = templateName;
            if (started) llmlib?.LLM_SetTemplate(LLMObject, chatTemplate);
#if UNITY_EDITOR
            if (setDirty && !EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }

        /// <summary>
        /// Set the LLM embedding parameters.
        /// </summary>
        public void SetEmbeddings(int embeddingLength, bool embeddingsOnly)
        {
            this.embeddingsOnly = embeddingsOnly;
            this.embeddingLength = embeddingLength;
#if UNITY_EDITOR
            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);
#endif
        }

        string ReadFileContents(string path)
        {
            if (String.IsNullOrEmpty(path)) return "";
            else if (!File.Exists(path))
            {
                LLMUnitySetup.LogError($"File {path} not found!");
                return "";
            }
            return File.ReadAllText(path);
        }

        /// <summary>
        /// Use a SSL certificate for the LLM server.
        /// </summary>
        public void SetSSLCert(string path)
        {
            SSLCertPath = path;
            SSLCert = ReadFileContents(path);
        }

        /// <summary>
        /// Use a SSL key for the LLM server.
        /// </summary>
        public void SetSSLKey(string path)
        {
            SSLKeyPath = path;
            SSLKey = ReadFileContents(path);
        }

        /// <summary>
        /// Returns the chat template of the LLM.
        /// </summary>
        public string GetTemplate()
        {
            return chatTemplate;
        }

        protected virtual string GetLlamaccpArguments()
        {
            // Build the arguments to start the LLM server in a cross-platform way
            if ((SSLCert != "" && SSLKey == "") || (SSLCert == "" && SSLKey != ""))
            {
                LLMUnitySetup.LogError("Both SSL certificate and key need to be provided!");
                return null;
            }

            if (model == "")
            {
                LLMUnitySetup.LogError("No model file provided!");
                return null;
            }
            string modelPath = GetLLMManagerAssetRuntime(model);
            if (!File.Exists(modelPath))
            {
                LLMUnitySetup.LogError($"File {modelPath} not found!");
                return null;
            }

            loraManager.FromStrings(lora, loraWeights);
            string loraArgument = "";
            foreach (string lora in loraManager.GetLoras())
            {
                string loraPath = GetLLMManagerAssetRuntime(lora);
                if (!File.Exists(loraPath))
                {
                    LLMUnitySetup.LogError($"File {loraPath} not found!");
                    return null;
                }
                loraArgument += $" --lora \"{loraPath}\"";
            }

            int numThreadsToUse = numThreads;
            if (Application.platform == RuntimePlatform.Android && numThreads <= 0) numThreadsToUse = LLMUnitySetup.AndroidGetNumBigCores();

            int slots = GetNumClients();
            string arguments = $"-m \"{modelPath}\" -c {contextSize} -b {batchSize} --log-disable -np {slots}";
            if (embeddingsOnly) arguments += " --embedding";
            if (numThreadsToUse > 0) arguments += $" -t {numThreadsToUse}";
            arguments += loraArgument;
            if (numGPULayers > 0) arguments += $" -ngl {numGPULayers}";
            if (LLMUnitySetup.FullLlamaLib && flashAttention) arguments += " --flash-attn";
            if (remote)
            {
                arguments += $" --port {port} --host 0.0.0.0";
                if (!String.IsNullOrEmpty(APIKey)) arguments += $" --api-key {APIKey}";
            }

            // the following is the equivalent command for running the server from the command line
            string serverCommand;
            if (Application.platform == RuntimePlatform.WindowsEditor || Application.platform == RuntimePlatform.WindowsPlayer) serverCommand = "undreamai_server.exe";
            else serverCommand = "./undreamai_server";
            serverCommand += " " + arguments;
            serverCommand += $" --template \"{chatTemplate}\"";
            if (remote && SSLCert != "" && SSLKey != "") serverCommand += $" --ssl-cert-file {SSLCertPath} --ssl-key-file {SSLKeyPath}";
            LLMUnitySetup.Log($"Deploy server command: {serverCommand}");
            return arguments;
        }

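        // Illustrative result (hypothetical paths and values): for a local CPU-only setup
        // with one registered caller, the method would return something like
        //   -m "path/to/model.gguf" -c 8192 -b 512 --log-disable -np 1 -t 8
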
        private void SetupLogging()
        {
            logStreamWrapper = ConstructStreamWrapper(LLMUnitySetup.LogWarning, true);
            llmlib?.Logging(logStreamWrapper.GetStringWrapper());
        }

        private void StopLogging()
        {
            if (logStreamWrapper == null) return;
            llmlib?.StopLogging();
            DestroyStreamWrapper(logStreamWrapper);
        }

        private void StartLLMServer(string arguments)
        {
            started = false;
            failed = false;
            bool useGPU = numGPULayers > 0;

            foreach (string arch in LLMLib.PossibleArchitectures(useGPU))
            {
                string error;
                try
                {
                    InitLib(arch);
                    InitService(arguments);
                    LLMUnitySetup.Log($"Using architecture: {arch}");
                    break;
                }
                catch (LLMException e)
                {
                    error = e.Message;
                    Destroy();
                }
                catch (DestroyException)
                {
                    break;
                }
                catch (Exception e)
                {
                    error = $"{e.GetType()}: {e.Message}";
                }
                LLMUnitySetup.Log($"Tried architecture: {arch}, error: {error}");
            }
            if (llmlib == null)
            {
                LLMUnitySetup.LogError("LLM service couldn't be created");
                failed = true;
                return;
            }
            CallWithLock(StartService);
            LLMUnitySetup.Log("LLM service created");
        }

        private void InitLib(string arch)
        {
            llmlib = new LLMLib(arch);
            CheckLLMStatus(false);
        }

        void CallWithLock(EmptyCallback fn)
        {
            lock (startLock)
            {
                if (llmlib == null) throw new DestroyException();
                fn();
            }
        }

        private void InitService(string arguments)
        {
            lock (staticLock)
            {
                if (debug) CallWithLock(SetupLogging);
                CallWithLock(() => { LLMObject = llmlib.LLM_Construct(arguments); });
                CallWithLock(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));
                if (remote)
                {
                    if (SSLCert != "" && SSLKey != "")
                    {
                        LLMUnitySetup.Log("Using SSL");
                        CallWithLock(() => llmlib.LLM_SetSSL(LLMObject, SSLCert, SSLKey));
                    }
                    CallWithLock(() => llmlib.LLM_StartServer(LLMObject));
                }
                CallWithLock(() => CheckLLMStatus(false));
            }
        }

        private void StartService()
        {
            llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));
            llmThread.Start();
            while (!llmlib.LLM_Started(LLMObject)) {}
            ApplyLoras();
            started = true;
        }

        /// <summary>
        /// Registers a local LLMCaller object, binding the "client" to a specific slot of the LLM.
        /// </summary>
        public int Register(LLMCaller llmCaller)
        {
            clients.Add(llmCaller);
            int index = clients.IndexOf(llmCaller);
            if (parallelPrompts != -1) return index % parallelPrompts;
            return index;
        }

        protected int GetNumClients()
        {
            return Math.Max(parallelPrompts == -1 ? clients.Count : parallelPrompts, 1);
        }

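        // Example (illustrative): with parallelPrompts = 2, successive callers are mapped
        // to slots 0, 1, 0, 1, ... via index % parallelPrompts, so they share the two
        // server slots the service was started with (-np 2 in GetLlamaccpArguments).
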
        public delegate void LLMStatusCallback(IntPtr LLMObject, IntPtr stringWrapper);
        public delegate void LLMNoInputReplyCallback(IntPtr LLMObject, IntPtr stringWrapper);
        public delegate void LLMReplyCallback(IntPtr LLMObject, string json_data, IntPtr stringWrapper);

        StreamWrapper ConstructStreamWrapper(Callback<string> streamCallback = null, bool clearOnUpdate = false)
        {
            StreamWrapper streamWrapper = new StreamWrapper(llmlib, streamCallback, clearOnUpdate);
            streamWrappers.Add(streamWrapper);
            return streamWrapper;
        }

        void DestroyStreamWrapper(StreamWrapper streamWrapper)
        {
            streamWrappers.Remove(streamWrapper);
            streamWrapper.Destroy();
        }

        /// <summary>
        /// The Unity Update function. It is used to retrieve the LLM replies.
        /// </summary>
        public void Update()
        {
            foreach (StreamWrapper streamWrapper in streamWrappers) streamWrapper.Update();
        }

        void AssertStarted()
        {
            string error = null;
            if (failed) error = "LLM service couldn't be created";
            else if (!started) error = "LLM service not started";
            if (error != null)
            {
                LLMUnitySetup.LogError(error);
                throw new Exception(error);
            }
        }

        void AssertNotStarted()
        {
            if (started)
            {
                string error = "This method can't be called when the LLM has started";
                LLMUnitySetup.LogError(error);
                throw new Exception(error);
            }
        }

        void CheckLLMStatus(bool log = true)
        {
            if (llmlib == null) { return; }
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            int status = llmlib.LLM_Status(LLMObject, stringWrapper);
            string result = llmlib.GetStringWrapperResult(stringWrapper);
            llmlib.StringWrapper_Delete(stringWrapper);
            string message = $"LLM {status}: {result}";
            if (status > 0)
            {
                if (log) LLMUnitySetup.LogError(message);
                throw new LLMException(message, status);
            }
            else if (status < 0)
            {
                if (log) LLMUnitySetup.LogWarning(message);
            }
        }

        async Task<string> LLMNoInputReply(LLMNoInputReplyCallback callback)
        {
            AssertStarted();
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            await Task.Run(() => callback(LLMObject, stringWrapper));
            string result = llmlib?.GetStringWrapperResult(stringWrapper);
            llmlib?.StringWrapper_Delete(stringWrapper);
            CheckLLMStatus();
            return result;
        }

        async Task<string> LLMReply(LLMReplyCallback callback, string json)
        {
            AssertStarted();
            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            await Task.Run(() => callback(LLMObject, json, stringWrapper));
            string result = llmlib?.GetStringWrapperResult(stringWrapper);
            llmlib?.StringWrapper_Delete(stringWrapper);
            CheckLLMStatus();
            return result;
        }

        /// <summary>
        /// Tokenises the provided query.
        /// </summary>
        public async Task<string> Tokenize(string json)
        {
            AssertStarted();
            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
            {
                llmlib.LLM_Tokenize(LLMObject, jsonData, strWrapper);
            };
            return await LLMReply(callback, json);
        }

        /// <summary>
        /// Detokenises the provided query.
        /// </summary>
        public async Task<string> Detokenize(string json)
        {
            AssertStarted();
            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
            {
                llmlib.LLM_Detokenize(LLMObject, jsonData, strWrapper);
            };
            return await LLMReply(callback, json);
        }

        /// <summary>
        /// Computes the embeddings of the provided query.
        /// </summary>
        public async Task<string> Embeddings(string json)
        {
            AssertStarted();
            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
            {
                llmlib.LLM_Embeddings(LLMObject, jsonData, strWrapper);
            };
            return await LLMReply(callback, json);
        }

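        // Example (illustrative): the JSON payloads are assumed here to follow the
        // llama.cpp server schema, e.g.
        //
        //   string tokens = await llm.Tokenize("{\"content\": \"Hello world\"}");
        //   string embedding = await llm.Embeddings("{\"content\": \"Hello world\"}");
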
        /// <summary>
        /// Sets the lora scale; only works after the LLM service has started.
        /// </summary>
        public void ApplyLoras()
        {
            LoraWeightRequestList loraWeightRequest = new LoraWeightRequestList();
            loraWeightRequest.loraWeights = new List<LoraWeightRequest>();
            float[] weights = loraManager.GetWeights();
            if (weights.Length == 0) return;
            for (int i = 0; i < weights.Length; i++)
            {
                loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = weights[i] });
            }

            // JsonUtility cannot serialise a bare array, so serialise the wrapper object
            // and extract the JSON array from it
            string json = JsonUtility.ToJson(loraWeightRequest);
            int startIndex = json.IndexOf("[");
            int endIndex = json.LastIndexOf("]") + 1;
            json = json.Substring(startIndex, endIndex - startIndex);

            IntPtr stringWrapper = llmlib.StringWrapper_Construct();
            llmlib.LLM_LoraWeight(LLMObject, json, stringWrapper);
            llmlib.StringWrapper_Delete(stringWrapper);
        }

        /// <summary>
        /// Gets a list of the lora adapters.
        /// </summary>
        public async Task<List<LoraWeightResult>> ListLoras()
        {
            AssertStarted();
            LLMNoInputReplyCallback callback = (IntPtr LLMObject, IntPtr strWrapper) =>
            {
                llmlib.LLM_LoraList(LLMObject, strWrapper);
            };
            string json = await LLMNoInputReply(callback);
            if (String.IsNullOrEmpty(json)) return null;
            LoraWeightResultList loraRequest = JsonUtility.FromJson<LoraWeightResultList>("{\"loraWeights\": " + json + "}");
            return loraRequest.loraWeights;
        }

        /// <summary>
        /// Allows to save / restore the state of a slot.
        /// </summary>
        public async Task<string> Slot(string json)
        {
            AssertStarted();
            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>
            {
                llmlib.LLM_Slot(LLMObject, jsonData, strWrapper);
            };
            return await LLMReply(callback, json);
        }

        /// <summary>
        /// Allows to use the chat and completion functionality of the LLM.
        /// </summary>
        public async Task<string> Completion(string json, Callback<string> streamCallback = null)
        {
            AssertStarted();
            if (streamCallback == null) streamCallback = (string s) => {};
            StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);
            await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));
            if (!started) return null;
            streamWrapper.Update();
            string result = streamWrapper.GetString();
            DestroyStreamWrapper(streamWrapper);
            CheckLLMStatus();
            return result;
        }

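        // Example (illustrative): a streaming completion request, assuming a
        // llama.cpp-style completion payload; the values are hypothetical.
        //
        //   string json = "{\"prompt\": \"Hello\", \"n_predict\": 32}";
        //   string result = await llm.Completion(json, chunk => Debug.Log(chunk));
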
        /// <summary>
        /// Allows to cancel the requests in a specific slot of the LLM.
        /// </summary>
        public void CancelRequest(int id_slot)
        {
            AssertStarted();
            llmlib?.LLM_Cancel(LLMObject, id_slot);
            CheckLLMStatus();
        }

        /// <summary>
        /// Stops and destroys the LLM.
        /// </summary>
        public void Destroy()
        {
            lock (staticLock)
            lock (startLock)
            {
                try
                {
                    if (llmlib != null)
                    {
                        if (LLMObject != IntPtr.Zero)
                        {
                            llmlib.LLM_Stop(LLMObject);
                            if (remote) llmlib.LLM_StopServer(LLMObject);
                            StopLogging();
                            llmThread?.Join();
                            llmlib.LLM_Delete(LLMObject);
                            LLMObject = IntPtr.Zero;
                        }
                        llmlib.Destroy();
                        llmlib = null;
                    }
                    started = false;
                    failed = false;
                }
                catch (Exception e)
                {
                    LLMUnitySetup.LogError(e.Message);
                }
            }
        }

        /// <summary>
        /// The Unity OnDestroy function called when the object is destroyed.
        /// </summary>
        public void OnDestroy()
        {
            Destroy();
            // Reconstructed line (missing from the listing): assumed to unregister this LLM
            // from the LLMManager, mirroring the Register call in the constructor.
            LLMManager.Unregister(this);
        }
    }
}