LLMUnity/LLM_8cs_source.html

using System;

using System.Collections.Generic;

using System.IO;

using System.Threading;

using System.Threading.Tasks;

using UnityEditor;

using UnityEngine;


namespace LLMUnity

{

    [DefaultExecutionOrder(-1)]


    public class LLM : MonoBehaviour

    {

        [Tooltip("show/hide advanced options in the GameObject")]

        [HideInInspector] public bool advancedOptions = false;

        [Tooltip("enable remote server functionality")]

        [LocalRemote] public bool remote = false;

        [Tooltip("port to use for the remote LLM server")]

        [Remote] public int port = 13333;

        [Tooltip("number of threads to use (-1 = all)")]

        [LLM] public int numThreads = -1;

        [Tooltip("number of model layers to offload to the GPU (0 = GPU not used). If the user's GPU is not supported, the LLM will fall back to the CPU")]

        [LLM] public int numGPULayers = 0;

        [Tooltip("log the output of the LLM in the Unity Editor.")]

        [LLM] public bool debug = false;

        [Tooltip("number of prompts that can happen in parallel (-1 = number of LLMCaller objects)")]

        [LLMAdvanced] public int parallelPrompts = -1;

        [Tooltip("do not destroy the LLM GameObject when loading a new Scene.")]

        [LLMAdvanced] public bool dontDestroyOnLoad = true;

        [Tooltip("Size of the prompt context (0 = context size of the model). This is the number of tokens the model can take as input when generating responses.")]

        [DynamicRange("minContextLength", "maxContextLength", false), Model] public int contextSize = 8192;

        [Tooltip("Batch size for prompt processing.")]

        [ModelAdvanced] public int batchSize = 512;

        public bool started { get; protected set; } = false;

        public bool failed { get; protected set; } = false;

        public static bool modelSetupFailed { get; protected set; } = false;

        public static bool modelSetupComplete { get; protected set; } = false;

        [Tooltip("LLM model to use (.gguf format)")]

        [ModelAdvanced] public string model = "";

        [Tooltip("Chat template for the model")]

        [ModelAdvanced] public string chatTemplate = ChatTemplate.DefaultTemplate;

        [Tooltip("LORA models to use (.gguf format)")]

        [ModelAdvanced] public string lora = "";

        [Tooltip("the weights of the LORA models being used.")]

        [ModelAdvanced] public string loraWeights = "";

        [Tooltip("enable use of flash attention")]

        [ModelExtras] public bool flashAttention = false;

        [Tooltip("API key to use for the server")]

        public string APIKey;


        // SSL certificate

        [SerializeField]

        private string SSLCert = "";

        public string SSLCertPath = "";

        // SSL key

        [SerializeField]

        private string SSLKey = "";

        public string SSLKeyPath = "";


        public int minContextLength = 0;

        public int maxContextLength = 0;

        public string architecture => llmlib.architecture;


        IntPtr LLMObject = IntPtr.Zero;

        List<LLMCaller> clients = new List<LLMCaller>();

        LLMLib llmlib;

        StreamWrapper logStreamWrapper = null;

        Thread llmThread = null;

        List<StreamWrapper> streamWrappers = new List<StreamWrapper>();

        public LLMManager llmManager = new LLMManager();

        private readonly object startLock = new object();

        static readonly object staticLock = new object();

        public LoraManager loraManager = new LoraManager();

        string loraPre = "";

        string loraWeightsPre = "";

        public bool embeddingsOnly = false;

        public int embeddingLength = 0;


        public LLM()

        {

            LLMManager.Register(this);

        }


        void OnValidate()

        {

            if (lora != loraPre || loraWeights != loraWeightsPre)

            {

                loraManager.FromStrings(lora, loraWeights);

                (loraPre, loraWeightsPre) = (lora, loraWeights);

            }

        }


        public async void Awake()

        {

            if (!enabled) return;

#if !UNITY_EDITOR

            modelSetupFailed = !await LLMManager.Setup();

#endif

            modelSetupComplete = true;

            if (modelSetupFailed)

            {

                failed = true;

                return;

            }

            string arguments = GetLlamaccpArguments();

            if (arguments == null)

            {

                failed = true;

                return;

            }

            await Task.Run(() => StartLLMServer(arguments));

            if (!started) return;

            if (dontDestroyOnLoad) DontDestroyOnLoad(transform.root.gameObject);

        }


        public async Task WaitUntilReady()

        {

            while (!started) await Task.Yield();

        }


        public static async Task<bool> WaitUntilModelSetup(Callback<float> downloadProgressCallback = null)

        {

            if (downloadProgressCallback != null) LLMManager.downloadProgressCallbacks.Add(downloadProgressCallback);

            while (!modelSetupComplete) await Task.Yield();

            return !modelSetupFailed;

        }


        public static string GetLLMManagerAsset(string path)

        {

#if UNITY_EDITOR

            if (!EditorApplication.isPlaying) return GetLLMManagerAssetEditor(path);

#endif

            return GetLLMManagerAssetRuntime(path);

        }


        public static string GetLLMManagerAssetEditor(string path)

        {

            // empty

            if (string.IsNullOrEmpty(path)) return path;

            // LLMManager - return location the file will be stored in StreamingAssets

            ModelEntry modelEntry = LLMManager.Get(path);

            if (modelEntry != null) return modelEntry.filename;

            // StreamingAssets - return relative location within StreamingAssets

            string assetPath = LLMUnitySetup.GetAssetPath(path); // Note: this will return the full path if a full path is passed

            string basePath = LLMUnitySetup.GetAssetPath();

            if (File.Exists(assetPath))

            {

                if (LLMUnitySetup.IsSubPath(assetPath, basePath)) return LLMUnitySetup.RelativePath(assetPath, basePath);

            }

            // full path

            if (!File.Exists(assetPath))

            {

                LLMUnitySetup.LogError($"Model {path} was not found.");

            }

            else

            {

                string errorMessage = $"The model {path} was loaded locally. You can include it in the build in one of these ways:";

                errorMessage += $"\n-Copy the model inside the StreamingAssets folder and use its StreamingAssets path";

                errorMessage += $"\n-Load the model with the model manager inside the LLM GameObject and use its filename";

                LLMUnitySetup.LogWarning(errorMessage);

            }

            return path;

        }


        public static string GetLLMManagerAssetRuntime(string path)

        {

            // empty

            if (string.IsNullOrEmpty(path)) return path;

            // LLMManager

            string managerPath = LLMManager.GetAssetPath(path);

            if (!string.IsNullOrEmpty(managerPath) && File.Exists(managerPath)) return managerPath;

            // StreamingAssets

            string assetPath = LLMUnitySetup.GetAssetPath(path);

            if (File.Exists(assetPath)) return assetPath;

            // download path

            assetPath = LLMUnitySetup.GetDownloadAssetPath(path);

            if (File.Exists(assetPath)) return assetPath;

            // give up

            return path;

        }


        public void SetModel(string path)

        {

            model = GetLLMManagerAsset(path);

            if (!string.IsNullOrEmpty(model))

            {

                ModelEntry modelEntry = LLMManager.Get(model);

                if (modelEntry == null) modelEntry = new ModelEntry(GetLLMManagerAssetRuntime(model));

                SetTemplate(modelEntry.chatTemplate);


                maxContextLength = modelEntry.contextLength;

                if (contextSize > maxContextLength) contextSize = maxContextLength;

                SetEmbeddings(modelEntry.embeddingLength, modelEntry.embeddingOnly);

                if (contextSize == 0 && modelEntry.contextLength > 32768)

                {

                    LLMUnitySetup.LogWarning($"The model {path} has very large context size ({modelEntry.contextLength}), consider setting it to a smaller value (<=32768) to avoid filling up the RAM");

                }

            }

#if UNITY_EDITOR

            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);

#endif

        }


        public void SetLora(string path, float weight = 1)

        {

            AssertNotStarted();

            loraManager.Clear();

            AddLora(path, weight);

        }


        public void AddLora(string path, float weight = 1)

        {

            AssertNotStarted();

            loraManager.Add(path, weight);

            UpdateLoras();

        }


        public void RemoveLora(string path)

        {

            AssertNotStarted();

            loraManager.Remove(path);

            UpdateLoras();

        }


        public void RemoveLoras()

        {

            AssertNotStarted();

            loraManager.Clear();

            UpdateLoras();

        }


        public void SetLoraWeight(string path, float weight)

        {

            loraManager.SetWeight(path, weight);

            UpdateLoras();

            if (started) ApplyLoras();

        }


        public void SetLoraWeights(Dictionary<string, float> loraToWeight)

        {

            foreach (KeyValuePair<string, float> entry in loraToWeight) loraManager.SetWeight(entry.Key, entry.Value);

            UpdateLoras();

            if (started) ApplyLoras();

        }


        public void UpdateLoras()

        {

            (lora, loraWeights) = loraManager.ToStrings();

            (loraPre, loraWeightsPre) = (lora, loraWeights);

#if UNITY_EDITOR

            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);

#endif

        }


        public void SetTemplate(string templateName, bool setDirty = true)

        {

            chatTemplate = templateName;

            if (started) llmlib?.LLM_SetTemplate(LLMObject, chatTemplate);

#if UNITY_EDITOR

            if (setDirty && !EditorApplication.isPlaying) EditorUtility.SetDirty(this);

#endif

        }


        public void SetEmbeddings(int embeddingLength, bool embeddingsOnly)

        {

            this.embeddingsOnly = embeddingsOnly;

            this.embeddingLength = embeddingLength;

#if UNITY_EDITOR

            if (!EditorApplication.isPlaying) EditorUtility.SetDirty(this);

#endif

        }


        string ReadFileContents(string path)

        {

            if (String.IsNullOrEmpty(path)) return "";

            else if (!File.Exists(path))

            {

                LLMUnitySetup.LogError($"File {path} not found!");

                return "";

            }

            return File.ReadAllText(path);

        }


        public void SetSSLCert(string path)

        {

            SSLCertPath = path;

            SSLCert = ReadFileContents(path);

        }


        public void SetSSLKey(string path)

        {

            SSLKeyPath = path;

            SSLKey = ReadFileContents(path);

        }


        public string GetTemplate()

        {

            return chatTemplate;

        }


        protected virtual string GetLlamaccpArguments()

        {

            // Start the LLM server in a cross-platform way

            if ((SSLCert != "" && SSLKey == "") || (SSLCert == "" && SSLKey != ""))

            {

                LLMUnitySetup.LogError($"Both SSL certificate and key need to be provided!");

                return null;

            }


            if (model == "")

            {

                LLMUnitySetup.LogError("No model file provided!");

                return null;

            }

            string modelPath = GetLLMManagerAssetRuntime(model);

            if (!File.Exists(modelPath))

            {

                LLMUnitySetup.LogError($"File {modelPath} not found!");

                return null;

            }


            loraManager.FromStrings(lora, loraWeights);

            string loraArgument = "";

            foreach (string lora in loraManager.GetLoras())

            {

                string loraPath = GetLLMManagerAssetRuntime(lora);

                if (!File.Exists(loraPath))

                {

                    LLMUnitySetup.LogError($"File {loraPath} not found!");

                    return null;

                }

                loraArgument += $" --lora \"{loraPath}\"";

            }


            int numThreadsToUse = numThreads;

            if (Application.platform == RuntimePlatform.Android && numThreads <= 0) numThreadsToUse = LLMUnitySetup.AndroidGetNumBigCores();


            int slots = GetNumClients();

            string arguments = $"-m \"{modelPath}\" -c {contextSize} -b {batchSize} --log-disable -np {slots}";

            if (embeddingsOnly) arguments += " --embedding";

            if (numThreadsToUse > 0) arguments += $" -t {numThreadsToUse}";

            arguments += loraArgument;

            if (numGPULayers > 0) arguments += $" -ngl {numGPULayers}";

            if (LLMUnitySetup.FullLlamaLib && flashAttention) arguments += $" --flash-attn";

            if (remote)

            {

                arguments += $" --port {port} --host 0.0.0.0";

                if (!String.IsNullOrEmpty(APIKey)) arguments += $" --api-key {APIKey}";

            }


            // the following is the equivalent for running from command line

            string serverCommand;

            if (Application.platform == RuntimePlatform.WindowsEditor || Application.platform == RuntimePlatform.WindowsPlayer) serverCommand = "undreamai_server.exe";

            else serverCommand = "./undreamai_server";

            serverCommand += " " + arguments;

            serverCommand += $" --template \"{chatTemplate}\"";

            if (remote && SSLCert != "" && SSLKey != "") serverCommand += $" --ssl-cert-file {SSLCertPath} --ssl-key-file {SSLKeyPath}";

            LLMUnitySetup.Log($"Deploy server command: {serverCommand}");

            return arguments;

        }


        private void SetupLogging()

        {

            logStreamWrapper = ConstructStreamWrapper(LLMUnitySetup.LogWarning, true);

            llmlib?.Logging(logStreamWrapper.GetStringWrapper());

        }


        private void StopLogging()

        {

            if (logStreamWrapper == null) return;

            llmlib?.StopLogging();

            DestroyStreamWrapper(logStreamWrapper);

        }


        private void StartLLMServer(string arguments)

        {

            started = false;

            failed = false;

            bool useGPU = numGPULayers > 0;


            foreach (string arch in LLMLib.PossibleArchitectures(useGPU))

            {

                string error;

                try

                {

                    InitLib(arch);

                    InitService(arguments);

                    LLMUnitySetup.Log($"Using architecture: {arch}");

                    break;

                }

                catch (LLMException e)

                {

                    error = e.Message;

                    Destroy();

                }

                catch (DestroyException)

                {

                    break;

                }

                catch (Exception e)

                {

                    error = $"{e.GetType()}: {e.Message}";

                }

                LLMUnitySetup.Log($"Tried architecture: {arch}, error: " + error);

            }

            if (llmlib == null)

            {

                LLMUnitySetup.LogError("LLM service couldn't be created");

                failed = true;

                return;

            }

            CallWithLock(StartService);

            LLMUnitySetup.Log("LLM service created");

        }


        private void InitLib(string arch)

        {

            llmlib = new LLMLib(arch);

            CheckLLMStatus(false);

        }


        void CallWithLock(EmptyCallback fn)

        {

            lock (startLock)

            {

                if (llmlib == null) throw new DestroyException();

                fn();

            }

        }


        private void InitService(string arguments)

        {

            lock (staticLock)

            {

                if (debug) CallWithLock(SetupLogging);

                CallWithLock(() => { LLMObject = llmlib.LLM_Construct(arguments); });

                CallWithLock(() => llmlib.LLM_SetTemplate(LLMObject, chatTemplate));

                if (remote)

                {

                    if (SSLCert != "" && SSLKey != "")

                    {

                        LLMUnitySetup.Log("Using SSL");

                        CallWithLock(() => llmlib.LLM_SetSSL(LLMObject, SSLCert, SSLKey));

                    }

                    CallWithLock(() => llmlib.LLM_StartServer(LLMObject));

                }

                CallWithLock(() => CheckLLMStatus(false));

            }

        }


        private void StartService()

        {

            llmThread = new Thread(() => llmlib.LLM_Start(LLMObject));

            llmThread.Start();

            while (!llmlib.LLM_Started(LLMObject)) {}

            ApplyLoras();

            started = true;

        }


        public int Register(LLMCaller llmCaller)

        {

            clients.Add(llmCaller);

            int index = clients.IndexOf(llmCaller);

            if (parallelPrompts != -1) return index % parallelPrompts;

            return index;

        }


        protected int GetNumClients()

        {

            return Math.Max(parallelPrompts == -1 ? clients.Count : parallelPrompts, 1);

        }


        public delegate void LLMStatusCallback(IntPtr LLMObject, IntPtr stringWrapper);

        public delegate void LLMNoInputReplyCallback(IntPtr LLMObject, IntPtr stringWrapper);

        public delegate void LLMReplyCallback(IntPtr LLMObject, string json_data, IntPtr stringWrapper);


        StreamWrapper ConstructStreamWrapper(Callback<string> streamCallback = null, bool clearOnUpdate = false)

        {

            StreamWrapper streamWrapper = new StreamWrapper(llmlib, streamCallback, clearOnUpdate);

            streamWrappers.Add(streamWrapper);

            return streamWrapper;

        }


        void DestroyStreamWrapper(StreamWrapper streamWrapper)

        {

            streamWrappers.Remove(streamWrapper);

            streamWrapper.Destroy();

        }


        public void Update()

        {

            foreach (StreamWrapper streamWrapper in streamWrappers) streamWrapper.Update();

        }


        void AssertStarted()

        {

            string error = null;

            if (failed) error = "LLM service couldn't be created";

            else if (!started) error = "LLM service not started";

            if (error != null)

            {

                LLMUnitySetup.LogError(error);

                throw new Exception(error);

            }

        }


        void AssertNotStarted()

        {

            if (started)

            {

                string error = "This method can't be called when the LLM has started";

                LLMUnitySetup.LogError(error);

                throw new Exception(error);

            }

        }


        void CheckLLMStatus(bool log = true)

        {

            if (llmlib == null) { return; }

            IntPtr stringWrapper = llmlib.StringWrapper_Construct();

            int status = llmlib.LLM_Status(LLMObject, stringWrapper);

            string result = llmlib.GetStringWrapperResult(stringWrapper);

            llmlib.StringWrapper_Delete(stringWrapper);

            string message = $"LLM {status}: {result}";

            if (status > 0)

            {

                if (log) LLMUnitySetup.LogError(message);

                throw new LLMException(message, status);

            }

            else if (status < 0)

            {

                if (log) LLMUnitySetup.LogWarning(message);

            }

        }


        async Task<string> LLMNoInputReply(LLMNoInputReplyCallback callback)

        {

            AssertStarted();

            IntPtr stringWrapper = llmlib.StringWrapper_Construct();

            await Task.Run(() => callback(LLMObject, stringWrapper));

            string result = llmlib?.GetStringWrapperResult(stringWrapper);

            llmlib?.StringWrapper_Delete(stringWrapper);

            CheckLLMStatus();

            return result;

        }


        async Task<string> LLMReply(LLMReplyCallback callback, string json)

        {

            AssertStarted();

            IntPtr stringWrapper = llmlib.StringWrapper_Construct();

            await Task.Run(() => callback(LLMObject, json, stringWrapper));

            string result = llmlib?.GetStringWrapperResult(stringWrapper);

            llmlib?.StringWrapper_Delete(stringWrapper);

            CheckLLMStatus();

            return result;

        }


        public async Task<string> Tokenize(string json)

        {

            AssertStarted();

            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>

            {

                llmlib.LLM_Tokenize(LLMObject, jsonData, strWrapper);

            };

            return await LLMReply(callback, json);

        }


        public async Task<string> Detokenize(string json)

        {

            AssertStarted();

            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>

            {

                llmlib.LLM_Detokenize(LLMObject, jsonData, strWrapper);

            };

            return await LLMReply(callback, json);

        }


        public async Task<string> Embeddings(string json)

        {

            AssertStarted();

            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>

            {

                llmlib.LLM_Embeddings(LLMObject, jsonData, strWrapper);

            };

            return await LLMReply(callback, json);

        }


        public void ApplyLoras()

        {

            LoraWeightRequestList loraWeightRequest = new LoraWeightRequestList();

            loraWeightRequest.loraWeights = new List<LoraWeightRequest>();

            float[] weights = loraManager.GetWeights();

            if (weights.Length == 0) return;

            for (int i = 0; i < weights.Length; i++)

            {

                loraWeightRequest.loraWeights.Add(new LoraWeightRequest() { id = i, scale = weights[i] });

            }


            string json = JsonUtility.ToJson(loraWeightRequest);

            int startIndex = json.IndexOf("[");

            int endIndex = json.LastIndexOf("]") + 1;

            json = json.Substring(startIndex, endIndex - startIndex);


            IntPtr stringWrapper = llmlib.StringWrapper_Construct();

            llmlib.LLM_LoraWeight(LLMObject, json, stringWrapper);

            llmlib.StringWrapper_Delete(stringWrapper);

        }


        public async Task<List<LoraWeightResult>> ListLoras()

        {

            AssertStarted();

            LLMNoInputReplyCallback callback = (IntPtr LLMObject, IntPtr strWrapper) =>

            {

                llmlib.LLM_LoraList(LLMObject, strWrapper);

            };

            string json = await LLMNoInputReply(callback);

            if (String.IsNullOrEmpty(json)) return null;

            LoraWeightResultList loraRequest = JsonUtility.FromJson<LoraWeightResultList>("{\"loraWeights\": " + json + "}");

            return loraRequest.loraWeights;

        }


        public async Task<string> Slot(string json)

        {

            AssertStarted();

            LLMReplyCallback callback = (IntPtr LLMObject, string jsonData, IntPtr strWrapper) =>

            {

                llmlib.LLM_Slot(LLMObject, jsonData, strWrapper);

            };

            return await LLMReply(callback, json);

        }


        public async Task<string> Completion(string json, Callback<string> streamCallback = null)

        {

            AssertStarted();

            if (streamCallback == null) streamCallback = (string s) => {};

            StreamWrapper streamWrapper = ConstructStreamWrapper(streamCallback);

            await Task.Run(() => llmlib.LLM_Completion(LLMObject, json, streamWrapper.GetStringWrapper()));

            if (!started) return null;

            streamWrapper.Update();

            string result = streamWrapper.GetString();

            DestroyStreamWrapper(streamWrapper);

            CheckLLMStatus();

            return result;

        }


        public void CancelRequest(int id_slot)

        {

            AssertStarted();

            llmlib?.LLM_Cancel(LLMObject, id_slot);

            CheckLLMStatus();

        }


        public void Destroy()

        {

            lock (staticLock)

                lock (startLock)

                {

                    try

                    {

                        if (llmlib != null)

                        {

                            if (LLMObject != IntPtr.Zero)

                            {

                                llmlib.LLM_Stop(LLMObject);

                                if (remote) llmlib.LLM_StopServer(LLMObject);

                                StopLogging();

                                llmThread?.Join();

                                llmlib.LLM_Delete(LLMObject);

                                LLMObject = IntPtr.Zero;

                            }

                            llmlib.Destroy();

                            llmlib = null;

                        }

                        started = false;

                        failed = false;

                    }

                    catch (Exception e)

                    {

                        LLMUnitySetup.LogError(e.Message);

                    }

                }

        }


        public void OnDestroy()

        {

            Destroy();

            LLMManager.Unregister(this);

        }


    }


}


LLMUnity.ChatTemplate
Class implementing the skeleton of a chat template.
Definition LLMChatTemplates.cs:13

LLMUnity.ChatTemplate.DefaultTemplate
static string DefaultTemplate
the default template used when it can't be determined ("chatml")
Definition LLMChatTemplates.cs:15

LLMUnity.LLMManager
Class implementing the LLM model manager.
Definition LLMManager.cs:111

LLMUnity.LLMManager.Unregister
static void Unregister(LLM llm)
Removes a LLM from the model manager.
Definition LLMManager.cs:315

LLMUnity.LLMManager.Get
static ModelEntry Get(string path)
Gets the model entry for a model path.
Definition LLMManager.cs:242

LLMUnity.LLMManager.Setup
static Task< bool > Setup()
Setup of the models.
Definition LLMManager.cs:138

LLMUnity.LLMManager.Register
static void Register(LLM llm)
Registers a LLM to the model manager.
Definition LLMManager.cs:306

LLMUnity.LLMUnitySetup
Class implementing helper functions for setup and process management.
Definition LLMUnitySetup.cs:101

LLMUnity.LLM
Class implementing the LLM server.
Definition LLM.cs:19

LLMUnity.LLM.numGPULayers
int numGPULayers
number of model layers to offload to the GPU (0 = GPU not used). If the user's GPU is not supported,...
Definition LLM.cs:35

LLMUnity.LLM.ApplyLoras
void ApplyLoras()
Sets the lora scale, only works after the LLM service has started.
Definition LLM.cs:721

LLMUnity.LLM.Slot
async Task< string > Slot(string json)
Allows to save / restore the state of a slot.
Definition LLM.cs:764

LLMUnity.LLM.SetLoraWeights
void SetLoraWeights(Dictionary< string, float > loraToWeight)
Allows to change the weights (scale) of the LORA models in the LLM.
Definition LLM.cs:318

LLMUnity.LLM.ListLoras
async Task< List< LoraWeightResult > > ListLoras()
Gets a list of the lora adapters.
Definition LLM.cs:746

LLMUnity.LLM.WaitUntilModelSetup
static async Task< bool > WaitUntilModelSetup(Callback< float > downloadProgressCallback=null)
Allows to wait until the LLM models are downloaded and ready.
Definition LLM.cs:162

LLMUnity.LLM.GetTemplate
string GetTemplate()
Returns the chat template of the LLM.
Definition LLM.cs:400

LLMUnity.LLM.SetLoraWeight
void SetLoraWeight(string path, float weight)
Allows to change the weight (scale) of a LORA model in the LLM.
Definition LLM.cs:307

LLMUnity.LLM.CancelRequest
void CancelRequest(int id_slot)
Allows to cancel the requests in a specific slot of the LLM.
Definition LLM.cs:798

LLMUnity.LLM.parallelPrompts
int parallelPrompts
number of prompts that can happen in parallel (-1 = number of LLMCaller objects)
Definition LLM.cs:41

LLMUnity.LLM.debug
bool debug
log the output of the LLM in the Unity Editor.
Definition LLM.cs:38

LLMUnity.LLM.Awake
async void Awake()
The Unity Awake function that starts the LLM server.
Definition LLM.cs:127

LLMUnity.LLM.Detokenize
async Task< string > Detokenize(string json)
Detokenises the provided query.
Definition LLM.cs:692

LLMUnity.LLM.OnDestroy
void OnDestroy()
The Unity OnDestroy function called when the onbject is destroyed. The function StopProcess is called...
Definition LLM.cs:843

LLMUnity.LLM.SetLora
void SetLora(string path, float weight=1)
Allows to set a LORA model to use in the LLM. The model provided is copied to the Assets/StreamingAss...
Definition LLM.cs:260

LLMUnity.LLM.AddLora
void AddLora(string path, float weight=1)
Allows to add a LORA model to use in the LLM. The model provided is copied to the Assets/StreamingAss...
Definition LLM.cs:273

LLMUnity.LLM.advancedOptions
bool advancedOptions
show/hide advanced options in the GameObject
Definition LLM.cs:22

LLMUnity.LLM.RemoveLora
void RemoveLora(string path)
Allows to remove a LORA model from the LLM. Models supported are in .gguf format.
Definition LLM.cs:285

LLMUnity.LLM.modelSetupFailed
static bool modelSetupFailed
Boolean set to true if the models were not downloaded successfully.
Definition LLM.cs:57

LLMUnity.LLM.lora
string lora
LORA models to use (.gguf format)
Definition LLM.cs:68

LLMUnity.LLM.contextSize
int contextSize
Size of the prompt context (0 = context size of the model). This is the number of tokens the model ca...
Definition LLM.cs:48

LLMUnity.LLM.numThreads
int numThreads
number of threads to use (-1 = all)
Definition LLM.cs:31

LLMUnity.LLM.started
bool started
Boolean set to true if the server has started and is ready to receive requests, false otherwise.
Definition LLM.cs:53

LLMUnity.LLM.SetModel
void SetModel(string path)
Allows to set the model used by the LLM. The model provided is copied to the Assets/StreamingAssets f...
Definition LLM.cs:232

LLMUnity.LLM.port
int port
port to use for the remote LLM server
Definition LLM.cs:28

LLMUnity.LLM.remote
bool remote
enable remote server functionality
Definition LLM.cs:25

LLMUnity.LLM.SetSSLCert
void SetSSLCert(string path)
Use a SSL certificate for the LLM server.
Definition LLM.cs:380

LLMUnity.LLM.RemoveLoras
void RemoveLoras()
Allows to remove all LORA models from the LLM.
Definition LLM.cs:295

LLMUnity.LLM.dontDestroyOnLoad
bool dontDestroyOnLoad
do not destroy the LLM GameObject when loading a new Scene.
Definition LLM.cs:44

LLMUnity.LLM.SetEmbeddings
void SetEmbeddings(int embeddingLength, bool embeddingsOnly)
Set LLM Embedding parameters.
Definition LLM.cs:352

LLMUnity.LLM.model
string model
LLM model to use (.gguf format)
Definition LLM.cs:62

LLMUnity.LLM.APIKey
string APIKey
API key to use for the server.
Definition LLM.cs:77

LLMUnity.LLM.Tokenize
async Task< string > Tokenize(string json)
Tokenises the provided query.
Definition LLM.cs:677

LLMUnity.LLM.batchSize
int batchSize
Batch size for prompt processing.
Definition LLM.cs:51

LLMUnity.LLM.SetSSLKey
void SetSSLKey(string path)
Use a SSL key for the LLM server.
Definition LLM.cs:390

LLMUnity.LLM.chatTemplate
string chatTemplate
Chat template for the model.
Definition LLM.cs:65

LLMUnity.LLM.flashAttention
bool flashAttention
enable use of flash attention
Definition LLM.cs:74

LLMUnity.LLM.Completion
async Task< string > Completion(string json, Callback< string > streamCallback=null)
Allows to use the chat and completion functionality of the LLM.
Definition LLM.cs:780

LLMUnity.LLM.modelSetupComplete
static bool modelSetupComplete
Boolean set to true if the server has started and is ready to receive requests, false otherwise.
Definition LLM.cs:59

LLMUnity.LLM.WaitUntilReady
async Task WaitUntilReady()
Allows to wait until the LLM is ready.
Definition LLM.cs:153

LLMUnity.LLM.SetTemplate
void SetTemplate(string templateName, bool setDirty=true)
Set the chat template for the LLM.
Definition LLM.cs:338

LLMUnity.LLM.Destroy
void Destroy()
Stops and destroys the LLM.
Definition LLM.cs:808

LLMUnity.LLM.Update
void Update()
The Unity Update function. It is used to retrieve the LLM replies.
Definition LLM.cs:604

LLMUnity.LLM.loraWeights
string loraWeights
the weights of the LORA models being used.
Definition LLM.cs:71

LLMUnity.LLM.Register
int Register(LLMCaller llmCaller)
Registers a local LLMCaller object. This allows to bind the LLMCaller "client" to a specific slot of ...
Definition LLM.cs:570

LLMUnity.LLM.failed
bool failed
Boolean set to true if the server has failed to start.
Definition LLM.cs:55

LLMUnity.LLM.Embeddings
async Task< string > Embeddings(string json)
Computes the embeddings of the provided query.
Definition LLM.cs:707

LLMUnity.LoraManager
Class representing the LORA manager allowing to convert and retrieve LORA assets to string (for seria...
Definition LLMUtils.cs:56

LLMUnity.LoraManager.GetWeights
float[] GetWeights()
Gets the weights of the LORAs in the manager.
Definition LLMUtils.cs:185

LLMUnity.LoraManager.Add
void Add(string path, float weight=1)
Adds a LORA with the defined weight.
Definition LLMUtils.cs:99

LLMUnity.LoraManager.Remove
void Remove(string path)
Removes a LORA based on its path.
Definition LLMUtils.cs:109

LLMUnity.LoraManager.SetWeight
void SetWeight(string path, float weight)
Modifies the weight of a LORA.
Definition LLMUtils.cs:120

LLMUnity.LoraManager.FromStrings
void FromStrings(string loraString, string loraWeightsString)
Converts strings with the lora paths and weights to entries in the LORA manager.
Definition LLMUtils.cs:136

LLMUnity.LoraManager.GetLoras
string[] GetLoras()
Gets the paths of the LORAs in the manager.
Definition LLMUtils.cs:196

LLMUnity.LoraManager.Clear
void Clear()
Clears the LORA assets.
Definition LLMUtils.cs:63

LLMUnity.ModelEntry
Class implementing a LLM model entry.
Definition LLMManager.cs:18

LLMUnity
Definition LLM.cs:12