using System.Collections.Generic;
using System.Threading.Tasks;
using UnityEngine.Networking;

[DefaultExecutionOrder(-2)]

[LocalRemote] public bool remote = false;
[Remote] public string host = "localhost";
[Remote] public int port = 13333;

[ModelAdvanced] public string grammar = null;
[ModelAdvanced] public int slot = -1;
[ModelAdvanced] public int seed = 0;
[ModelAdvanced, Float(0f, 2f)] public float temperature = 0.2f;
[ModelAdvanced, Int(-1, 100)] public int topK = 40;
[ModelAdvanced, Float(0f, 1f)] public float topP = 0.9f;
[ModelAdvanced, Float(0f, 1f)] public float minP = 0.05f;
[ModelAdvanced, Float(0f, 1f)] public float tfsZ = 1f;
[ModelAdvanced, Float(0f, 1f)] public float typicalP = 1f;
[ModelAdvanced, Int(0, 2)] public int mirostat = 0;
[ModelAdvanced, Float(0f, 10f)] public float mirostatTau = 5f;
[ModelAdvanced, Int(0, 10)] public int nProbs = 0;

public List<string> stop = new List<string>();

[TextArea(5, 10), Chat]
public string prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.";

public List<ChatMessage> chat;
private SemaphoreSlim chatLock = new SemaphoreSlim(1, 1);
private string chatTemplate;
public string grammarString;
private List<(string, string)> requestHeaders;
private List<UnityWebRequest> WIPRequests = new List<UnityWebRequest>();
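A minimal sketch of driving these fields from a script, assuming the package's LLMUnity namespace and an LLMCharacter component on the same GameObject; the class name CharacterSetup and the chosen values are illustrative, the field names are the ones declared above.

using UnityEngine;
using LLMUnity;  // assumed package namespace

public class CharacterSetup : MonoBehaviour
{
    void Start()
    {
        LLMCharacter character = GetComponent<LLMCharacter>();

        // point the character at a remote LLM server instead of a local LLM
        character.remote = true;
        character.host = "localhost";
        character.port = 13333;

        // sampling options (defaults shown in the declarations above)
        character.temperature = 0.7f;
        character.topK = 40;
        character.topP = 0.9f;
        character.seed = -1;  // -1 = different results on every run
    }
}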
if (!enabled) return;

requestHeaders = new List<(string, string)> { ("Content-Type", "application/json") };

LLMUnitySetup.LogError($"No LLM assigned or detected for LLMCharacter {name}!");

if (slot == -1) slot = slotFromServer;

if (!String.IsNullOrEmpty(APIKey)) requestHeaders.Add(("Authorization", "Bearer " + APIKey));

string msg = $"Assigning LLM {llm.name} to LLMCharacter {name}";
if (llm.gameObject.scene != gameObject.scene) msg += $" from scene {llm.gameObject.scene}";
LLMUnitySetup.Log(msg);

void SortBySceneAndHierarchy(LLM[] array)

protected void InitHistory()

if (save == "" || !File.Exists(GetJsonSavePath(save))) return;

public virtual string GetSavePath(string filename)
{
    return Path.Combine(Application.persistentDataPath, filename).Replace('\\', '/');
}

public virtual string GetJsonSavePath(string filename)
{
    return GetSavePath(filename + ".json");
}

public virtual string GetCacheSavePath(string filename)
{
    return GetSavePath(filename + ".cache");
}
private void InitPrompt(bool clearChat = true)

private bool CheckTemplate()
if (template == null)

if (!CheckTemplate()) return false;

if (tokens == null) return false;

private void InitGrammar()

private void SetNKeep(List<int> tokens)

if (!CheckTemplate()) return null;

public void AddMessage(string role, string content)
public void AddPlayerMessage(string content)
public void AddAIMessage(string content)

return result.content.Trim();

if (!CheckTemplate()) return null;
if (!await InitNKeep()) return null;

AddPlayerMessage(query);

AddPlayerMessage(query);

if (!CheckTemplate()) return;
if (!await InitNKeep()) return;

string filepath = GetJsonSavePath(filename);
string cachepath = GetCacheSavePath(filename);

string filepath = GetJsonSavePath(filename);
string cachepath = GetCacheSavePath(filename);

if (response == null) return default;

response = $"{{\"data\": [{responseArray}]}}";

protected void CancelRequestsLocal()
protected void CancelRequestsRemote()

if (remote) CancelRequestsRemote();
else CancelRequestsLocal();

LLMUnitySetup.LogError("wrong callback type, should be string");
LLMUnitySetup.LogError($"Unknown endpoint {endpoint}");
LLMUnitySetup.LogError("Saving and loading is not currently supported in remote setting");

byte[] jsonToSend = new System.Text.UTF8Encoding().GetBytes(json);

if (request.responseCode == (int)System.Net.HttpStatusCode.Unauthorized) break;

if (error != null) LLMUnitySetup.LogError(error);

public class ChatListWrapper
{
    public List<ChatMessage> chat;
}
Class implementing the skeleton of a chat template.
static ChatTemplate GetTemplate(string template)
Creates the chat template based on the provided chat template name.
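A hedged sketch of resolving a template through the static helper above; "chatml" is assumed to be one of the registered template names.

// assumed usage: look up a chat template by name ("chatml" is an illustrative name)
ChatTemplate template = ChatTemplate.GetTemplate("chatml");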
Class implementing the LLM characters.
bool cachePrompt
option to cache the prompt as it is being created by the chat to avoid reprocessing the entire prompt...
LLM llm
the LLM object to use
int slot
specify which slot of the server to use for computation (affects caching)
void Awake()
The Unity Awake function that initializes the state before the application starts....
void CancelRequests()
Cancel the ongoing requests e.g. Chat, Complete.
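A one-line sketch, assuming llmCharacter is an LLMCharacter reference, e.g. wired to a UI stop button to abort an in-flight Chat or Complete call:

llmCharacter.CancelRequests();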
List< string > stop
stopwords to stop the LLM in addition to the default stopwords from the chat template.
float topP
top-p sampling (1.0 = disabled). The top p value controls the cumulative probability of generated tok...
string AIName
the name of the AI
int nProbs
if greater than 0, the response also contains the probabilities of top N tokens for each generated to...
bool ignoreEos
ignore end of stream token and continue generating.
async Task< List< float > > Embeddings(string query, Callback< List< float > > callback=null)
Computes the embeddings of the provided input.
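A hedged sketch of the Embeddings call above, assuming llmCharacter is an LLMCharacter reference (e.g. a field on a MonoBehaviour like the CharacterSetup sketch earlier) and that the loaded model supports embeddings:

// logs the dimensionality of the embedding vector for an illustrative input
async Task LogEmbedding(LLMCharacter llmCharacter)
{
    List<float> embedding = await llmCharacter.Embeddings("Hello world");
    Debug.Log($"Embedding dimension: {embedding.Count}");
}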
float mirostatTau
set the Mirostat target entropy, parameter tau.
int numPredict
number of tokens to predict (-1 = infinity, -2 = until context filled). This is the amount of tokens ...
int numRetries
number of retries to use for the LLM server requests (-1 = infinite)
string prompt
a description of the AI role. This defines the LLMCharacter system prompt
float temperature
LLM temperature, lower values give more deterministic answers. The temperature setting adjusts how ra...
async Task< string > AskTemplate()
Asks the LLM for the chat template to use.
float presencePenalty
repeated token presence penalty (0.0 = disabled). Positive values penalize new tokens based on whethe...
string playerName
the name of the player
async Task Warmup(EmptyCallback completionCallback=null)
Allows warming up a model by processing the prompt. The prompt processing will be cached (if cacheProm...
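A hedged sketch of warming up the model at scene start so the first reply arrives faster; assumes a MonoBehaviour with a public LLMCharacter llmCharacter field:

// the EmptyCallback fires once prompt processing has finished
async void Start()
{
    await llmCharacter.Warmup(() => Debug.Log("Warmup finished, prompt cached"));
}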
float mirostatEta
set the Mirostat learning rate, parameter eta.
async Task LoadTemplate()
Load the chat template of the LLMCharacter.
float minP
minimum probability for a token to be used. The probability is defined relative to the probability of...
float typicalP
enable locally typical sampling with parameter p (1.0 = disabled).
virtual async Task< string > Load(string filename)
Load the chat history and cache from the provided filename / relative path.
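A hedged sketch of restoring a previous conversation, assuming llmCharacter is an LLMCharacter reference and "chat_history" is an illustrative filename relative to the persistent data path (see GetJsonSavePath / GetCacheSavePath above):

async Task RestoreHistory(LLMCharacter llmCharacter)
{
    await llmCharacter.Load("chat_history");  // reads chat_history.json (and the cache, if present)
}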
int nKeep
number of tokens to retain from the prompt when the model runs out of context (-1 = LLMCharacter prom...
async Task< string > Detokenize(List< int > tokens, Callback< string > callback=null)
Detokenises the provided tokens to a string.
bool penalizeNl
penalize newline tokens when applying the repeat penalty.
bool debugPrompt
select to log the constructed prompt in the Unity Editor.
bool saveCache
toggle to save the LLM cache. This speeds up the prompt calculation but also requires ~100MB of space...
int topK
top-k sampling (0 = disabled). The top k value controls the top k most probable tokens at each step o...
string grammar
grammar file used for the LLM in .gbnf format (relative to the Assets/StreamingAssets folder)
virtual async Task< string > Save(string filename)
Saves the chat history and cache to the provided filename / relative path.
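A hedged counterpart to Load, assuming llmCharacter is an LLMCharacter reference and the same illustrative filename; the files land under the paths returned by GetJsonSavePath and GetCacheSavePath:

async Task PersistHistory(LLMCharacter llmCharacter)
{
    // writes chat_history.json (and chat_history.cache when saveCache is enabled)
    await llmCharacter.Save("chat_history");
}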
string penaltyPrompt
prompt for the purpose of the penalty evaluation. Can be either null, a string or an array of numbers...
async Task< string > Chat(string query, Callback< string > callback=null, EmptyCallback completionCallback=null, bool addToHistory=true)
Chat functionality of the LLM. It calls the LLM completion based on the provided query including the ...
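A minimal, self-contained sketch of the Chat call with streaming output; the class name ChatExample and the Inspector-assigned reference are assumptions, the signature is the one listed above:

using UnityEngine;
using LLMUnity;  // assumed package namespace

public class ChatExample : MonoBehaviour
{
    public LLMCharacter llmCharacter;  // assign in the Inspector

    async void Start()
    {
        // the callback receives the reply as it streams in (when stream is enabled);
        // the completion callback fires once the full answer is ready
        string reply = await llmCharacter.Chat(
            "Hello, who are you?",
            partial => Debug.Log(partial),
            () => Debug.Log("answer complete"),
            addToHistory: true);
        Debug.Log(reply);
    }
}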
float repeatPenalty
control the repetition of token sequences in the generated text. The penalty is applied to repeated t...
bool remote
toggle to use remote LLM server or local LLM
bool stream
option to receive the reply from the model as it is produced (recommended!). If it is not selected,...
float frequencyPenalty
repeated token frequency penalty (0.0 = disabled). Positive values penalize new tokens based on their...
string APIKey
allows the use of a server with an API key
int mirostat
enable Mirostat sampling, controlling perplexity during text generation (0 = disabled,...
string host
host to use for the LLM server
bool advancedOptions
toggle to show/hide advanced options in the GameObject
int port
port to use for the LLM server
bool setNKeepToPrompt
option to set the number of tokens to retain from the prompt (nKeep) based on the LLMCharacter system...
void SetPrompt(string newPrompt, bool clearChat=true)
Set the system prompt for the LLMCharacter.
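A one-line sketch of swapping the system prompt at runtime, assuming llmCharacter is an LLMCharacter reference; clearing the chat keeps the old role from leaking into the new one:

llmCharacter.SetPrompt("You are a grumpy medieval blacksmith. Answer in short sentences.", clearChat: true);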
int repeatLastN
last n tokens to consider for penalizing repetition (0 = disabled, -1 = ctx-size).
int seed
seed for reproducibility. For random results every time set to -1.
float tfsZ
enable tail free sampling with parameter z (1.0 = disabled).
async Task< List< int > > Tokenize(string query, Callback< List< int > > callback=null)
Tokenises the provided query.
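A hedged sketch of a tokenize/detokenize round trip, combining the Tokenize signature above with the Detokenize function listed earlier; llmCharacter is an assumed LLMCharacter reference:

async Task RoundTrip(LLMCharacter llmCharacter)
{
    List<int> tokens = await llmCharacter.Tokenize("The quick brown fox");
    string text = await llmCharacter.Detokenize(tokens);
    Debug.Log($"{tokens.Count} tokens -> \"{text}\"");
}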
Dictionary< int, string > logitBias
the logit bias option allows to manually adjust the likelihood of specific tokens appearing in the ge...
async void SetGrammar(string path)
Set the grammar file of the LLMCharacter.
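A one-line sketch, assuming "json.gbnf" is an illustrative grammar file placed under Assets/StreamingAssets (the folder the grammar field is documented as relative to):

llmCharacter.SetGrammar("json.gbnf");  // async void: fire-and-forget is fine here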
async Task< string > Complete(string prompt, Callback< string > callback=null, EmptyCallback completionCallback=null)
Pure completion functionality of the LLM. It calls the LLM completion based solely on the provided pr...
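A hedged sketch of pure completion, which continues the given text without the chat history or role formatting; llmCharacter is an assumed LLMCharacter reference:

async Task CompleteExample(LLMCharacter llmCharacter)
{
    string continuation = await llmCharacter.Complete("Once upon a time in a small village,");
    Debug.Log(continuation);
}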
string save
file to save the chat history. The file is saved only for Chat calls with addToHistory set to true....
Class implementing helper functions for setup and process management.
Class implementing the LLM server.
int Register(LLMCharacter llmCharacter)
Registers a local LLMCharacter object. This allows binding the LLMCharacter "client" to a specific sl...
async Task< string > Slot(string json)
Allows saving / restoring the state of a slot.
string GetTemplate()
Returns the chat template of the LLM.
void CancelRequest(int id_slot)
Allows cancelling the requests in a specific slot of the LLM.
int parallelPrompts
number of prompts that can happen in parallel (-1 = number of LLMCharacter objects)
async Task< string > Detokenize(string json)
Detokenises the provided query.
bool started
Boolean set to true if the server has started and is ready to receive requests, false otherwise.
async Task< string > Tokenize(string json)
Tokenises the provided query.
async Task< string > Completion(string json, Callback< string > streamCallback=null)
Allows use of the chat and completion functionality of the LLM.
bool failed
Boolean set to true if the server has failed to start.
async Task< string > Embeddings(string json)
Computes the embeddings of the provided query.