// Excerpt from the LLMCharacter source; only selected lines are shown, so some
// field declarations and method bodies are omitted.
using System.Collections.Generic;
using System.Threading.Tasks;

[DefaultExecutionOrder(-2)]
[Tooltip("file to save the chat history. The file will be saved within the persistentDataPath directory.")]
[Tooltip("save the LLM cache. Speeds up the prompt calculation when reloading from history but also requires ~100MB of space per character.")]
[Tooltip("log the constructed prompt in the Unity Editor.")]
[Tooltip("maximum number of tokens that the LLM will predict (-1 = infinity, -2 = until context filled).")]
[Tooltip("slot of the server to use for computation (affects caching)")]
[ModelAdvanced] public int slot = -1;
[Tooltip("grammar file used for the LLMCharacter (.gbnf format)")]
[ModelAdvanced] public string grammar = null;
[Tooltip("cache the processed prompt to avoid reprocessing the entire prompt every time (default: true, recommended!)")]
[Tooltip("seed for reproducibility (-1 = no reproducibility).")]
[ModelAdvanced] public int seed = 0;
[Tooltip("LLM temperature, lower values give more deterministic answers.")]
[ModelAdvanced, Float(0f, 2f)] public float temperature = 0.2f;
[Tooltip("Top-k sampling selects the next token only from the top k most likely predicted tokens (0 = disabled). Higher values lead to more diverse text, while lower values generate more focused and conservative text.")]
[ModelAdvanced, Int(-1, 100)] public int topK = 40;
[Tooltip("Top-p sampling selects the next token from a subset of tokens that together have a cumulative probability of at least p (1.0 = disabled). Higher values lead to more diverse text, while lower values generate more focused and conservative text.")]
[ModelAdvanced, Float(0f, 1f)] public float topP = 0.9f;
[Tooltip("minimum probability for a token to be used.")]
[ModelAdvanced, Float(0f, 1f)] public float minP = 0.05f;
[Tooltip("Penalty based on repeated tokens to control the repetition of token sequences in the generated text.")]
[Tooltip("Penalty based on token presence in previous responses to control the repetition of token sequences in the generated text (0.0 = disabled).")]
[Tooltip("Penalty based on token frequency in previous responses to control the repetition of token sequences in the generated text (0.0 = disabled).")]
[Tooltip("enable locally typical sampling (1.0 = disabled). Higher values will promote more contextually coherent tokens, while lower values will promote more diverse tokens.")]
[ModelAdvanced, Float(0f, 1f)] public float typicalP = 1f;
[Tooltip("last n tokens to consider for penalizing repetition (0 = disabled, -1 = ctx-size).")]
[Tooltip("penalize newline tokens when applying the repeat penalty.")]
[Tooltip("prompt for the purpose of the penalty evaluation. Can be either null, a string or an array of numbers representing tokens (null/'' = use original prompt).")]
[Tooltip("enable Mirostat sampling, controlling perplexity during text generation (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).")]
[ModelAdvanced, Int(0, 2)] public int mirostat = 0;
[Tooltip("The Mirostat target entropy (tau) controls the balance between coherence and diversity in the generated text.")]
[ModelAdvanced, Float(0f, 10f)] public float mirostatTau = 5f;
[Tooltip("The Mirostat learning rate (eta) controls how quickly the algorithm responds to feedback from the generated text.")]
[ModelAdvanced, Float(0f, 1f)] public float mirostatEta = 0.1f;
[Tooltip("if greater than 0, the response also contains the probabilities of top N tokens for each generated token.")]
[ModelAdvanced, Int(0, 10)] public int nProbs = 0;
[Tooltip("ignore end of stream token and continue generating.")]
[Tooltip("number of tokens to retain from the prompt when the model runs out of context (-1 = LLMCharacter prompt tokens if setNKeepToPrompt is set to true).")]
[Tooltip("stopwords to stop the LLM in addition to the default stopwords from the chat template.")]
public List<string> stop = new List<string>();
[Tooltip("the logit bias option allows you to manually adjust the likelihood of specific tokens appearing in the generated text. By providing a token ID and a positive or negative bias value, you can increase or decrease the probability of that token being generated.")]
[Tooltip("Receive the reply from the model as it is produced (recommended!). If not selected, the full reply from the model is received in one go.")]
[Tooltip("the name of the player")]
[Tooltip("the name of the AI")]
[Tooltip("a description of the AI role (system prompt)")]
[TextArea(5, 10), Chat]
public string prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.";
[Tooltip("set the number of tokens to always retain from the prompt (nKeep) based on the LLMCharacter system prompt")]
[Tooltip("the chat history as list of chat messages")]
public List<ChatMessage> chat = new List<ChatMessage>();
[Tooltip("the grammar to use")]

protected SemaphoreSlim chatLock = new SemaphoreSlim(1, 1);
protected string chatTemplate;
// ... (fragments from Awake / initialisation)
if (!enabled) return;
// ...
int slotFromServer = llm.Register(this);
if (slot == -1) slot = slotFromServer;
// ...

protected override void OnValidate()
// ...

protected override string NotValidLLMError()
{
    return base.NotValidLLMError() + $", it is an embedding only model";
}

// ... (IsValidLLM)
return !llmSet.embeddingsOnly;
// ...

protected virtual void InitHistory()
// ...
protected virtual async Task LoadHistory()
// ...

protected virtual string GetSavePath(string filename)
{
    return Path.Combine(Application.persistentDataPath, filename).Replace('\\', '/');
}

// ... (GetJsonSavePath)
return GetSavePath(filename + ".json");
// ... (GetCacheSavePath)
return GetSavePath(filename + ".cache");
// ...

protected virtual bool CheckTemplate()
{
    if (template == null)
    // ...
}

// ...
if (!CheckTemplate()) return false;
// ...
if (tokens == null) return false;
// ...

protected virtual void InitGrammar()
// ...
protected virtual void SetNKeep(List<int> tokens)
// ...

if (!CheckTemplate()) return null;
// ...
return result.content.Trim();
// ...

if (!CheckTemplate()) return null;
if (!await InitNKeep()) return null;
// ...

if (!CheckTemplate()) return;
if (!await InitNKeep()) return;
// ...

protected override void CancelRequestsLocal()
// ...

if (llm.embeddingsOnly) LLMUnitySetup.LogError("The LLM can't be used for completion, only for embeddings");
// ...
LLMUnitySetup.LogError($"wrong callback type, should be string");
// ...

public class ChatListWrapper
{
    public List<ChatMessage> chat;
}
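The serialized fields in the excerpt above are normally configured in the Unity Inspector, but they can also be set from code. The following is a minimal sketch, assuming an LLMCharacter component on the same GameObject; the component type and field names come from the excerpt, while the class name CharacterSettings and all values are illustrative only.

using UnityEngine;
using LLMUnity;

public class CharacterSettings : MonoBehaviour
{
    void Start()
    {
        // Values below are illustrative; the Inspector defaults are usually fine.
        LLMCharacter character = GetComponent<LLMCharacter>();
        character.playerName = "Player";
        character.AIName = "Guide";
        character.stream = true;        // receive the reply as it is produced
        character.temperature = 0.2f;   // lower values give more deterministic answers
        character.topK = 40;            // sample only from the 40 most likely tokens (0 = disabled)
        character.topP = 0.9f;          // nucleus sampling threshold (1.0 = disabled)
        character.seed = 0;             // fixed seed for reproducibility (-1 = no reproducibility)
        character.stop.Add("Player:");  // extra stopword on top of the chat template defaults
        character.SetPrompt("You are a concise, helpful guide.");  // system prompt
    }
}

SetPrompt is used here rather than assigning the prompt field directly because, per its entry in the member list below, it can also clear the existing chat history (clearChat defaults to true).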
Class implementing the skeleton of a chat template.
static ChatTemplate GetTemplate(string template)
Creates the chat template based on the provided chat template name.
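If a template needs to be resolved programmatically, GetTemplate looks it up by name. A minimal sketch, assuming the LLMUnity namespace; the helper name Resolve and the idea of feeding it the name returned by LLM.GetTemplate() (listed further below) are illustrative assumptions.

using LLMUnity;

public static class TemplateExample
{
    public static ChatTemplate Resolve(string templateName)
    {
        // Look up the ChatTemplate implementation for the given template name,
        // for instance a name obtained from LLM.GetTemplate() (see the LLM members further below).
        return ChatTemplate.GetTemplate(templateName);
    }
}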
Class implementing calling of LLM functions (local and remote).
virtual async Task< List< int > > Tokenize(string query, Callback< List< int > > callback=null)
Tokenises the provided query.
bool remote
use remote LLM server
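Tokenize is asynchronous and, per the signature above, can either be awaited directly or given a callback. A minimal sketch, assuming the LLMUnity namespace and an LLMCaller reference assigned in the Inspector; the class name TokenizeExample and the query text are illustrative.

using System.Collections.Generic;
using UnityEngine;
using LLMUnity;

public class TokenizeExample : MonoBehaviour
{
    public LLMCaller caller;  // assign an LLMCaller (or LLMCharacter) in the Inspector

    async void Start()
    {
        // Await the token ids directly...
        List<int> tokens = await caller.Tokenize("Hello there!");
        Debug.Log($"Token count: {tokens.Count}");

        // ...or receive them through the optional callback.
        await caller.Tokenize("Hello there!", ids => Debug.Log($"Callback token count: {ids.Count}"));
    }
}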
Class implementing the LLM characters.
bool cachePrompt
cache the processed prompt to avoid reprocessing the entire prompt every time (default: true, recommended!)
int slot
slot of the server to use for computation (affects caching)
virtual async Task< string > Chat(string query, Callback< string > callback=null, EmptyCallback completionCallback=null, bool addToHistory=true)
Chat functionality of the LLM. It calls the LLM completion based on the provided query including the ... A usage sketch follows this member list.
List< string > stop
stopwords to stop the LLM in addition to the default stopwords from the chat template.
float topP
Top-p sampling selects the next token from a subset of tokens that together have a cumulative probability of at least p (1.0 = disabled). Higher values lead to more diverse text, while lower values generate more focused and conservative text.
virtual async Task LoadTemplate()
Loads the chat template of the LLMCharacter.
string AIName
the name of the AI
int nProbs
if greater than 0, the response also contains the probabilities of top N tokens for each generated token.
bool ignoreEos
ignore end of stream token and continue generating.
float mirostatTau
The Mirostat target entropy (tau) controls the balance between coherence and diversity in the generated text.
int numPredict
maximum number of tokens that the LLM will predict (-1 = infinity, -2 = until context filled).
string prompt
a description of the AI role (system prompt)
float temperature
LLM temperature, lower values give more deterministic answers.
override void Awake()
The Unity Awake function that initializes the state before the application starts....
float presencePenalty
Penalty based on token presence in previous responses to control the repetition of token sequences in the generated text (0.0 = disabled).
virtual async Task< string > AskTemplate()
Asks the LLM for the chat template to use.
string playerName
the name of the player
virtual string GetCacheSavePath(string filename)
Returns the save path of the LLM cache based on the provided filename or relative path.
float mirostatEta
The Mirostat learning rate (eta) controls how quickly the algorithm responds to feedback from the generated text.
float minP
minimum probability for a token to be used.
float typicalP
enable locally typical sampling (1.0 = disabled). Higher values will promote more contextually coherent tokens, while lower values will promote more diverse tokens.
virtual async Task< string > Load(string filename)
Load the chat history and cache from the provided filename / relative path.
int nKeep
number of tokens to retain from the prompt when the model runs out of context (-1 = LLMCharacter prompt tokens if setNKeepToPrompt is set to true).
virtual async void SetGrammar(string path)
Sets the grammar file of the LLMCharacter.
bool penalizeNl
penalize newline tokens when applying the repeat penalty.
bool debugPrompt
log the constructed prompt in the Unity Editor.
bool saveCache
save the LLM cache. Speeds up the prompt calculation when reloading from history but also requires ~100MB of space per character.
int topK
Top-k sampling selects the next token only from the top k most likely predicted tokens (0 = disabled). Higher values lead to more diverse text, while lower values generate more focused and conservative text.
string grammar
grammar file used for the LLMCharacter (.gbnf format)
virtual async Task< string > Save(string filename)
Saves the chat history and cache to the provided filename / relative path.
virtual void AddAIMessage(string content)
Adds an AI message to the chat history.
string penaltyPrompt
prompt for the purpose of the penalty evaluation. Can be either null, a string or an array of numbers representing tokens (null/'' = use original prompt).
override bool IsValidLLM(LLM llmSet)
Checks if an LLM is valid for the LLMCaller.
virtual void AddPlayerMessage(string content)
Adds a player message to the chat history.
float repeatPenalty
Penalty based on repeated tokens to control the repetition of token sequences in the generated text.
virtual string GetJsonSavePath(string filename)
Returns the save path of the chat history based on the provided filename or relative path.
bool stream
Receive the reply from the model as it is produced (recommended!). If not selected, the full reply from the model is received in one go.
float frequencyPenalty
Penalty based on token frequency in previous responses to control the repetition of token sequences in the generated text (0.0 = disabled).
virtual async Task Warmup(EmptyCallback completionCallback=null)
Warms up the model by processing the system prompt. The prompt processing will be cached (if ca...
int mirostat
enable Mirostat sampling, controlling perplexity during text generation (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0).
bool setNKeepToPrompt
set the number of tokens to always retain from the prompt (nKeep) based on the LLMCharacter system prompt.
int repeatLastN
last n tokens to consider for penalizing repetition (0 = disabled, -1 = ctx-size).
int seed
seed for reproducibility (-1 = no reproducibility).
string grammarString
the grammar to use
Dictionary< int, string > logitBias
the logit bias option allows you to manually adjust the likelihood of specific tokens appearing in the generated text. By providing a token ID and a positive or negative bias value, you can increase or decrease the probability of that token being generated.
virtual async Task< string > Complete(string prompt, Callback< string > callback=null, EmptyCallback completionCallback=null)
Pure completion functionality of the LLM. It calls the LLM completion based solely on the provided pr...
virtual void AddMessage(string role, string content)
Adds a message to the chat history.
virtual async Task Warmup(string query, EmptyCallback completionCallback=null)
Warms up the model by processing the provided prompt without adding it to history....
virtual void ClearChat()
Clear the chat of the LLMCharacter.
List< ChatMessage > chat
the chat history as list of chat messages
string save
file to save the chat history. The file will be saved within the persistentDataPath directory.
virtual void SetPrompt(string newPrompt, bool clearChat=true)
Set the system prompt for the LLMCharacter.
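Taken together, the members above describe the usual LLMCharacter workflow: load or warm up, chat with a streaming callback, and save. A minimal sketch, assuming the LLMUnity namespace and an LLMCharacter assigned in the Inspector; the class name, file name, question text and UI wiring are illustrative only.

using UnityEngine;
using UnityEngine.UI;
using LLMUnity;

public class ChatExample : MonoBehaviour
{
    public LLMCharacter llmCharacter;  // assign in the Inspector
    public Text replyText;             // illustrative UI target

    async void Start()
    {
        // Optionally restore a previous conversation (the file name is illustrative).
        await llmCharacter.Load("hero_chat");

        // Warm up the model so the system prompt is processed (and cached) ahead of time.
        await llmCharacter.Warmup();

        // Ask a question; the callback receives the partial reply while streaming,
        // and the completion callback fires once the full reply is available.
        await llmCharacter.Chat(
            "What can you tell me about this dungeon?",
            reply => replyText.text = reply,
            () => Debug.Log("Reply completed"),
            addToHistory: true);

        // Persist the chat history (and the LLM cache, if saveCache is enabled).
        await llmCharacter.Save("hero_chat");
    }
}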
Class implementing helper functions for setup and process management.
Class implementing the LLM server.
string GetTemplate()
Returns the chat template of the LLM.
void CancelRequest(int id_slot)
Cancels the requests in a specific slot of the LLM.
int parallelPrompts
number of prompts that can happen in parallel (-1 = number of LLMCaller objects)
bool started
Boolean set to true if the server has started and is ready to receive requests, false otherwise.
async Task< string > Completion(string json, Callback< string > streamCallback=null)
Allows using the chat and completion functionality of the LLM.
int Register(LLMCaller llmCaller)
Registers a local LLMCaller object. This allows binding the LLMCaller "client" to a specific slot of ...
bool failed
Boolean set to true if the server has failed to start.
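The LLM members above form the lower-level interface that LLMCharacter builds on: a caller registers for a server slot, sends completion requests as raw JSON, and can cancel its slot. A rough sketch, assuming the LLMUnity namespace; the JSON body is illustrative only, since the request schema is not documented on this page.

using System.Threading.Tasks;
using UnityEngine;
using LLMUnity;

public class RawCompletionExample : MonoBehaviour
{
    public LLM llm;    // assign the LLM server component in the Inspector
    LLMCaller caller;  // the "client" to bind to a server slot

    async void Start()
    {
        caller = GetComponent<LLMCaller>();

        // Bind this caller to a server slot (the slot affects prompt caching).
        int slot = llm.Register(caller);

        // Wait until the server reports it has started (polling here is illustrative).
        while (!llm.started && !llm.failed) await Task.Yield();
        if (llm.failed) { Debug.LogError("LLM server failed to start"); return; }

        // Send a raw completion request; the JSON body below is illustrative only.
        string json = "{\"prompt\": \"Hello\", \"id_slot\": " + slot + ", \"n_predict\": 32}";
        string result = await llm.Completion(json, partial => Debug.Log(partial));
        Debug.Log(result);

        // Requests in this slot could be cancelled with llm.CancelRequest(slot).
    }
}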