LLM for Unity  v2.3.0
Create characters in Unity with LLMs!
Loading...
Searching...
No Matches
Chunking.cs
Go to the documentation of this file.
1
3using System;
4using System.Collections.Generic;
5using System.IO.Compression;
6using System.Linq;
7using System.Text;
8using System.Threading.Tasks;
9
10namespace LLMUnity
11{
16 [Serializable]
17 public abstract class Chunking : SearchPlugin
18 {
19 protected bool returnChunks = false;
20 protected Dictionary<string, List<int>> dataSplitToPhrases = new Dictionary<string, List<int>>();
21 protected Dictionary<int, int[]> phraseToSentences = new Dictionary<int, int[]>();
22 protected Dictionary<int, int> sentenceToPhrase = new Dictionary<int, int>();
23 protected Dictionary<int, int[]> hexToPhrase = new Dictionary<int, int[]>();
24 protected int nextKey = 0;
25
/// <summary>
/// Selects what the fetch functions return: the individual chunks (true)
/// or the phrases the chunks were split from (false).
/// </summary>
/// <param name="returnChunks">true to return chunks, false to return whole phrases</param>
public void ReturnChunks(bool returnChunks) => this.returnChunks = returnChunks;
34
/// <summary>
/// Splits the provided phrase into chunks.
/// </summary>
/// <param name="input">phrase to split</param>
/// <returns>
/// (start, end) character index pairs, one per chunk; Add extracts each chunk with
/// Substring(start, end - start + 1), i.e. it treats the end index as inclusive
/// </returns>
40 public abstract Task<List<(int, int)>> Split(string input);
41
/// <summary>
/// Retrieves the phrase with the specific id by concatenating its chunks
/// in the order they were stored.
/// </summary>
/// <param name="key">phrase id</param>
/// <returns>the text of all chunks of the phrase joined together</returns>
/// <exception cref="KeyNotFoundException">no phrase is registered under <paramref name="key"/></exception>
public override string Get(int key)
{
    int[] chunkKeys = phraseToSentences[key];
    return string.Concat(chunkKeys.Select(chunkKey => search.Get(chunkKey)));
}
56
/// <summary>
/// Adds a phrase to the search after splitting it into chunks.
/// </summary>
/// <param name="inputString">phrase to add</param>
/// <param name="group">data group to add the phrase to</param>
/// <returns>id assigned to the phrase</returns>
public override async Task<int> Add(string inputString, string group = "")
{
    // The id is reserved before any await, exactly as the chunks are registered against it below.
    int phraseKey = nextKey++;

    // chunk -> phrase: every chunk is stored in the underlying search and mapped back to the phrase
    List<(int, int)> chunkBounds = await Split(inputString);
    List<int> chunkKeys = new List<int>();
    foreach ((int first, int last) in chunkBounds)
    {
        int length = last - first + 1; // the end index is inclusive
        int chunkKey = await search.Add(inputString.Substring(first, length), group);
        chunkKeys.Add(chunkKey);
        sentenceToPhrase[chunkKey] = phraseKey;
    }

    // phrase -> chunks
    phraseToSentences[phraseKey] = chunkKeys.ToArray();

    // group -> phrases
    if (dataSplitToPhrases.TryGetValue(group, out List<int> groupPhrases)) groupPhrases.Add(phraseKey);
    else dataSplitToPhrases[group] = new List<int>() { phraseKey };

    // hash -> phrases: append the new id to the ids already sharing this hash
    // NOTE(review): string.GetHashCode is not guaranteed to be stable across runtimes or
    // processes, while hexToPhrase is persisted by SaveInternal - confirm this is safe.
    int hash = inputString.GetHashCode();
    if (!hexToPhrase.TryGetValue(hash, out int[] sameHash)) sameHash = new int[0];
    int[] extended = new int[sameHash.Length + 1];
    sameHash.CopyTo(extended, 0);
    extended[sameHash.Length] = phraseKey;
    hexToPhrase[hash] = extended;

    return phraseKey;
}
92
/// <summary>
/// Removes a phrase and the phrase chunks from the search.
/// Does nothing when the id is unknown.
/// </summary>
/// <param name="key">phrase id</param>
public override void Remove(int key)
{
    if (!phraseToSentences.TryGetValue(key, out int[] chunkKeys)) return;

    // The hash must be computed first: Get reads the chunks that are removed below.
    // NOTE(review): this matches the hash stored by Add only if the concatenated chunks
    // reproduce the original phrase exactly - depends on the Split implementation, confirm.
    int hash = Get(key).GetHashCode();

    // phrase -> chunks and chunk -> phrase
    phraseToSentences.Remove(key);
    foreach (int chunkKey in chunkKeys)
    {
        search.Remove(chunkKey);
        sentenceToPhrase.Remove(chunkKey);
    }

    // group -> phrases
    foreach (List<int> groupPhrases in dataSplitToPhrases.Values) groupPhrases.Remove(key);

    // hash -> phrases
    if (!hexToPhrase.TryGetValue(hash, out int[] sameHash)) return;
    List<int> remaining = new List<int>(sameHash);
    remaining.Remove(key);
    if (remaining.Count > 0) hexToPhrase[hash] = remaining.ToArray();
    else hexToPhrase.Remove(hash);
}
123
/// <summary>
/// Removes a phrase and the phrase chunks from the search.
/// Every stored phrase of the given group whose text equals the input is removed.
/// </summary>
/// <param name="inputString">phrase to remove</param>
/// <param name="group">data group to remove the phrase from</param>
/// <returns>number of phrases removed; 0 when the phrase or the group is unknown</returns>
public override int Remove(string inputString, string group = "")
{
    int hash = inputString.GetHashCode();
    if (!hexToPhrase.TryGetValue(hash, out int[] entries)) return 0;
    // A group that was never added holds no phrases; indexing the dictionary directly
    // would throw KeyNotFoundException when the phrase exists only in another group.
    if (!dataSplitToPhrases.TryGetValue(group, out List<int> groupPhrases)) return 0;

    // Collect first, remove afterwards: Remove(int) rewrites the mappings inspected here.
    List<int> removeIds = new List<int>();
    foreach (int key in entries)
    {
        // Compare the text as well, since different phrases can share a hash.
        if (groupPhrases.Contains(key) && Get(key) == inputString) removeIds.Add(key);
    }
    foreach (int removeId in removeIds) Remove(removeId);
    return removeIds.Count;
}
142
/// <summary>
/// Returns a count of the phrases.
/// </summary>
/// <returns>number of phrases currently stored</returns>
public override int Count() => phraseToSentences.Count;
151
/// <summary>
/// Returns a count of the phrases in a specific data group.
/// </summary>
/// <param name="group">data group</param>
/// <returns>number of phrases in the group, or 0 when the group is unknown</returns>
public override int Count(string group)
{
    return dataSplitToPhrases.TryGetValue(group, out List<int> groupPhrases) ? groupPhrases.Count : 0;
}
162
/// <summary>
/// Starts an incremental search on the underlying search object; the results
/// are retrieved afterwards in batches with the fetch functions.
/// </summary>
/// <param name="queryString">search query</param>
/// <param name="group">data group to search in</param>
/// <returns>fetch key to pass to the incremental fetch functions</returns>
public override async Task<int> IncrementalSearch(string queryString, string group = "")
{
    int fetchKey = await search.IncrementalSearch(queryString, group);
    return fetchKey;
}
173
/// <summary>
/// Retrieves the most similar search results in batches (incremental search).
/// Returns chunk keys when chunks were requested with ReturnChunks(true), otherwise the
/// keys of the phrases the matching chunks belong to, without duplicates.
/// </summary>
/// <param name="fetchKey">key of the incremental search</param>
/// <param name="k">number of results to retrieve</param>
/// <returns>
/// the result keys, the distance of each result (for a phrase: the distance of its first
/// matching chunk), and whether the underlying search reported that it is exhausted
/// </returns>
public override ValueTuple<int[], float[], bool> IncrementalFetchKeys(int fetchKey, int k)
{
    if (returnChunks) return search.IncrementalFetchKeys(fetchKey, k);

    List<int> phraseKeys = new List<int>();
    List<float> distancesList = new List<float>();
    // Constant-time "already returned" check; phraseKeys keeps the result order.
    HashSet<int> seenPhrases = new HashSet<int>();
    bool done = false;
    bool completed;
    // NOTE(review): chunk keys left in a batch once k phrases are collected are dropped, and
    // phrases returned by an earlier call are not remembered here - confirm this is intended.
    do
    {
        int[] resultKeys;
        float[] distancesIter;
        (resultKeys, distancesIter, completed) = search.IncrementalFetchKeys(fetchKey, k);
        for (int i = 0; i < resultKeys.Length; i++)
        {
            int phraseId = sentenceToPhrase[resultKeys[i]];
            // Several chunks can map to the same phrase; keep only the first hit.
            if (!seenPhrases.Add(phraseId)) continue;
            phraseKeys.Add(phraseId);
            distancesList.Add(distancesIter[i]);
            if (phraseKeys.Count == k)
            {
                done = true;
                break;
            }
        }
    }
    while (!done && !completed);

    if (completed) IncrementalSearchComplete(fetchKey);
    return (phraseKeys.ToArray(), distancesList.ToArray(), completed);
}
225
/// <summary>
/// Retrieves the most similar search results in batches (incremental search).
/// Returns the text of the chunks when chunks were requested with ReturnChunks(true),
/// otherwise the text of the phrases.
/// </summary>
/// <param name="fetchKey">key of the incremental search</param>
/// <param name="k">number of results to retrieve</param>
/// <returns>the result texts, their distances, and whether the search is exhausted</returns>
public override ValueTuple<string[], float[], bool> IncrementalFetch(int fetchKey, int k)
{
    (int[] resultKeys, float[] distances, bool completed) = IncrementalFetchKeys(fetchKey, k);
    // Keys are chunk keys or phrase keys depending on returnChunks, so resolve them accordingly.
    string[] results = Array.ConvertAll<int, string>(
        resultKeys,
        resultKey => returnChunks ? search.Get(resultKey) : Get(resultKey));
    return (results, distances, completed);
}
252
/// <summary>
/// Completes the incremental search by forwarding the call to the underlying search object.
/// </summary>
/// <param name="fetchKey">key of the incremental search</param>
public override void IncrementalSearchComplete(int fetchKey) => search.IncrementalSearchComplete(fetchKey);
261
/// <summary>
/// Clears the object and the associated search object:
/// all phrase/chunk mappings are emptied and phrase ids start again from 0.
/// </summary>
public override void Clear()
{
    // own bookkeeping
    hexToPhrase.Clear();
    sentenceToPhrase.Clear();
    phraseToSentences.Clear();
    dataSplitToPhrases.Clear();
    nextKey = 0;

    // stored chunks
    search.Clear();
}
274
/// <summary>
/// Writes the phrase/chunk mappings and the next phrase id to the archive,
/// each under the path that GetSavePath returns for its name.
/// </summary>
/// <param name="archive">archive to write to</param>
protected override void SaveInternal(ZipArchive archive)
{
    string groupsPath = GetSavePath("dataSplitToPhrases");
    ArchiveSaver.Save(archive, dataSplitToPhrases, groupsPath);

    string phrasesPath = GetSavePath("phraseToSentences");
    ArchiveSaver.Save(archive, phraseToSentences, phrasesPath);

    string chunksPath = GetSavePath("sentenceToPhrase");
    ArchiveSaver.Save(archive, sentenceToPhrase, chunksPath);

    string hashesPath = GetSavePath("hexToPhrase");
    ArchiveSaver.Save(archive, hexToPhrase, hashesPath);

    string nextKeyPath = GetSavePath("nextKey");
    ArchiveSaver.Save(archive, nextKey, nextKeyPath);
}
283
/// <summary>
/// Restores the phrase/chunk mappings and the next phrase id from the archive,
/// reading the same entries that SaveInternal writes.
/// </summary>
/// <param name="archive">archive to read from</param>
protected override void LoadInternal(ZipArchive archive)
{
    string groupsPath = GetSavePath("dataSplitToPhrases");
    dataSplitToPhrases = ArchiveSaver.Load<Dictionary<string, List<int>>>(archive, groupsPath);

    string phrasesPath = GetSavePath("phraseToSentences");
    phraseToSentences = ArchiveSaver.Load<Dictionary<int, int[]>>(archive, phrasesPath);

    string chunksPath = GetSavePath("sentenceToPhrase");
    sentenceToPhrase = ArchiveSaver.Load<Dictionary<int, int>>(archive, chunksPath);

    string hashesPath = GetSavePath("hexToPhrase");
    hexToPhrase = ArchiveSaver.Load<Dictionary<int, int[]>>(archive, hashesPath);

    string nextKeyPath = GetSavePath("nextKey");
    nextKey = ArchiveSaver.Load<int>(archive, nextKeyPath);
}
292 }
293}
Class implementing the chunking functionality.
Definition Chunking.cs:18
override int Count(string group)
Returns a count of the phrases in a specific data group.
Definition Chunking.cs:157
Task< List<(int, int)> > Split(string input)
Splits the provided phrase into chunks.
override void IncrementalSearchComplete(int fetchKey)
Completes the search and clears the cached results for an incremental search.
Definition Chunking.cs:257
override async Task< int > Add(string inputString, string group="")
Adds a phrase to the search after splitting it into chunks.
Definition Chunking.cs:63
override string Get(int key)
Retrieves the phrase with the specific id.
Definition Chunking.cs:47
override void Remove(int key)
Removes a phrase and the phrase chunks from the search.
Definition Chunking.cs:97
override ValueTuple< int[], float[], bool > IncrementalFetchKeys(int fetchKey, int k)
Retrieves the most similar search results in batches (incremental search). The phrase/chunk keys and ...
Definition Chunking.cs:189
void ReturnChunks(bool returnChunks)
Sets whether the Search function returns the individual chunks (true) or the original input phrases (false).
Definition Chunking.cs:30
override void Clear()
Clears the object and the associated search object.
Definition Chunking.cs:265
override int Count()
Returns a count of the phrases.
Definition Chunking.cs:147
override async Task< int > IncrementalSearch(string queryString, string group="")
Performs a search whose results can then be retrieved in batches (incremental search).
Definition Chunking.cs:169
override int Remove(string inputString, string group="")
Removes a phrase and the phrase chunks from the search.
Definition Chunking.cs:130
override ValueTuple< string[], float[], bool > IncrementalFetch(int fetchKey, int k)
Retrieves the most similar search results in batches (incremental search). The phrases/chunks and the...
Definition Chunking.cs:241
Class implementing the search plugin template used e.g. in chunking.
Definition Search.cs:462