LLM for Unity  v2.4.1
Create characters in Unity with LLMs!
Loading...
Searching...
No Matches
TokenSplitter.cs
Go to the documentation of this file.
1
3using System;
4using System.Collections.Generic;
5using System.Threading.Tasks;
6
namespace LLMUnity
{
    /// <summary>
    /// Class implementing a token-based splitter: the input is tokenised, the tokens are grouped
    /// into batches of <see cref="numTokens"/>, and each batch is mapped back to an inclusive
    /// character range of the original input.
    /// </summary>
    [Serializable]
    public class TokenSplitter : Chunking
    {
        /// <summary> the number of tokens in each chunk (must be at least 1) </summary>
        public int numTokens = 10;

        /// <summary>
        /// Estimates the index in <paramref name="input"/> of the last character of a chunk whose
        /// detokenised text is <paramref name="detokenised"/> and which starts at <paramref name="startIndex"/>.
        /// The first guess is <c>startIndex + detokenised.Length - 1</c> (clamped to the end of the input).
        /// The guess is then refined by looking, close to that position, for one of the trailing
        /// characters of <paramref name="detokenised"/>, because the detokenised text does not have to
        /// match the input character by character.
        /// </summary>
        /// <param name="input">full input text</param>
        /// <param name="detokenised">text obtained by detokenising the tokens of the current chunk</param>
        /// <param name="startIndex">index in input of the first character of the current chunk</param>
        /// <param name="searchRange">number of offsets (0 .. searchRange - 1) probed on each side of the first guess</param>
        /// <param name="charsFromEnd">number of trailing characters of detokenised that are tried, last character first</param>
        /// <returns>
        /// The inclusive end index of the chunk in input. When a trailing character other than the last one
        /// is matched, the index of that matched character is returned. When <paramref name="detokenised"/>
        /// is empty the result is <c>startIndex - 1</c> (clamped to the end of the input), i.e. an empty range.
        /// </returns>
        protected int DetermineEndIndex(string input, string detokenised, int startIndex, int searchRange = 5, int charsFromEnd = 3)
        {
            int lastInputIndex = input.Length - 1;
            int endIndex = Math.Min(lastInputIndex, startIndex + detokenised.Length - 1);
            if (endIndex == lastInputIndex) return endIndex;

            // A chunk can never end before it starts: without this lower bound the backwards search
            // could match a character of the previous chunk and return an index below startIndex.
            int lowestIndex = Math.Max(0, startIndex);

            for (int lastCharI = 0; lastCharI < charsFromEnd; lastCharI++)
            {
                int charI = detokenised.Length - 1 - lastCharI;
                if (charI < 0) break;
                char lastChar = detokenised[charI];

                for (int i = 0; i < searchRange; i++)
                {
                    // Probe the position before the guess first, then the one after it.
                    int before = endIndex - i;
                    if (before >= lowestIndex && before <= lastInputIndex && input[before] == lastChar) return before;

                    // Offset 0 is the same position in both directions, no need to probe it twice.
                    if (i == 0) continue;

                    int after = endIndex + i;
                    if (after >= lowestIndex && after <= lastInputIndex && input[after] == lastChar) return after;
                }
            }
            return endIndex;
        }

        /// <summary>
        /// Splits the provided phrase into chunks of a specific number of tokens (defined by the numTokens variable).
        /// </summary>
        /// <param name="input">phrase to split</param>
        /// <returns>
        /// List of inclusive (startIndex, endIndex) character ranges into <paramref name="input"/>, one per chunk.
        /// Any text left over after the last token batch is returned as a final chunk.
        /// The list is empty when the input produces no tokens.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">numTokens is smaller than 1</exception>
        public override async Task<List<(int, int)>> Split(string input)
        {
            List<(int, int)> indices = new List<(int, int)>();
            List<int> tokens = await search.Tokenize(input);
            if (tokens.Count == 0) return indices;

            // numTokens == 0 would never advance the loop below (unbounded list growth) and a negative
            // value makes GetRange throw; report both cases with the same, explicit exception.
            if (numTokens < 1) throw new ArgumentOutOfRangeException(nameof(numTokens), numTokens, "numTokens must be at least 1");

            int lastInputIndex = input.Length - 1;
            int startIndex = 0;
            for (int i = 0; i < tokens.Count; i += numTokens)
            {
                int batchTokens = Math.Min(numTokens, tokens.Count - i);
                string detokenised = await search.Detokenize(tokens.GetRange(i, batchTokens));

                // A batch that detokenises to nothing covers no characters of the input:
                // skip it instead of recording an inverted (startIndex, startIndex - 1) range.
                if (string.IsNullOrEmpty(detokenised)) continue;

                int endIndex = DetermineEndIndex(input, detokenised, startIndex);
                indices.Add((startIndex, endIndex));
                startIndex = endIndex + 1;
                if (endIndex == lastInputIndex) break;
            }

            // The detokenised lengths are only an estimate of the input positions,
            // so make sure that the tail of the input is not lost.
            if (startIndex <= lastInputIndex) indices.Add((startIndex, lastInputIndex));
            return indices;
        }
    }
}
Class implementing the chunking functionality.
Definition Chunking.cs:18
Class implementing a token-based splitter.
int numTokens
the number of tokens in each chunk that phrases are split into
override async Task< List<(int, int)> > Split(string input)
Splits the provided phrase into chunks of a specific number of tokens (defined by the numTokens variable).