4using System.Collections.Generic;
5using System.IO.Compression;
8using System.Threading.Tasks;
// When true, the incremental fetch returns the text of the individual matching chunks
// (search.Get); when false it returns the whole phrase a chunk belongs to (Get).
19 protected bool returnChunks =
false;
// Group name -> keys of the phrases that were added under that group.
20 protected Dictionary<string, List<int>> dataSplitToPhrases =
new Dictionary<string, List<int>>();
// Phrase key -> ids returned by search.Add for each chunk of the phrase, in split order.
21 protected Dictionary<int, int[]> phraseToSentences =
new Dictionary<int, int[]>();
// Chunk id (as returned by search.Add) -> key of the phrase the chunk was cut from.
22 protected Dictionary<int, int> sentenceToPhrase =
new Dictionary<int, int>();
// inputString.GetHashCode() -> keys of all phrases whose text produced that hash.
// NOTE(review): String.GetHashCode is not guaranteed to be stable across runtimes or
// processes, yet this map is persisted by SaveInternal — confirm for the target runtime.
23 protected Dictionary<int, int[]> hexToPhrase =
new Dictionary<int, int[]>();
// Starts at 0 and is persisted with the other state; the statement that advances it
// is not visible in this listing (presumably the next phrase key — TODO confirm).
24 protected int nextKey = 0;
32 this.returnChunks = returnChunks;
// Splits the input into chunks; implemented by subclasses.
// Add consumes each returned tuple as (startIndex, endIndex) and extracts
// input.Substring(startIndex, endIndex - startIndex + 1), so endIndex is inclusive.
40 public abstract Task<List<(int, int)>>
Split(
string input);
/// <summary>
/// Rebuilds the text of a phrase by concatenating the text of its stored chunks.
/// </summary>
/// <param name="key">phrase id</param>
/// <returns>the chunk texts joined in the order they are stored for the phrase</returns>
public override string Get(int key)
{
    int[] chunkIds = phraseToSentences[key];
    StringBuilder text = new StringBuilder();
    for (int i = 0; i < chunkIds.Length; i++)
    {
        text.Append(search.Get(chunkIds[i]));
    }
    return text.ToString();
}
// Adds a phrase: splits it into chunks, adds every chunk to the underlying search and
// records the bookkeeping needed to map chunks back to the phrase.
// NOTE(review): the declaration of `key` and the return statement are not visible in
// this listing.
63 public override async Task<int>
Add(
string inputString,
string group =
"")
// Ids of the chunks of this phrase, collected in split order.
67 List<int> sentenceIds =
new List<int>();
68 foreach ((
int startIndex,
int endIndex) in await
Split(inputString))
// endIndex is inclusive, hence the +1 on the length.
70 string sentenceText = inputString.Substring(startIndex, endIndex - startIndex + 1);
71 int sentenceId = await search.Add(sentenceText, group);
72 sentenceIds.Add(sentenceId);
// chunk -> phrase
74 sentenceToPhrase[sentenceId] = key;
// phrase -> chunks
77 phraseToSentences[key] = sentenceIds.ToArray();
// group -> phrases (creates the group's list on first use)
80 if (!dataSplitToPhrases.ContainsKey(group)) dataSplitToPhrases[group] =
new List<int>(){key};
81 else dataSplitToPhrases[group].Add(key);
// hash of the full text -> phrases; the bucket array is rebuilt with the new key appended,
// so different phrases with the same hash share one bucket.
84 int hash = inputString.GetHashCode();
85 if (!hexToPhrase.TryGetValue(hash, out
int[] entries)) entries =
new int[0];
86 List<int> matchingHash =
new List<int>(entries);
87 matchingHash.Add(key);
89 hexToPhrase[hash] = matchingHash.ToArray();
// NOTE(review): the method header is not visible in this listing; this looks like the
// body of the key-based removal that Remove(string, string) calls — confirm.
// Unknown key: nothing to remove.
99 if (!phraseToSentences.TryGetValue(key, out
int[] sentenceIds))
return;
// Hash of the phrase text; it must be computed before the chunks are removed below,
// because Get(key) reads them.
100 int hash =
Get(key).GetHashCode();
// Drop the phrase -> chunks entry, then every chunk from the search and the reverse map.
103 phraseToSentences.Remove(key);
104 foreach (
int sentenceId
in sentenceIds)
106 search.Remove(sentenceId);
108 sentenceToPhrase.Remove(sentenceId);
// The group of the phrase is not known here, so the key is removed from every group list.
112 foreach (var dataSplitPhrases
in dataSplitToPhrases.Values) dataSplitPhrases.Remove(key);
// Remove the key from its hash bucket; delete the bucket once it becomes empty.
115 if (hexToPhrase.TryGetValue(hash, out
int[] phraseIds))
117 List<int> updatedIds = phraseIds.ToList();
118 updatedIds.Remove(key);
119 if (updatedIds.Count == 0) hexToPhrase.Remove(hash);
120 else hexToPhrase[hash] = updatedIds.ToArray();
/// <summary>
/// Removes every phrase of a data group whose text equals the provided string,
/// together with the chunks of those phrases.
/// </summary>
/// <param name="inputString">exact text of the phrase(s) to remove</param>
/// <param name="group">data group to remove from</param>
/// <returns>number of phrases removed; 0 when the text or the group is unknown</returns>
public override int Remove(string inputString, string group = "")
{
    // Candidates are looked up by the same hash that Add stored for the text.
    int hash = inputString.GetHashCode();
    if (!hexToPhrase.TryGetValue(hash, out int[] entries)) return 0;

    // The hash bucket may exist only because the text was added under a different group;
    // indexing dataSplitToPhrases[group] directly would throw KeyNotFoundException then.
    if (!dataSplitToPhrases.TryGetValue(group, out List<int> groupPhrases)) return 0;

    // Collect first and remove afterwards: removing a phrase rewrites the hash bucket
    // and the group lists.
    List<int> removeIds = new List<int>();
    foreach (int key in entries)
    {
        // The hash only narrows the candidates; compare the text to rule out collisions.
        if (groupPhrases.Contains(key) && Get(key) == inputString) removeIds.Add(key);
    }

    foreach (int removeId in removeIds) Remove(removeId);
    return removeIds.Count;
}
149 return phraseToSentences.Count;
/// <summary>
/// Counts the phrases stored for a specific data group.
/// </summary>
/// <param name="group">data group</param>
/// <returns>number of phrases in the group, or 0 when the group is unknown</returns>
public override int Count(string group)
{
    return dataSplitToPhrases.TryGetValue(group, out List<int> groupPhrases)
        ? groupPhrases.Count
        : 0;
}
171 return await search.IncrementalSearch(queryString, group);
// NOTE(review): the method header, the enclosing loop and the declarations of
// `resultKeys` and `completed` are not visible in this listing.
// Direct pass-through of the chunk-level results; the condition guarding this early
// return is not visible here.
193 return search.IncrementalFetchKeys(fetchKey, k);
// Phrase-level results: distinct phrase keys in the order first seen, with the distance
// of the first chunk that produced each of them.
197 List<int> phraseKeys =
new List<int>();
198 List<float> distancesList =
new List<float>();
204 float[] distancesIter;
// Fetch the next batch of chunk-level results from the underlying search.
205 (resultKeys, distancesIter, completed) = search.IncrementalFetchKeys(fetchKey, k);
206 for (
int i = 0; i < resultKeys.Length; i++)
// Map the chunk to its phrase and skip phrases that were already collected.
208 int phraseId = sentenceToPhrase[resultKeys[i]];
209 if (phraseKeys.Contains(phraseId))
continue;
210 phraseKeys.Add(phraseId);
211 distancesList.Add(distancesIter[i]);
// k distinct phrases collected; the statement controlled by this check is not visible here.
212 if (phraseKeys.Count() == k)
// The underlying search reported completion, so stop fetching.
218 if (completed)
break;
222 return (phraseKeys.ToArray(), distancesList.ToArray(), completed);
// NOTE(review): the method header and the call that produces `resultKeys`, `distances`
// and `completed` are not visible in this listing.
// Turns each result key into text.
244 string[] results =
new string[resultKeys.Length];
245 for (
int i = 0; i < resultKeys.Length; i++)
// returnChunks: the key is looked up in the underlying search (chunk text);
// otherwise it is looked up with Get, which concatenates all chunks of a phrase.
247 if (returnChunks) results[i] = search.Get(resultKeys[i]);
248 else results[i] =
Get(resultKeys[i]);
250 return (results, distances, completed);
259 search.IncrementalSearchComplete(fetchKey);
// Resets the bookkeeping maps.
// NOTE(review): the method header and neighbouring lines are not visible in this listing;
// no reset of hexToPhrase is visible here — confirm it is cleared as well, otherwise
// stale hash buckets would survive a Clear.
268 dataSplitToPhrases.Clear();
269 phraseToSentences.Clear();
270 sentenceToPhrase.Clear();
// Writes the bookkeeping state into the archive, one entry per field.
// The entry names passed to GetSavePath must stay in sync with LoadInternal.
275 protected override void SaveInternal(ZipArchive archive)
277 ArchiveSaver.Save(archive, dataSplitToPhrases, GetSavePath(
"dataSplitToPhrases"));
278 ArchiveSaver.Save(archive, phraseToSentences, GetSavePath(
"phraseToSentences"));
279 ArchiveSaver.Save(archive, sentenceToPhrase, GetSavePath(
"sentenceToPhrase"));
280 ArchiveSaver.Save(archive, hexToPhrase, GetSavePath(
"hexToPhrase"));
281 ArchiveSaver.Save(archive, nextKey, GetSavePath(
"nextKey"));
// Restores the bookkeeping state from the archive, replacing the current field values.
// Reads the same entry names that SaveInternal writes.
284 protected override void LoadInternal(ZipArchive archive)
286 dataSplitToPhrases = ArchiveSaver.Load<Dictionary<string, List<int>>>(archive, GetSavePath(
"dataSplitToPhrases"));
287 phraseToSentences = ArchiveSaver.Load<Dictionary<int, int[]>>(archive, GetSavePath(
"phraseToSentences"));
288 sentenceToPhrase = ArchiveSaver.Load<Dictionary<int, int>>(archive, GetSavePath(
"sentenceToPhrase"));
289 hexToPhrase = ArchiveSaver.Load<Dictionary<int, int[]>>(archive, GetSavePath(
"hexToPhrase"));
290 nextKey = ArchiveSaver.Load<
int>(archive, GetSavePath(
"nextKey"));
Class implementing the chunking functionality.
override int Count(string group)
Returns a count of the phrases in a specific data group.
Task< List<(int, int)> > Split(string input)
Splits the provided phrase into chunks.
override void IncrementalSearchComplete(int fetchKey)
Completes the search and clears the cached results for an incremental search.
override async Task< int > Add(string inputString, string group="")
Adds a phrase to the search after splitting it into chunks.
override string Get(int key)
Retrieves the phrase with the specific id.
override void Remove(int key)
Removes a phrase and the phrase chunks from the search.
override ValueTuple< int[], float[], bool > IncrementalFetchKeys(int fetchKey, int k)
Retrieves the most similar search results in batches (incremental search). The phrase/chunk keys and ...
void ReturnChunks(bool returnChunks)
Set to true to make the Search function return the matching chunks; set to false to return the full input phrases the chunks belong to.
override void Clear()
Clears the object and the associated search object.
override int Count()
Returns a count of the phrases.
override async Task< int > IncrementalSearch(string queryString, string group="")
Allows searching and retrieving the results in batches (incremental search).
override int Remove(string inputString, string group="")
Removes a phrase and the phrase chunks from the search.
override ValueTuple< string[], float[], bool > IncrementalFetch(int fetchKey, int k)
Retrieves the most similar search results in batches (incremental search). The phrases/chunks and the...
Class implementing the search plugin template used e.g. in chunking.