/// <summary>
/// Value the first four bytes of a file must decode to (as a UInt32) to be accepted.
/// 0x46554747 is the byte sequence 'G','G','U','F' read as a little-endian integer.
/// </summary>
private const uint GGUF_MAGIC = 0x46554747;

/// <summary>GGUF format version constant; it is one of the versions the reader accepts.</summary>
private const int GGUF_VERSION = 3;

/// <summary>Format versions the reader is willing to parse.</summary>
private readonly List<int> READER_SUPPORTED_VERSIONS = new List<int> { 2, GGUF_VERSION };

/// <summary>
/// Lookup from each scalar GGUF value type to the CLR type used to size and decode it.
/// STRING and ARRAY are deliberately absent: they are handled separately by the parser.
/// </summary>
private Dictionary<GGUFValueType, Type> gguf_scalar_to_np = new Dictionary<GGUFValueType, Type>
{
    [GGUFValueType.UINT8] = typeof(byte),
    [GGUFValueType.INT8] = typeof(sbyte),
    [GGUFValueType.UINT16] = typeof(ushort),
    [GGUFValueType.INT16] = typeof(short),
    [GGUFValueType.UINT32] = typeof(uint),
    [GGUFValueType.INT32] = typeof(int),
    [GGUFValueType.FLOAT32] = typeof(float),
    [GGUFValueType.UINT64] = typeof(ulong),
    [GGUFValueType.INT64] = typeof(long),
    [GGUFValueType.FLOAT64] = typeof(double),
    [GGUFValueType.BOOL] = typeof(bool),
};

/// <summary>Stream over the file being read; every ReadBytes call seeks and reads through it.</summary>
private FileStream data;

/// <summary>Fields parsed so far, keyed by field name.</summary>
public Dictionary<string, ReaderField> fields = new Dictionary<string, ReaderField>();
// Constructor body. NOTE(review): the constructor signature and the declaration/initialisation
// of `offs` are not visible in this excerpt.
// Open the file read-only; the stream is kept in `data` and used by ReadBytes.
// NOTE(review): no Dispose/Close of this FileStream is visible here - confirm the owner releases it.
86 data =
new FileStream(path, FileMode.Open, FileAccess.Read);
// Reject the file unless the 4 bytes at `offs` decode to GGUF_MAGIC.
89 if (BitConverter.ToUInt32(ReadBytes(offs, 4), 0) != GGUF_MAGIC)
90 throw new ArgumentException(
"GGUF magic invalid");
// Read the 32-bit version. A value whose low 16 bits are all zero is treated as byte-swapped:
// the same 4 bytes are read again and reversed before decoding.
// NOTE(review): no byte swapping of the later reads in this block is visible in this excerpt.
93 uint temp_version = BitConverter.ToUInt32(ReadBytes(offs, 4));
94 if ((temp_version & 65535) == 0)
96 byte[] tempBytes = ReadBytes(offs, 4);
97 Array.Reverse(tempBytes);
98 temp_version = BitConverter.ToUInt32(tempBytes, 0);
100 uint version = temp_version;
// Only versions listed in READER_SUPPORTED_VERSIONS are accepted.
102 if (!READER_SUPPORTED_VERSIONS.Contains((
int)version))
103 throw new ArgumentException($
"Sorry, file appears to be version {version} which we cannot handle");
// Register the version as a field; the value returned by PushField is added to `offs`.
105 offs += PushField(
new ReaderField { offset = offs, name =
"GGUF.version", parts =
new List<Array> {
new uint[] { temp_version } }, data =
new List<int> { 0 }, types =
new List<GGUFValueType> { GGUFValueType.UINT32 } });
// Read 16 bytes as two consecutive 64-bit values: [0] is used as the tensor count,
// [1] as the key/value count.
106 ulong[] temp_counts =
new ulong[2];
107 Buffer.BlockCopy(ReadBytes(offs, 16), 0, temp_counts, 0, 16);
108 offs += PushField(
new ReaderField { offset = offs, name =
"GGUF.tensor_count", parts =
new List<Array> {
new ulong[] { temp_counts[0] } }, data =
new List<int> { 0 }, types =
new List<GGUFValueType> { GGUFValueType.UINT64 } });
109 offs += PushField(
new ReaderField { offset = offs, name =
"GGUF.kv_count", parts =
new List<Array> {
new ulong[] { temp_counts[1] } }, data =
new List<int> { 0 }, types =
new List<GGUFValueType> { GGUFValueType.UINT64 } });
110 ulong tensor_count = temp_counts[0];
111 ulong kv_count = temp_counts[1];
// Parse `kv_count` key/value fields starting at `offs`; BuildFields returns the new offset.
// NOTE(review): the (int) cast narrows a 64-bit count taken from the file - values above
// int.MaxValue would wrap.
112 offs = BuildFields(offs, (
int)kv_count);
// Looks `key` up in `fields` with a single TryGetValue call.
// NOTE(review): the enclosing method's signature and both branch bodies are not visible
// in this excerpt.
123 if (
fields.TryGetValue(key, out ReaderField value))
/// <summary>
/// Returns the last part of the field stored under <paramref name="key"/> as a byte array.
/// </summary>
/// <param name="key">Name of the field to look up.</param>
/// <returns>
/// The last entry of the field's parts list cast to byte[], or null when the field is null
/// or has no parts.
/// </returns>
128 public byte[] GetGenericField(
string key)
// NOTE(review): the statement that declares and assigns `field` is not visible in this excerpt.
131 if (field ==
null || field.parts.Count == 0)
return null;
// NOTE(review): this is a hard cast. Fields whose last part is not a byte[] (the header
// fields in this file are pushed with uint[] / ulong[] parts) would raise InvalidCastException.
132 return (
byte[])field.parts[field.parts.Count - 1];
// Body of a string accessor (its signature is not visible in this excerpt):
// fetches the raw bytes for `key` via GetGenericField and decodes them as UTF-8.
// Returns null when GetGenericField returns null.
142 byte[] value = GetGenericField(key);
143 if (value ==
null)
return null;
144 return System.Text.Encoding.UTF8.GetString(value);
// Body of an integer accessor (its signature is not visible in this excerpt):
// fetches the raw bytes for `key` via GetGenericField and decodes the first 4 bytes as Int32.
// Returns -1 when GetGenericField returns null.
// NOTE(review): the -1 sentinel cannot be told apart from a stored value of -1, and
// BitConverter.ToInt32 throws if `value` holds fewer than 4 bytes.
154 byte[] value = GetGenericField(key);
155 if (value ==
null)
return -1;
156 return BitConverter.ToInt32(value, 0);
/// <summary>
/// Reads <paramref name="count"/> bytes from the file, starting at absolute position
/// <paramref name="offset"/>.
/// </summary>
/// <remarks>
/// Stream.Read is allowed to return fewer bytes than requested even when more data is
/// available, so the read is repeated until the buffer is full or the stream reports end of
/// file (Read returns 0). When the file ends early, the unread tail of the buffer stays
/// zero-filled, which is what the previous single-Read implementation produced in that case.
/// </remarks>
/// <param name="offset">Absolute byte position to seek to before reading.</param>
/// <param name="count">Number of bytes requested; also the length of the returned array.</param>
/// <returns>A new array of exactly <paramref name="count"/> bytes.</returns>
private byte[] ReadBytes(int offset, int count)
{
    byte[] buffer = new byte[count];
    data.Seek(offset, SeekOrigin.Begin);

    int filled = 0;
    while (filled < count)
    {
        int read = data.Read(buffer, filled, count - filled);
        if (read == 0)
        {
            // End of file: keep the zero-filled tail rather than throwing, so callers
            // observe the same result as before on truncated input.
            break;
        }

        filled += read;
    }

    return buffer;
}
/// <summary>
/// Registers <paramref name="field"/> in <c>fields</c> under its name.
/// </summary>
/// <param name="field">Field to register; its name must not already be present.</param>
/// <param name="skip_sum">
/// Defaults to false. NOTE(review): the code that consumes this flag is not visible in this
/// excerpt - presumably it skips the size computation below; confirm.
/// </param>
/// <exception cref="ArgumentException">A field with the same name is already registered.</exception>
167 private int PushField(ReaderField field,
bool skip_sum =
false)
169 if (
fields.ContainsKey(field.name))
170 throw new ArgumentException($
"Duplicate {field.name} already in list at offset {field.offset}");
171 fields[field.name] = field;
// Accumulate, for every part, (element size of the part's value type) * (element count).
// NOTE(review): the declaration of `sum` and the method's return statement are not visible
// in this excerpt.
// NOTE(review): types[i] is indexed with the part index, so this assumes `types` has an entry
// for every part; also Marshal.SizeOf(typeof(bool)) is 4, not 1.
175 for (
int i = 0; i < field.parts.Count; i++)
177 Type partType = gguf_scalar_to_np[field.types[i]];
178 sum += Marshal.SizeOf(partType) * field.parts[i].Length;
/// <summary>
/// Reads a length-prefixed byte string: a 64-bit unsigned length at
/// <paramref name="offset"/>, followed immediately by that many bytes.
/// </summary>
/// <param name="offset">Absolute byte position of the 8-byte length prefix.</param>
/// <returns>
/// A tuple of a one-element array holding the length, and the raw string bytes
/// (not decoded to text here).
/// </returns>
/// <exception cref="ArgumentException">
/// The stored length does not fit in a 32-bit signed integer and therefore cannot be read
/// into a single array.
/// </exception>
private (ulong[], byte[]) GetStr(int offset)
{
    ulong slen = BitConverter.ToUInt64(ReadBytes(offset, 8), 0);

    // The length comes straight from the file. An unchecked (int) cast would silently wrap
    // or truncate oversized values, so reject them explicitly instead.
    if (slen > int.MaxValue)
    {
        throw new ArgumentException(
            $"String length {slen} at offset {offset} is too large to read");
    }

    byte[] sdata = ReadBytes(offset + 8, (int)slen);
    return (new ulong[] { slen }, sdata);
}
/// <summary>
/// Decodes one value of the GGUF type <paramref name="raw_type"/> starting at
/// <paramref name="orig_offs"/>. Handles STRING, the scalar types listed in
/// gguf_scalar_to_np, and ARRAY (whose elements are decoded by recursive calls).
/// </summary>
/// <param name="orig_offs">Absolute byte position where the value starts.</param>
/// <param name="raw_type">Numeric type tag; it is cast to GGUFValueType.</param>
/// <returns>
/// A tuple of: the size in bytes reported for the value, the list of raw parts, the indices
/// into that parts list that are reported as data, and the list of value types encountered.
/// </returns>
/// <exception cref="ArgumentException">The type tag matches none of the handled cases.</exception>
190 private (int, List<Array>, List<int>, List<GGUFValueType>) GetFieldParts(
int orig_offs,
int raw_type)
192 int offs = orig_offs;
193 List<GGUFValueType> types =
new List<GGUFValueType>();
194 types.Add((GGUFValueType)raw_type);
// STRING: parts are [length, bytes]; index 1 (the bytes) is reported as the data part.
// Size is the 8-byte length prefix plus the byte count.
196 if ((GGUFValueType)raw_type == GGUFValueType.STRING)
198 (ulong[] slen,
byte[] sdata) = GetStr(offs);
199 List<Array> sparts =
new List<Array> { slen, sdata };
200 int size = slen.Length *
sizeof(ulong) + sdata.Length;
201 return (size, sparts,
new List<int> { 1 }, types);
// Scalar: a single raw part holding the value's bytes.
// NOTE(review): the read uses Marshal.SizeOf(nptype) while the reported size special-cases
// bool as 1. Marshal.SizeOf(typeof(bool)) is 4, so a bool value reads 4 bytes but is
// reported as 1 byte long - the read length and the reported size disagree.
205 if (gguf_scalar_to_np.TryGetValue((GGUFValueType)raw_type, out Type nptype))
207 Array val = ReadBytes(offs, Marshal.SizeOf(nptype));
208 int size = nptype == typeof(
bool) ? 1 : Marshal.SizeOf(nptype);
209 return (size,
new List<Array> { val },
new List<int> { 0 }, types);
// ARRAY: a 4-byte element type tag, then an 8-byte element count, then the elements.
213 if ((GGUFValueType)raw_type == GGUFValueType.ARRAY)
215 int raw_itype = BitConverter.ToInt32(ReadBytes(offs, 4));
216 offs += Marshal.SizeOf(typeof(
int));
218 ulong alen = BitConverter.ToUInt64(ReadBytes(offs, 8));
219 offs += Marshal.SizeOf(typeof(ulong));
// The first two parts re-encode the element type tag and the element count as byte arrays.
221 List<Array> aparts =
new List<Array> { BitConverter.GetBytes(raw_itype), BitConverter.GetBytes(alen) };
222 List<int> data_idxs =
new List<int>();
// NOTE(review): (int)alen narrows a 64-bit count taken from the file.
// NOTE(review): no statement advancing `offs` by curr_size is visible inside this loop in
// this excerpt, and it is unclear whether types.AddRange runs for every element or only
// the first - confirm against the full file.
224 for (
int idx = 0; idx < (int)alen; idx++)
226 (
int curr_size, List<Array> curr_parts, List<int> curr_idxs, List<GGUFValueType> curr_types) = GetFieldParts(offs, raw_itype);
228 types.AddRange(curr_types);
// Element data indices are relative to the element's own parts; shift them by the number
// of parts already collected so they index into `aparts`.
230 int idxs_offs = aparts.Count;
231 aparts.AddRange(curr_parts);
232 data_idxs.AddRange(
new List<int>(curr_idxs.ConvertAll(i => i + idxs_offs)));
// The reported size is the distance `offs` has moved from the start of the value.
235 return (offs - orig_offs, aparts, data_idxs, types);
238 throw new ArgumentException($
"Unknown/unhandled field type {(GGUFValueType)raw_type}");
/// <summary>
/// Parses <paramref name="count"/> key/value fields starting at <paramref name="offs"/> and
/// registers each one through PushField.
/// </summary>
/// <param name="offs">Absolute byte position of the first key/value entry.</param>
/// <param name="count">Number of key/value entries to parse.</param>
/// <returns>
/// NOTE(review): the return statement is not visible in this excerpt; the constructor assigns
/// the result back to its running offset.
/// </returns>
241 private int BuildFields(
int offs,
int count)
243 for (
int i = 0; i < count; i++)
245 int orig_offs = offs;
// Key: a length-prefixed byte string; skip its 8-byte length and its bytes.
246 (ulong[] kv_klen,
byte[] kv_kdata) = GetStr(offs);
247 offs += Marshal.SizeOf(typeof(ulong)) + kv_kdata.Length;
// Value type tag: 4 bytes following the key.
249 int raw_kv_type = BitConverter.ToInt32(ReadBytes(offs, 4));
250 offs += Marshal.SizeOf(typeof(
int));
// The first three parts are key length, key bytes and the type tag. `idxs_offs` records how
// many parts precede the value's own parts.
251 List<Array> parts =
new List<Array> { kv_klen, kv_kdata, BitConverter.GetBytes(raw_kv_type) };
252 List<int> idxs_offs =
new List<int> { parts.Count };
254 (
int field_size, List<Array> field_parts, List<int> field_idxs, List<GGUFValueType> field_types) = GetFieldParts(offs, raw_kv_type);
// NOTE(review): the body of this check is not visible in this excerpt. The visible return
// paths of GetFieldParts in this file never produce -1 (they return a computed size or throw).
255 if (field_size == -1)
258 parts.AddRange(field_parts);
// NOTE(review): several initializer members of this ReaderField, and any statement advancing
// `offs` by field_size, are not visible in this excerpt.
259 ReaderField readerField =
new ReaderField
// The field name is the key bytes decoded as UTF-8.
262 name = System.Text.Encoding.UTF8.GetString(kv_kdata),
// Shift the value's data indices past the three leading parts so they index into `parts`.
264 data =
new List<int>(field_idxs.ConvertAll(idx => idx + idxs_offs[0])),
// Registered with skip_sum: true; the return value of PushField is ignored here.
267 PushField(readerField, skip_sum: true);