LlamaLib  v2.0.2
Cross-platform library for local LLMs
Loading...
Searching...
No Matches
LLM_service_builder.h
1
2#ifdef USE_RUNTIME_DETECTION
3#include "LLM_runtime.h"
4#else
5#include "LLM_service.h"
6#endif
7
8class UNDREAMAI_API LLMServiceBuilder {
9private:
10 std::string model_path_;
11 int num_slots_ = 1;
12 int num_threads_ = -1;
13 int num_GPU_layers_ = 0;
14 bool flash_attention_ = false;
15 int context_size_ = 4096;
16 int batch_size_ = 2048;
17 bool embedding_only_ = false;
18 std::vector<std::string> lora_paths_ = {};
19
20public:
21 LLMServiceBuilder& model(const std::string& path) {
22 model_path_ = path;
23 return *this;
24 }
25
26 LLMServiceBuilder& numSlots(int val) {
27 num_slots_ = val;
28 return *this;
29 }
30
31 LLMServiceBuilder& numThreads(int val) {
32 num_threads_ = val;
33 return *this;
34 }
35
36 LLMServiceBuilder& numGPULayers(int val) {
37 num_GPU_layers_ = val;
38 return *this;
39 }
40
41 LLMServiceBuilder& flashAttention(bool val) {
42 flash_attention_ = val;
43 return *this;
44 }
45
46 LLMServiceBuilder& contextSize(int val) {
47 context_size_ = val;
48 return *this;
49 }
50
51 LLMServiceBuilder& batchSize(int val) {
52 batch_size_ = val;
53 return *this;
54 }
55
56 LLMServiceBuilder& embeddingOnly(bool val) {
57 embedding_only_ = val;
58 return *this;
59 }
60
61 LLMServiceBuilder& loraPaths(const std::vector<std::string>& paths) {
62 lora_paths_ = paths;
63 return *this;
64 }
65
66 LLMService* build() {
67 LLMService* service = new LLMService(
68 model_path_,
69 num_slots_,
70 num_threads_,
71 num_GPU_layers_,
72 flash_attention_,
73 context_size_,
74 batch_size_,
75 embedding_only_,
76 lora_paths_
77 );
78 return service;
79 }
80};
Runtime loading and management of LLM libraries.
LLM service implementation with server capabilities.
LLM service implementation.
Runtime loader for LLM libraries.
Definition LLM_runtime.h:63