5#if !(TARGET_OS_IOS || TARGET_OS_VISION)
// Builds an OpenSSL X509_STORE from PEM text: wraps `cert_str` in a memory
// BIO, parses it with PEM_X509_INFO_read_bio, and adds each parsed entry's
// certificate and CRL to a newly created store.
// NOTE(review): this listing carries the original line numbers as prefixes and
// omits lines (e.g. 9-13, 26-32, 43+). Any null checks, guards around the
// add calls, release of the BIO, and the return statement are not visible here.
6X509_STORE *load_client_cert(
const std::string &cert_str)
// The BIO reads directly from cert_str's buffer; the size is narrowed to int.
8 BIO *mem = BIO_new_mem_buf(cert_str.data(), (
int)cert_str.size());
// Parse the PEM data into a stack of X509_INFO entries.
14 auto inf = PEM_X509_INFO_read_bio(mem,
nullptr,
nullptr,
nullptr);
20 auto cts = X509_STORE_new();
// Walk the parsed entries by index.
23 for (
auto i = 0; i < static_cast<int>(sk_X509_INFO_num(inf)); i++)
25 auto itmp = sk_X509_INFO_value(inf, i);
// Each entry contributes its x509 and crl members to the store.
33 X509_STORE_add_cert(cts, itmp->x509);
37 X509_STORE_add_crl(cts, itmp->crl);
// Free the stack together with every X509_INFO element it holds.
42 sk_X509_INFO_pop_free(inf, X509_INFO_free);
// Context handed to ios_callback_with_context through its `void* ctx`
// argument. The callback reads `concatenator` and `cancel_flag` from it.
// NOTE(review): the member declarations are not visible in this listing.
47struct IOSCallbackContext {
// Streaming chunk callback used on the TARGET_OS_IOS / TARGET_OS_VISION path
// of post_request. `ctx` is cast to IOSCallbackContext*; `data` is read as a
// NUL-terminated C string.
// NOTE(review): the bodies of the two early-exit checks below are not visible
// in this listing.
53static void ios_callback_with_context(
const char* data,
void* ctx) {
54 auto* context =
static_cast<IOSCallbackContext*
>(ctx);
// Guard against a missing context or missing members before dereferencing.
56 if (!context || !context->concatenator || !context->cancel_flag) {
// Check whether the request has already been flagged as cancelled.
60 if (*context->cancel_flag) {
// No length is supplied, so the chunk is taken up to the first NUL byte.
64 std::string chunk_str(data);
// A false return from process_chunk flags the request as cancelled.
65 if (!context->concatenator->process_chunk(chunk_str)) {
66 *context->cancel_flag =
true;
// Probes the server's health endpoint and reports whether it answered with a
// 2xx status. An "Authorization: Bearer <API_key>" header is attached when
// API_key is non-empty.
// NOTE(review): lines 72-74 and the preprocessor line separating the two
// platform paths are not visible in this listing.
71bool LLMClient::is_server_alive()
75 std::vector<std::pair<std::string, std::string>> headers;
76 if (!API_key.empty()) {
77 headers.push_back({
"Authorization",
"Bearer " + API_key});
// iOS / visionOS: POST an empty JSON object to "health" through `transport`.
80#if TARGET_OS_IOS || TARGET_OS_VISION
81 HttpResult result = transport->post_request(
"health",
"{}", headers);
82 return result.success && result.status_code >= 200 && result.status_code < 300;
// httplib path: copy the header pairs into httplib::Headers and POST to
// "/health" on sslClient when use_ssl is set, otherwise on client.
84 httplib::Headers Headers;
85 for (
const auto& h : headers) Headers.insert(h);
86 auto res = use_ssl ? sslClient->Post(
"/health", Headers) : client->Post(
"/health", Headers);
// A result that tests false counts as "not alive".
87 return res && res->status >= 200 && res->status < 300;
// Sends a POST request for `path` with a JSON body, optionally streaming the
// response through `callback`. The visible final return yields response_buffer.
// NOTE(review): this listing carries the original line numbers as prefixes and
// omits many lines (parameters on 93-94, braces, the cancel handling inside
// the receiver, intermediate returns). Comments describe only what is visible.
91std::string LLMClient::post_request(
92 const std::string &path,
95 bool callbackWithJSON)
// Streaming defaults to "a callback was supplied"; a "stream" entry in the
// body takes precedence, and the resolved value is written back into the body.
98 bool stream = callback !=
nullptr;
99 if (body.contains(
"stream"))
100 stream = body[
"stream"];
102 body[
"stream"] = stream;
// Per-request cancel flag, registered in active_requests only when streaming.
// NOTE(review): no matching `delete` is visible in this listing - confirm the
// flag is released on every exit path.
104 bool* cancel_flag =
new bool(
false);
105 if (stream) active_requests.push_back(cancel_flag);
107 std::string response_buffer =
"";
109 if (stream && callback) concatenator.
set_callback(callback, callbackWithJSON);
// Base headers; Accept depends on whether the response is streamed.
111 std::vector<std::pair<std::string, std::string>> headers = {
112 {
"Content-Type",
"application/json"},
113 {
"Accept", stream ?
"text/event-stream" :
"application/json"},
114 {
"Cache-Control",
"no-cache"}
117 if (!API_key.empty()) {
118 headers.push_back({
"Authorization",
"Bearer " + API_key});
// iOS / visionOS path: the request goes through `transport`, with
// ios_callback_with_context receiving chunks when streaming.
121#if TARGET_OS_IOS || TARGET_OS_VISION
123 IOSCallbackContext ios_context = {&concatenator, cancel_flag};
// Up to max_retries + 1 attempts; stops on success or cancellation.
126 for (
int attempt = 0; attempt <= max_retries; attempt++) {
127 result = transport->post_request(
131 stream ? ios_callback_with_context : nullptr,
132 stream ? &ios_context : nullptr,
136 if (result.success || *cancel_flag)
break;
// Delay doubles per attempt and is capped at 30 seconds.
// NOTE(review): `1 << attempt` is evaluated before the cap is applied, so the
// shift itself is unbounded for large max_retries.
138 int delay_seconds = std::min(30, 1 << attempt);
139 std::cerr <<
"[LLMClient] POST failed: " << result.error_message
140 <<
", retrying in " << delay_seconds <<
"s (attempt "
141 << attempt <<
"/" << max_retries <<
")\n";
142 std::this_thread::sleep_for(std::chrono::seconds(delay_seconds));
145 if (!result.success) {
146 std::cerr <<
"[LLMClient] POST request failed: " << result.error_message <<
"\n";
// Unregister this request's cancel flag (erase-remove on active_requests).
148 active_requests.erase(std::remove(active_requests.begin(), active_requests.end(), cancel_flag), active_requests.end());
// Same unregistration on a second exit path of the iOS branch.
155 active_requests.erase(std::remove(active_requests.begin(), active_requests.end(), cancel_flag), active_requests.end());
// httplib path: build an httplib::Request explicitly (path, headers,
// serialized body, content receiver).
163 httplib::Headers Headers;
164 for (
const auto& h : headers) Headers.insert(h);
166 httplib::Request req;
168 req.path =
"/" + path;
169 req.headers = Headers;
170 req.body = body.dump();
// Receiver invoked with response body chunks; captures locals by reference.
172 req.content_receiver = [&](
const char *data,
size_t data_length, uint64_t , uint64_t )
174 std::string chunk_str(data, data_length);
182 std::cerr <<
"[LLMClient] Streaming cancelled\n";
// Append the chunk to the buffer that is returned at the end.
188 response_buffer += chunk_str;
193 const int max_delay = 30;
// Up to max_retries + 1 attempts; stops on success or cancellation.
195 for (
int attempt = 0; attempt <= max_retries; attempt++)
197 request_sent = use_ssl ? sslClient->send(req) : client->send(req);
198 if (request_sent || *cancel_flag)
break;
// Delay doubles per attempt and is capped at max_delay seconds.
// NOTE(review): as above, the shift is evaluated before the cap.
200 int delay_seconds = std::min(max_delay, 1 << attempt);
201 std::cerr <<
"[LLMClient] POST failed, retrying in " << delay_seconds
202 <<
"s (attempt " << attempt <<
"/" << max_retries <<
")\n";
203 std::this_thread::sleep_for(std::chrono::seconds(delay_seconds));
208 std::cerr <<
"[LLMClient] POST request failed after retries\n";
// Streaming requests were registered above, so unregister the flag here.
212 if (stream) active_requests.erase(std::remove(active_requests.begin(), active_requests.end(), cancel_flag), active_requests.end());
218 return response_buffer;
// Remote-client constructor: stores the URL, port, API key and retry count,
// then derives `host` by stripping a leading "https://" or "http://" from `url`.
// NOTE(review): lines between the visible ones are missing from this listing
// (presumably where use_ssl is set and where the choice between SSLClient and
// Client is made) - confirm against the full source.
229LLMClient::LLMClient(
const std::string &url_,
const int port_,
const std::string &API_key_,
const int max_retries_) : url(url_), port(port_), API_key(API_key_), max_retries(max_retries_)
// rfind(prefix, 0) == 0 is a starts-with test; 8 is the length of "https://".
232 if (url.rfind(
"https://", 0) == 0)
234 host = url.substr(8);
// Otherwise drop "http://" (7 characters) when present, else use url as-is.
239 host = url.rfind(
"http://", 0) == 0 ? url.substr(7) : url;
// iOS / visionOS: the transport gets a timeout value of 60.0.
243#if TARGET_OS_IOS || TARGET_OS_VISION
245 transport->set_timeout(60.0);
// httplib clients are allocated with `new` against host/port.
249 sslClient =
new httplib::SSLClient(host.c_str(), port);
253 client =
new httplib::Client(host.c_str(), port);
// NOTE(review): fragment - the enclosing function header and the statements
// guarded by these null checks are not visible in this listing. Presumably this
// is LLMClient's destructor releasing transport / client / sslClient - confirm.
260#if TARGET_OS_IOS || TARGET_OS_VISION
261 if (transport !=
nullptr) {
265 if (client !=
nullptr)
267 if (sslClient !=
nullptr)
// NOTE(review): fragment - the enclosing function header is not visible in
// this listing (presumably LLMClient::set_SSL - confirm).
// Compiled only when neither TARGET_OS_IOS nor TARGET_OS_VISION is set.
274#if !(TARGET_OS_IOS || TARGET_OS_VISION)
// Remember the certificate text, then, if an SSL client exists, install a
// CA store built from it by load_client_cert.
277 this->SSL_cert = SSL_cert_;
278 if (sslClient !=
nullptr)
279 sslClient->set_ca_cert_store(load_client_cert(SSL_cert));
// NOTE(review): fragment - enclosing function header not visible (presumably
// LLMClient::tokenize_json). Forwards `data` to post_request on "tokenize".
288 return post_request(
"tokenize", data);
// NOTE(review): fragment - enclosing function header not visible (presumably
// LLMClient::detokenize_json). Forwards `data` to post_request on "detokenize".
300 return post_request(
"detokenize", data);
// NOTE(review): fragment - enclosing function header not visible (presumably
// LLMClient::embeddings_json). Forwards `data` to post_request on "embeddings".
312 return post_request(
"embeddings", data);
// NOTE(review): fragment - enclosing function header not visible (presumably
// the remote branch of LLMClient::completion_json).
// Works on a copy of the request so the caller's `data` is left untouched.
324 json data_remote = data;
// Any id_slot other than -1 is reported on stderr and overridden with -1.
325 if (data.contains(
"id_slot") && data[
"id_slot"] != -1)
327 std::cerr <<
"Remote clients can only use id_slot -1" << std::endl;
328 data_remote[
"id_slot"] = -1;
// Post the adjusted request, passing the streaming callback settings through.
330 return post_request(
"completion", data_remote, callback, callbackWithJSON);
// NOTE(review): fragment - enclosing function header not visible (presumably
// LLMClient::apply_template_json). Forwards `data` to post_request on
// "apply-template".
349 return post_request(
"apply-template", data);
// NOTE(review): fragment - enclosing function header and return value are not
// visible (presumably the remote branch of LLMClient::slot_json).
// Reports on stderr that slot operations are unavailable for remote clients.
361 std::cerr <<
"Slot operations are not supported in remote clients" << std::endl;
// NOTE(review): fragment - enclosing function header not visible (presumably
// the remote branch of LLMClient::cancel).
// Sets every cancel flag currently registered in active_requests to true.
374 for (
bool* flag : active_requests) *flag =
true;
// C-API style wrapper: returns llm->is_server_alive().
// `llm` is dereferenced without a null check in the visible lines.
384bool LLMClient_Is_Server_Alive(
LLMClient *llm)
386 return llm->is_server_alive();
// NOTE(review): fragment - enclosing function header not visible (presumably
// LLMClient_Construct_Remote). Allocates a remote LLMClient with `new` from
// url, port and API_key; no retry count is passed here.
401 return new LLMClient(url, port, API_key);
Client interface for local and remote LLM access.
Client for accessing LLM functionality locally or remotely.
int get_next_available_slot() override
Get available processing slot (override)
void set_SSL(const char *SSL_cert)
Configure SSL certificate for remote connections.
std::string slot_json(const json &data) override
Manage slots with HTTP response support.
LLMClient(LLMProvider *llm)
Constructor for local LLM access.
std::string apply_template_json(const json &data) override
Apply a chat template to message data.
bool is_remote() const
Check if this is a remote client.
std::string detokenize_json(const json &data) override
Convert tokens back to text.
std::string completion_json(const json &data, CharArrayFn callback=nullptr, bool callbackWithJSON=true) override
Generate text completion (override)
std::string embeddings_json(const json &data) override
Generate embeddings with HTTP response support.
std::string tokenize_json(const json &data) override
Tokenize input (override)
void cancel(int id_slot) override
Cancel running request (override)
virtual std::string slot_json(const json &data)=0
Manage slots with HTTP response support.
virtual int get_next_available_slot()=0
Get an available processing slot.
virtual void cancel(int id_slot)=0
Cancel request.
Abstract class for LLM service providers.
virtual std::string embeddings_json(const json &data)=0
Generate embeddings with HTTP response support.
virtual std::string apply_template_json(const json &data)=0
Apply a chat template to message data.
virtual std::string tokenize_json(const json &data)=0
Tokenize input.
virtual std::string completion_json(const json &data, CharArrayFn callback, bool callbackWithJSON)=0
Generate text completion.
virtual std::string detokenize_json(const json &data)=0
Convert tokens back to text.
Handles concatenation of LLM response chunks (both streaming and non-streaming). Accumulates content a...
bool process_chunk(const std::string &chunk_data)
Process a single chunk and accumulate its content/tokens.
std::string get_result_json() const
Get the complete result as JSON string.
void set_callback(CharArrayFn callback, bool callWithJSON=false)
Set a callback to be invoked after each chunk is processed.
LLMClient * LLMClient_Construct(LLMProvider *llm)
Construct local LLMClient (C API)
void LLMClient_Set_SSL(LLMClient *llm, const char *SSL_cert)
Set SSL certificate (C API)
LLMClient * LLMClient_Construct_Remote(const char *url, const int port, const char *API_key="")
Construct remote LLMClient (C API)