DevHunterAI committed on
Commit
6a90c85
·
verified ·
1 Parent(s): 1335cf8

Upload ministral_3b_hmc_server.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. ministral_3b_hmc_server.py +566 -0
ministral_3b_hmc_server.py ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import ast
3
+ import json
4
+ import operator
5
+ import re
6
+ import threading
7
+ from http import HTTPStatus
8
+ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
9
+ from urllib.parse import urlparse
10
+
11
+ from ministral_3b_hmc_chat import (
12
+ DEFAULT_ADAPTER_DIR,
13
+ DEFAULT_MODEL_ID,
14
+ SYSTEM_PROMPT,
15
+ build_prompt,
16
+ generate_reply,
17
+ load_model,
18
+ )
19
+
20
# Version tag reported by the /health endpoint and rendered on the chat page.
SERVER_VERSION = "ministral-hmc-server-2026-03-22-v1"

# System prompt used instead of the configured one when a message looks like math.
MATH_SYSTEM_PROMPT = (
    "You are RubiNet. Solve math problems carefully and step by step. "
    "Verify arithmetic before answering. "
    "Keep the reasoning concise but clear, and end with 'Final answer: ...'."
)

# Lower-case substrings that mark a message as a math question.
MATH_KEYWORDS = (
    "calculate", "compute", "evaluate", "solve",
    "equation", "math", "algebra", "geometry",
    "probability", "percentage", "percent",
    "sum", "product", "difference", "quotient",
)

# Binary operators the calculator supports, mapped to their implementations.
CALC_BIN_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.FloorDiv: operator.floordiv,
    ast.Mod: operator.mod,
    ast.Pow: operator.pow,
}

# Unary operators the calculator supports.
CALC_UNARY_OPS = {
    ast.UAdd: operator.pos,
    ast.USub: operator.neg,
}

# Every AST node type the calculator accepts: the structural nodes plus
# exactly the operator node types that have an implementation above.
ALLOWED_CALC_NODES = {
    ast.Expression,
    ast.BinOp,
    ast.UnaryOp,
    ast.Constant,
    *CALC_BIN_OPS,
    *CALC_UNARY_OPS,
}
67
+
68
# Single-page chat UI served at "/". The __VERSION__, __MODEL__ and __ADAPTER__
# placeholders are substituted per request by ChatHandler.do_GET. The string is
# not raw, so the embedded JavaScript must stay free of backslashes.
HTML_PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>RubiNet Chat</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; background: #111827; color: #f3f4f6; }
.wrap { max-width: 960px; margin: 0 auto; padding: 24px; }
.card { background: #1f2937; border-radius: 16px; padding: 20px; box-shadow: 0 10px 30px rgba(0,0,0,.25); }
h1 { margin-top: 0; font-size: 28px; }
.meta { color: #9ca3af; margin-bottom: 10px; }
#chat { min-height: 360px; max-height: 60vh; overflow-y: auto; padding: 12px; background: #0f172a; border-radius: 12px; margin-bottom: 16px; }
.msg { padding: 12px 14px; border-radius: 12px; margin-bottom: 12px; white-space: pre-wrap; word-break: break-word; }
.user { background: #2563eb; }
.bot { background: #374151; }
form { display: flex; gap: 12px; align-items: stretch; }
textarea { flex: 1; min-height: 96px; max-height: 240px; resize: vertical; border-radius: 12px; border: none; padding: 12px; font: inherit; }
button { border: none; border-radius: 12px; padding: 0 20px; background: #10b981; color: white; font-weight: 700; cursor: pointer; }
button:disabled { background: #6b7280; cursor: wait; }
.status { margin-top: 12px; color: #93c5fd; min-height: 24px; }
.controls { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; margin-bottom: 16px; }
.controls select { border-radius: 12px; border: none; padding: 12px 14px; font: inherit; background: #e5e7eb; color: #111827; }
.secondary { background: #7c3aed; }
.danger { background: #dc2626; }
</style>
</head>
<body>
<div class="wrap">
<div class="card">
<h1>RubiNet Local Chat</h1>
<div class="meta">Version: <code>__VERSION__</code></div>
<div class="meta">Model: <code>__MODEL__</code></div>
<div class="meta">Adapter: <code>__ADAPTER__</code></div>
<div class="controls">
<select id="voice-gender">
<option value="female">Female voice</option>
<option value="male">Male voice</option>
</select>
<select id="speech-language">
<option value="tr-TR">Turkish speech</option>
<option value="en-US">English speech</option>
</select>
<button id="activate-voice" class="secondary" type="button">Activate voice</button>
<button id="stop-voice" class="danger" type="button">Stop voice</button>
</div>
<div id="chat"></div>
<form id="chat-form">
<textarea id="message" placeholder="Type your message..."></textarea>
<button id="send" type="submit">Send</button>
</form>
<div class="status" id="status"></div>
</div>
</div>
<script>
const form = document.getElementById('chat-form');
const messageEl = document.getElementById('message');
const chatEl = document.getElementById('chat');
const statusEl = document.getElementById('status');
const sendEl = document.getElementById('send');
const voiceGenderEl = document.getElementById('voice-gender');
const speechLanguageEl = document.getElementById('speech-language');
const activateVoiceEl = document.getElementById('activate-voice');
const stopVoiceEl = document.getElementById('stop-voice');
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
const speechSupported = !!SpeechRecognition && 'speechSynthesis' in window;
let recognition = null;
let listening = false;
let voices = [];
let restartingRecognition = false;
let audioStream = null;

function addMessage(role, text) {
const div = document.createElement('div');
div.className = `msg ${role}`;
div.textContent = text;
chatEl.appendChild(div);
chatEl.scrollTop = chatEl.scrollHeight;
}

function updateRecognitionLanguage() {
if (recognition) {
recognition.lang = speechLanguageEl.value || 'en-US';
}
}

async function ensureMicrophoneAccess() {
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
throw new Error('This browser does not support microphone access.');
}
if (audioStream) {
return audioStream;
}
audioStream = await navigator.mediaDevices.getUserMedia({
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
}
});
return audioStream;
}

async function startListening() {
if (!recognition) return;
try {
await ensureMicrophoneAccess();
updateRecognitionLanguage();
recognition.start();
listening = true;
restartingRecognition = false;
statusEl.textContent = 'Listening...';
activateVoiceEl.textContent = 'Listening';
} catch (error) {
statusEl.textContent = `Voice activation failed: ${error.message || error}`;
}
}

function loadVoices() {
voices = window.speechSynthesis ? window.speechSynthesis.getVoices() : [];
}

// Choose a synthesis voice for the requested gender and speech language.
// Voices in the selected language are preferred; the full list is only used
// when the browser offers none for that language.
function pickVoice(gender, lang) {
const wantedLang = (lang || 'en-US').toLowerCase();
const wantedBase = wantedLang.split('-')[0];
const voiceLang = (voice) => (voice.lang || '').toLowerCase().replace('_', '-');
const sameLanguage = voices.filter((voice) => voiceLang(voice).split('-')[0] === wantedBase);
const pool = sameLanguage.length ? sameLanguage : voices;
const preferred = gender === 'male'
? ['male', 'david', 'mark', 'guy', 'james', 'richard', 'george']
: ['female', 'zira', 'hazel', 'aria', 'jenny', 'susan', 'sara'];
// Compare whole words: a substring test would let 'male' match 'Female'.
const named = pool.map((voice) => ({ voice, words: `${voice.name} ${voice.voiceURI}`.toLowerCase().split(/[^a-z0-9]+/) }));
for (const token of preferred) {
const match = named.find((item) => item.words.includes(token));
if (match) return match.voice;
}
const exact = pool.find((voice) => voiceLang(voice) === wantedLang);
return exact || pool[0] || null;
}

function speakReply(text) {
if (!window.speechSynthesis || !text) return;
window.speechSynthesis.cancel();
const utterance = new SpeechSynthesisUtterance(text);
utterance.lang = speechLanguageEl.value || 'en-US';
utterance.rate = 1;
utterance.pitch = voiceGenderEl.value === 'male' ? 0.85 : 1.1;
const voice = pickVoice(voiceGenderEl.value, utterance.lang);
if (voice) {
utterance.voice = voice;
utterance.lang = voice.lang || utterance.lang;
}
window.speechSynthesis.speak(utterance);
}

async function sendMessage(message, shouldSpeak = false) {
addMessage('user', message);
statusEl.textContent = 'Generating reply...';
sendEl.disabled = true;
activateVoiceEl.disabled = true;
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 90000);
try {
const response = await fetch('/chat', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ message }),
signal: controller.signal
});
const data = await response.json();
if (!response.ok) {
addMessage('bot', data.error || 'Unknown error');
return;
}
addMessage('bot', data.reply);
if (shouldSpeak) {
speakReply(data.reply);
}
} catch (error) {
if (error && error.name === 'AbortError') {
addMessage('bot', 'The request timed out. Try a shorter message.');
} else {
addMessage('bot', `Request failed: ${error}`);
}
} finally {
clearTimeout(timeoutId);
statusEl.textContent = listening ? 'Listening...' : '';
sendEl.disabled = false;
activateVoiceEl.disabled = !speechSupported;
messageEl.focus();
}
}

form.addEventListener('submit', async (event) => {
event.preventDefault();
const message = messageEl.value.trim();
if (!message) return;
messageEl.value = '';
await sendMessage(message, false);
});

if (speechSupported) {
loadVoices();
window.speechSynthesis.onvoiceschanged = loadVoices;
recognition = new SpeechRecognition();
recognition.lang = speechLanguageEl.value || 'en-US';
recognition.continuous = false;
recognition.interimResults = false;
recognition.maxAlternatives = 1;

recognition.onstart = () => {
restartingRecognition = false;
statusEl.textContent = 'Listening...';
};

recognition.onresult = async (event) => {
const result = event.results[event.results.length - 1];
if (!result || !result.isFinal) return;
const transcript = result[0].transcript.trim();
if (!transcript) return;
await sendMessage(transcript, true);
};

recognition.onend = () => {
if (listening && !restartingRecognition) {
restartingRecognition = true;
setTimeout(() => {
if (!listening) {
restartingRecognition = false;
return;
}
startListening();
}, 350);
}
};

recognition.onerror = (event) => {
if (event.error === 'no-speech') {
statusEl.textContent = 'No speech detected. Keep speaking closer to the microphone...';
if (listening && !restartingRecognition) {
restartingRecognition = true;
try {
recognition.stop();
} catch (error) {
}
}
return;
}
if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
listening = false;
activateVoiceEl.textContent = 'Activate voice';
statusEl.textContent = 'Microphone permission was denied. Please allow microphone access in your browser.';
return;
}
if (event.error === 'audio-capture') {
listening = false;
activateVoiceEl.textContent = 'Activate voice';
statusEl.textContent = 'No microphone was found, or another application is using it.';
return;
}
statusEl.textContent = `Voice listening error: ${event.error}`;
};

speechLanguageEl.addEventListener('change', () => {
updateRecognitionLanguage();
if (listening) {
statusEl.textContent = 'Listening...';
}
});

activateVoiceEl.addEventListener('click', async () => {
if (listening) return;
await startListening();
});

stopVoiceEl.addEventListener('click', () => {
listening = false;
restartingRecognition = false;
activateVoiceEl.textContent = 'Activate voice';
statusEl.textContent = '';
window.speechSynthesis.cancel();
if (recognition) {
recognition.stop();
}
if (audioStream) {
for (const track of audioStream.getTracks()) {
track.stop();
}
audioStream = null;
}
});
} else {
activateVoiceEl.disabled = true;
stopVoiceEl.disabled = true;
voiceGenderEl.disabled = true;
speechLanguageEl.disabled = true;
statusEl.textContent = 'This browser does not support speech recognition or speech synthesis.';
}
</script>
</body>
</html>
"""
366
+
367
+
368
def looks_like_math_query(message: str) -> bool:
    """Heuristically decide whether *message* is a math question.

    A message qualifies when it contains a digit together with an arithmetic
    symbol, or when any entry of ``MATH_KEYWORDS`` occurs in it as a
    case-insensitive substring. Blank messages never qualify.
    """
    text = message.strip().lower()
    if not text:
        return False

    has_digit = re.search(r"\d", text) is not None
    if has_digit and re.search(r"[+\-*/=^×÷%]", text) is not None:
        return True

    for keyword in MATH_KEYWORDS:
        if keyword in text:
            return True
    return False
375
+
376
+
377
+ def extract_simple_expression(message: str) -> tuple[str, str] | None:
378
+ normalized = message.strip()
379
+ normalized = re.sub(r"(?i)\bwhat is\b", "", normalized)
380
+ normalized = re.sub(r"(?i)\bcalculate\b", "", normalized)
381
+ normalized = re.sub(r"(?i)\bcompute\b", "", normalized)
382
+ normalized = re.sub(r"(?i)\bevaluate\b", "", normalized)
383
+ normalized = re.sub(r"(?i)\bsolve\b", "", normalized)
384
+ normalized = normalized.replace("×", "*").replace("÷", "/").replace("^", "**")
385
+ normalized = normalized.replace("=?", "").replace("= ?", "").replace("=", "")
386
+ normalized = normalized.replace("?", "").strip()
387
+ if not normalized:
388
+ return None
389
+ if not re.fullmatch(r"[0-9\s\.+\-*/()%]*", normalized):
390
+ return None
391
+ if not re.search(r"\d", normalized) or not re.search(r"[+\-*/%()]", normalized):
392
+ return None
393
+ compact = re.sub(r"\s+", "", normalized)
394
+ return normalized, compact
395
+
396
+
397
def _eval_calc_node(node):
    """Recursively evaluate one node of a whitelisted arithmetic AST.

    Only node types listed in ``ALLOWED_CALC_NODES`` are accepted. Numeric
    literals are converted to ``float`` before any operator is applied.

    Args:
        node: An ``ast`` node produced by ``ast.parse(..., mode="eval")``.

    Returns:
        The numeric value of the sub-expression.

    Raises:
        ValueError: If the node type, operator or constant is not supported.
        ZeroDivisionError, OverflowError: Propagated from the arithmetic.
    """
    if type(node) not in ALLOWED_CALC_NODES:
        raise ValueError("Unsupported expression.")

    if isinstance(node, ast.Expression):
        return _eval_calc_node(node.body)

    if isinstance(node, ast.Constant):
        literal = node.value
        # bool subclasses int, so without the explicit check True/False would
        # be accepted and evaluated as 1.0/0.0.
        if isinstance(literal, bool) or not isinstance(literal, (int, float)):
            raise ValueError("Unsupported constant.")
        return float(literal)

    if isinstance(node, ast.UnaryOp):
        unary = CALC_UNARY_OPS.get(type(node.op))
        if unary is None:
            raise ValueError("Unsupported unary operator.")
        return unary(_eval_calc_node(node.operand))

    if isinstance(node, ast.BinOp):
        binary = CALC_BIN_OPS.get(type(node.op))
        if binary is None:
            raise ValueError("Unsupported binary operator.")
        # Evaluate left before right, matching normal expression order.
        left = _eval_calc_node(node.left)
        right = _eval_calc_node(node.right)
        return binary(left, right)

    raise ValueError("Unsupported expression.")
419
+
420
+
421
def evaluate_simple_expression(expression: str) -> str:
    """Evaluate an arithmetic expression string and format the result.

    The expression is parsed with ``ast.parse`` in ``eval`` mode and computed
    by ``_eval_calc_node``. Whole-number float results are rendered without a
    decimal point; anything else uses 12 significant digits.
    Parsing and evaluation errors propagate to the caller.
    """
    tree = ast.parse(expression, mode="eval")
    result = _eval_calc_node(tree)
    is_whole = isinstance(result, float) and result.is_integer()
    return str(int(result)) if is_whole else f"{result:.12g}"
427
+
428
+
429
+ class MinistralHMCService:
430
+ def __init__(self, model_id: str, adapter_dir: str, system_prompt: str, max_new_tokens: int, temperature: float, top_p: float, use_4bit: bool, cpu_dtype: str, offload_folder: str):
431
+ self.model_id = model_id
432
+ self.adapter_dir = adapter_dir
433
+ self.system_prompt = system_prompt
434
+ self.max_new_tokens = max_new_tokens
435
+ self.temperature = temperature
436
+ self.top_p = top_p
437
+ self.use_4bit = use_4bit
438
+ self.cpu_dtype = cpu_dtype
439
+ self.offload_folder = offload_folder
440
+ self.tokenizer = None
441
+ self.model = None
442
+ self._generation_lock = threading.Lock()
443
+
444
+ def load(self):
445
+ self.tokenizer, self.model = load_model(
446
+ self.model_id,
447
+ self.adapter_dir,
448
+ self.use_4bit,
449
+ self.cpu_dtype,
450
+ self.offload_folder,
451
+ )
452
+
453
+ def reply(self, message: str) -> str:
454
+ with self._generation_lock:
455
+ simple_expression = extract_simple_expression(message)
456
+ if simple_expression is not None:
457
+ original_expression, compact_expression = simple_expression
458
+ exact_answer = evaluate_simple_expression(compact_expression)
459
+ return f"Expression: {original_expression}\nVerified result: {exact_answer}\nFinal answer: {exact_answer}"
460
+ system_prompt = MATH_SYSTEM_PROMPT if looks_like_math_query(message) else self.system_prompt
461
+ prompt = build_prompt(message, system_prompt)
462
+ return generate_reply(
463
+ self.tokenizer,
464
+ self.model,
465
+ prompt,
466
+ self.max_new_tokens,
467
+ self.temperature,
468
+ self.top_p,
469
+ )
470
+
471
+
472
class ChatHandler(BaseHTTPRequestHandler):
    """HTTP handler for the chat page (``GET /``), ``GET /health`` and ``POST /chat``.

    ``service`` is a class attribute that must be set to an object exposing
    ``model_id``, ``adapter_dir`` and ``reply(message)`` before serving.
    """

    service = None

    def _send_json(self, payload, status=HTTPStatus.OK):
        """Send *payload* as a UTF-8 JSON response with the given status."""
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _send_html(self, html: str):
        """Send *html* as a 200 UTF-8 HTML response."""
        body = html.encode("utf-8")
        self.send_response(HTTPStatus.OK)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_json_object(self):
        """Read the request body and return it as a dict.

        On a bad Content-Length, malformed JSON or a non-object payload, a
        400 response is sent and ``None`` is returned.
        """
        try:
            content_length = int(self.headers.get("Content-Length", "0"))
        except (TypeError, ValueError):
            content_length = -1
        # A negative length would make rfile.read() block until the client
        # closes the connection, so reject it up front.
        if content_length < 0:
            self._send_json({"error": "Invalid Content-Length header."}, status=HTTPStatus.BAD_REQUEST)
            return None
        try:
            data = json.loads(self.rfile.read(content_length).decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError, RecursionError):
            self._send_json({"error": "Request body must be valid UTF-8 JSON."}, status=HTTPStatus.BAD_REQUEST)
            return None
        if not isinstance(data, dict):
            self._send_json({"error": "Request body must be a JSON object."}, status=HTTPStatus.BAD_REQUEST)
            return None
        return data

    def do_GET(self):
        """Serve ``/health`` as JSON and ``/`` as the chat page; 404 otherwise."""
        from html import escape  # local import: only this method needs it

        path = urlparse(self.path).path
        if path == "/health":
            self._send_json({"status": "ok", "model": self.service.model_id, "adapter": self.service.adapter_dir, "version": SERVER_VERSION})
            return
        if path != "/":
            self.send_error(HTTPStatus.NOT_FOUND)
            return
        substitutions = (
            ("__VERSION__", SERVER_VERSION),
            ("__MODEL__", self.service.model_id),
            ("__ADAPTER__", self.service.adapter_dir),
        )
        page = HTML_PAGE
        for placeholder, value in substitutions:
            # The values come from the command line; escape them so markup
            # characters cannot break the page.
            page = page.replace(placeholder, escape(str(value)))
        self._send_html(page)

    def do_POST(self):
        """Handle ``POST /chat``: ``{"message": ...}`` in, ``{"reply": ...}`` out.

        Client mistakes (bad JSON, non-object body, empty message) get a 400;
        only failures inside the service are reported as 500.
        """
        if urlparse(self.path).path != "/chat":
            self.send_error(HTTPStatus.NOT_FOUND)
            return
        data = self._read_json_object()
        if data is None:
            return
        raw_message = data.get("message")
        # A JSON null must count as "no message", not the literal text "None".
        message = "" if raw_message is None else str(raw_message).strip()
        if not message:
            self._send_json({"error": "Message cannot be empty."}, status=HTTPStatus.BAD_REQUEST)
            return
        try:
            reply = self.service.reply(message)
        except Exception as exc:  # boundary: report any service failure to the client
            self._send_json({"error": str(exc)}, status=HTTPStatus.INTERNAL_SERVER_ERROR)
            return
        # Sent outside the try so a failed write cannot trigger a second response.
        self._send_json({"reply": reply})

    def log_message(self, format, *args):
        """Suppress the default per-request logging."""
        return
523
+
524
+
525
def main():
    """Parse CLI options, load the model and serve the chat UI until interrupted."""
    parser = argparse.ArgumentParser(description="Serve Ministral 3B HMC on a local web server")
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", type=int, default=8036)
    parser.add_argument("--model-id", default=DEFAULT_MODEL_ID)
    parser.add_argument("--adapter-dir", default=DEFAULT_ADAPTER_DIR)
    parser.add_argument("--system-prompt", default=SYSTEM_PROMPT)
    parser.add_argument("--max-new-tokens", type=int, default=32)
    parser.add_argument("--temperature", type=float, default=0.0)
    parser.add_argument("--top-p", type=float, default=1.0)
    parser.add_argument("--use-4bit", action="store_true")
    parser.add_argument("--cpu-dtype", choices=["float32", "float16", "bfloat16"], default="bfloat16")
    # NOTE(review): this default is an absolute path on one developer's Windows
    # machine; other environments presumably need --offload-folder. Kept as-is
    # so existing setups do not change.
    parser.add_argument("--offload-folder", default=r"C:\Users\ASUS\CascadeProjects\.hf-offload")
    args = parser.parse_args()

    service = MinistralHMCService(
        model_id=args.model_id,
        adapter_dir=args.adapter_dir,
        system_prompt=args.system_prompt,
        max_new_tokens=args.max_new_tokens,
        temperature=args.temperature,
        top_p=args.top_p,
        use_4bit=args.use_4bit,
        cpu_dtype=args.cpu_dtype,
        offload_folder=args.offload_folder,
    )
    print("Loading Ministral 3B HMC model...")
    service.load()

    ChatHandler.service = service
    # The context manager closes the listening socket on every exit path.
    with ThreadingHTTPServer((args.host, args.port), ChatHandler) as server:
        # Announce readiness only after the socket is bound, so a port
        # conflict is not reported as "ready".
        print(f"Ministral 3B HMC server ready at http://{args.host}:{args.port}")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the normal way to stop the server.
            pass


if __name__ == "__main__":
    main()