kcrobot20 commited on
Commit
84ad4e7
·
verified ·
1 Parent(s): 138566c

initial commit

Browse files
Files changed (1) hide show
  1. app.py +53 -72
app.py CHANGED
@@ -2,21 +2,21 @@
2
  # -*- coding: utf-8 -*-
3
  """
4
  KCrobot AI — Vmax Final (voice-first)
5
- - Default Gemini model: gemini-1.5-pro (but code will fallback)
6
- - Read secrets from environment (HF "New secret"):
7
  GEMINI_API_KEY, GEMINI_MODEL, TELEGRAM_TOKEN, TELEGRAM_CHAT_ID,
8
  ELEVEN_API_KEY, ELEVEN_VOICE_ID, GOOGLE_SPEECH_LANG
9
  - Voice-first: ESP32 uploads audio -> /chat_audio -> STT -> Gemini -> TTS -> MP3
10
- - If STT libs not installed on server, /chat_audio returns 501 with message
11
  - Endpoints:
12
- GET / -> web UI (chat, minimal)
13
- POST /chat_text -> {"q":"...","voice":true}
14
- POST /chat_audio -> upload wav (multipart 'file' or raw bytes)
15
- POST /esp/send_text -> wrapper for /chat_text
16
- GET /play_latest -> latest_reply.mp3
17
- GET /_history -> conversation history
18
- POST /notify -> forward to Telegram
19
- GET /health -> health check
20
  """
21
  from __future__ import annotations
22
 
@@ -32,33 +32,31 @@ import pathlib
32
  from datetime import datetime
33
  from typing import Tuple, Dict, Any, Optional
34
 
35
- import requests
36
  from flask import Flask, request, jsonify, send_file, render_template_string
 
37
 
38
- # --- Try to import recommended/new SDK first (google-genai)
39
  USE_GENAI_SDK = False
40
  GENAI_CLIENT = None
41
  try:
42
- # new official: from google import genai
43
  from google import genai # type: ignore
44
  USE_GENAI_SDK = True
45
  except Exception:
46
  try:
47
- # fallback: google.generativeai (older package)
48
  import google.generativeai as genai # type: ignore
49
  USE_GENAI_SDK = True
50
  except Exception:
51
  genai = None
52
  USE_GENAI_SDK = False
53
 
54
- # TTS fallback
55
  try:
56
  from gtts import gTTS # type: ignore
57
  GTTS_AVAILABLE = True
58
  except Exception:
59
  GTTS_AVAILABLE = False
60
 
61
- # Optional STT libs
62
  try:
63
  import speech_recognition as sr # type: ignore
64
  from pydub import AudioSegment # type: ignore
@@ -69,7 +67,7 @@ except Exception:
69
  STT_AVAILABLE = False
70
 
71
  # -------------------------
72
- # CONFIG via env (HF New secret)
73
  # -------------------------
74
  CFG = {
75
  "GEMINI_API_KEY": os.getenv("GEMINI_API_KEY", "").strip(),
@@ -81,7 +79,7 @@ CFG = {
81
  "GOOGLE_SPEECH_LANG": os.getenv("GOOGLE_SPEECH_LANG", "vi-VN").strip(),
82
  }
83
 
84
- # Model fallback list (priority order)
85
  MODEL_FALLBACK_LIST = [
86
  CFG.get("GEMINI_MODEL") or "gemini-1.5-pro",
87
  "gemini-1.5-flash",
@@ -89,7 +87,7 @@ MODEL_FALLBACK_LIST = [
89
  "gemini-2.5-pro",
90
  ]
91
 
92
- # ensure unique, keep order
93
  seen = set()
94
  MODEL_FALLBACK = []
95
  for m in MODEL_FALLBACK_LIST:
@@ -101,18 +99,18 @@ for m in MODEL_FALLBACK_LIST:
101
  GEMINI_KEY = CFG.get("GEMINI_API_KEY") or ""
102
  if USE_GENAI_SDK and GEMINI_KEY:
103
  try:
104
- # new SDK style
105
  GENAI_CLIENT = genai.Client(api_key=GEMINI_KEY) # type: ignore
106
  except Exception:
107
  try:
108
- # older style configure (google.generativeai)
109
  genai.configure(api_key=GEMINI_KEY) # type: ignore
110
  GENAI_CLIENT = genai # type: ignore
111
  except Exception:
112
  GENAI_CLIENT = None
113
 
114
  # -------------------------
115
- # STORAGE & logging
116
  # -------------------------
117
  BASE = pathlib.Path.cwd()
118
  DATA_DIR = BASE / "data"
@@ -125,29 +123,29 @@ LATEST_MP3 = DATA_DIR / "latest_reply.mp3"
125
  logging.basicConfig(level=logging.INFO)
126
  logger = logging.getLogger("kcrobot_vmax")
127
 
128
- # snapshot non-secret config flags
129
  try:
130
  CFG_SNAPSHOT.write_text(json.dumps({k: bool(CFG.get(k)) for k in CFG}, indent=2), encoding="utf-8")
131
  except Exception:
132
  pass
133
 
134
  # -------------------------
135
- # Helpers: safe json, usage, history
136
  # -------------------------
137
def load_json_safe(path: pathlib.Path, default):
    """Best-effort JSON loader.

    Returns the parsed contents of *path*, or *default* when the file is
    missing, unreadable, or not valid JSON. Failures are only reported at
    debug level so storage hiccups never crash the caller.
    """
    result = default
    try:
        if path.exists():
            raw = path.read_text(encoding="utf-8")
            result = json.loads(raw)
    except Exception as exc:
        logger.debug("load_json_safe failed %s -> %s", path, exc)
    return result
144
 
145
def save_json_safe(path: pathlib.Path, data):
    """Serialize *data* as pretty-printed UTF-8 JSON at *path*.

    Returns True on success. On any failure the exception is logged and
    False is returned, so callers never crash on storage errors.
    """
    try:
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        path.write_text(payload, encoding="utf-8")
    except Exception:
        logger.exception("save_json_safe failed for %s", path)
        return False
    return True
152
 
153
  def today_str():
@@ -175,9 +173,13 @@ def append_history(entry: dict):
175
  save_json_safe(HISTORY_FILE, h)
176
 
177
  # -------------------------
178
- # Language detection (simple)
179
  # -------------------------
180
- VIET_CHAR_RE = re.compile(r"[àáạảãâầấậẩẫăằắặẳẵđèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹ]", re.I)
 
 
 
 
181
  def detect_lang(text: str) -> str:
182
  if not text or not isinstance(text, str):
183
  return "en"
@@ -191,17 +193,15 @@ def detect_lang(text: str) -> str:
191
  return "en"
192
 
193
  # -------------------------
194
- # Gemini: SDK call and REST fallback + model fallback controller
195
  # -------------------------
196
- def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Tuple[bool, str, int]:
197
  """
198
- Try to call Gemini with a single model.
199
- Returns (ok, text_or_error, http_status_or_0)
200
  """
201
- # prefer SDK if available
202
  if GENAI_CLIENT:
203
  try:
204
- # new SDK method
205
  if hasattr(GENAI_CLIENT, "models") and hasattr(GENAI_CLIENT.models, "generate_content"):
206
  resp = GENAI_CLIENT.models.generate_content(model=model, contents=prompt,
207
  max_output_tokens=max_output_tokens, temperature=temperature) # type: ignore
@@ -209,14 +209,12 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
209
  if txt:
210
  return True, txt, 200
211
  return True, str(resp), 200
212
- # older compatibility
213
  if hasattr(GENAI_CLIENT, "generate_content"):
214
  resp = GENAI_CLIENT.generate_content(prompt, max_output_tokens=max_output_tokens, temperature=temperature)
215
  if hasattr(resp, "text") and resp.text:
216
  return True, resp.text, 200
217
  return True, str(resp), 200
218
  except requests.exceptions.HTTPError as he:
219
- # SDK might raise requests HTTPError
220
  try:
221
  code = he.response.status_code
222
  except Exception:
@@ -224,6 +222,7 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
224
  return False, str(he), code
225
  except Exception as e:
226
  return False, str(e), 0
 
227
  # REST fallback
228
  key = CFG.get("GEMINI_API_KEY") or ""
229
  if not key:
@@ -234,11 +233,11 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
234
  "prompt": {
235
  "messages": [
236
  {"author": "system", "content": {"text": "You are a helpful assistant."}},
237
- {"author": "user", "content": {"text": prompt}}
238
  ]
239
  },
240
  "maxOutputTokens": max_output_tokens,
241
- "temperature": temperature
242
  }
243
  try:
244
  r = requests.post(url, params={"key": key}, json=payload, headers=headers, timeout=30)
@@ -248,7 +247,7 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
248
  if status >= 400:
249
  return False, f"HTTP {status}: {r.text}", status
250
  j = r.json()
251
- # parse common shapes
252
  cand = j.get("candidates")
253
  if cand and isinstance(cand, list):
254
  c0 = cand[0]
@@ -274,8 +273,7 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
274
 
275
  def call_gemini_with_fallbacks(prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Dict[str, Any]:
276
  """
277
- Try sequence of models from MODEL_FALLBACK.
278
- Return dict {"ok":bool, "text":str, "model":str, "error":...}
279
  """
280
  if not CFG.get("GEMINI_API_KEY"):
281
  return {"ok": False, "error": "Gemini API key not configured (set GEMINI_API_KEY in New secret)"}
@@ -283,20 +281,16 @@ def call_gemini_with_fallbacks(prompt: str, max_output_tokens: int = 1024, tempe
283
  for model in MODEL_FALLBACK:
284
  if not model:
285
  continue
286
- ok, txt_or_err, status = gemini_call_with_model(model, prompt, max_output_tokens, temperature)
287
  if ok:
288
- return {"ok": True, "text": txt_or_err, "model": model}
289
- # if 404, try next model; otherwise remember error and maybe return at end
290
- last_error = {"model": model, "status": status, "error": txt_or_err}
291
- logger.warning("Gemini model %s failed: %s (status %s)", model, txt_or_err, status)
292
- if status not in (404, 0):
293
- # for some HTTP errors we may stop trying (e.g., 403 unauthorized)
294
- # but still try next model for robustness
295
- pass
296
  return {"ok": False, "error": f"All models failed. Last: {last_error}", "last": last_error}
297
 
298
  # -------------------------
299
- # TTS backends (ElevenLabs -> gTTS)
300
  # -------------------------
301
  def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
302
  url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
@@ -308,7 +302,7 @@ def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
308
 
309
  def tts_gtts_bytes(text: str, lang: str = "vi") -> bytes:
310
  if not GTTS_AVAILABLE:
311
- raise RuntimeError("gTTS not installed on server")
312
  t = gTTS(text=text, lang=lang)
313
  bio = io.BytesIO()
314
  t.write_to_fp(bio)
@@ -336,7 +330,7 @@ def synthesize_and_save(answer: str, lang_hint: str = "vi") -> Tuple[bool, str]:
336
  return False, f"TTS error: {e}"
337
 
338
  # -------------------------
339
- # STT: server-side speech-to-text
340
  # -------------------------
341
  def speech_to_text(wav_bytes: bytes) -> Tuple[bool, str]:
342
  if not STT_AVAILABLE:
@@ -352,7 +346,7 @@ def speech_to_text(wav_bytes: bytes) -> Tuple[bool, str]:
352
  return False, str(e)
353
 
354
  # -------------------------
355
- # Telegram helper
356
  # -------------------------
357
  def send_telegram_message(text: str) -> bool:
358
  token = CFG.get("TELEGRAM_TOKEN") or ""
@@ -384,12 +378,11 @@ textarea{width:100%;padding:10px;border-radius:8px;background:#061427;color:#fff
384
  button{padding:10px 14px;border-radius:8px;background:#0ea5ff;color:#012;border:none;cursor:pointer}
385
  #resp{white-space:pre-wrap;margin-top:12px;background:#021220;padding:12px;border-radius:6px}
386
  .small{font-size:0.9rem;color:#9fb3c8}
387
- </style></head>
388
- <body>
389
  <div class="container">
390
  <h1>🤖 KCrobot AI — Vmax (Voice-first)</h1>
391
  <p class="small">Model fallback: {{models}} — Gemini key: {{gemini}} — Telegram: {{tg}}</p>
392
- <p>Chú ý: giao diện chat là phụ — ưu tiên voice (ESP32 gửi audio). Bạn có thể thử gỏ "Xin chào" để nghe trả lời.</p>
393
  <textarea id="q" rows="4" placeholder="Nhập tiếng Việt / English..."></textarea>
394
  <p><label><input id="voice" type="checkbox" checked> Voice ON</label>
395
  <button onclick="send()">Gửi & Nghe</button></p>
@@ -455,7 +448,6 @@ def chat_text():
455
  if ok:
456
  play_url = "/play_latest"
457
  try:
458
- # Telegram notify background
459
  threading.Thread(target=send_telegram_message, args=(f"Q: {q}\nA: {answer}",)).start()
460
  except Exception:
461
  pass
@@ -466,19 +458,10 @@ def chat_text():
466
 
467
@app.route("/esp/send_text", methods=["POST"])
def esp_send_text():
    """Thin alias of /chat_text so ESP32 firmware can keep a fixed endpoint."""
    return chat_text()
471
 
472
  @app.route("/chat_audio", methods=["POST"])
473
  def chat_audio():
474
- """
475
- Primary voice endpoint:
476
- - Accept audio file field 'file' (wav) or raw bytes body
477
- - Do STT (server-side) if available, else return 501
478
- - Use Gemini to reply (model fallback)
479
- - Synthesize reply to latest_reply.mp3 and return play_url
480
- """
481
- # read bytes
482
  wav_bytes = None
483
  if 'file' in request.files:
484
  f = request.files['file']
@@ -487,7 +470,6 @@ def chat_audio():
487
  wav_bytes = request.get_data()
488
  if not wav_bytes:
489
  return jsonify({"error": "no audio provided"}), 400
490
- # save for debugging
491
  try:
492
  ts = int(time.time())
493
  (DATA_DIR / f"uploaded_{ts}.wav").write_bytes(wav_bytes)
@@ -549,10 +531,9 @@ def health():
549
  })
550
 
551
  # -------------------------
552
- # Start app
553
  # -------------------------
554
  if __name__ == "__main__":
555
- # ensure files exist
556
  load_json_safe(HISTORY_FILE, [])
557
  load_json_safe(USAGE_FILE, {})
558
  logger.info("KCrobot Vmax starting. Gemini key present: %s, SDK present: %s, STT available: %s",
 
2
  # -*- coding: utf-8 -*-
3
  """
4
  KCrobot AI — Vmax Final (voice-first)
5
+ - Default Gemini model: gemini-1.5-pro (fallbacks implemented)
6
+ - Read secrets from env (HF "New secret"):
7
  GEMINI_API_KEY, GEMINI_MODEL, TELEGRAM_TOKEN, TELEGRAM_CHAT_ID,
8
  ELEVEN_API_KEY, ELEVEN_VOICE_ID, GOOGLE_SPEECH_LANG
9
  - Voice-first: ESP32 uploads audio -> /chat_audio -> STT -> Gemini -> TTS -> MP3
10
+ - If STT libs missing, /chat_audio returns 501 (server-side STT optional)
11
  - Endpoints:
12
+ GET / -> simple web UI (chat secondary)
13
+ POST /chat_text -> {"q":"...","voice":true}
14
+ POST /chat_audio -> upload wav (multipart 'file' or raw bytes)
15
+ POST /esp/send_text-> wrapper for /chat_text
16
+ GET /play_latest -> latest_reply.mp3
17
+ GET /_history -> recent history
18
+ POST /notify -> forward to Telegram
19
+ GET /health -> health check
20
  """
21
  from __future__ import annotations
22
 
 
32
  from datetime import datetime
33
  from typing import Tuple, Dict, Any, Optional
34
 
 
35
  from flask import Flask, request, jsonify, send_file, render_template_string
36
+ import requests
37
 
38
+ # --- Attempt to import Google GenAI SDK (new) or older lib
39
  USE_GENAI_SDK = False
40
  GENAI_CLIENT = None
41
  try:
42
+ # new official package pattern: `from google import genai`
43
  from google import genai # type: ignore
44
  USE_GENAI_SDK = True
45
  except Exception:
46
  try:
 
47
  import google.generativeai as genai # type: ignore
48
  USE_GENAI_SDK = True
49
  except Exception:
50
  genai = None
51
  USE_GENAI_SDK = False
52
 
53
+ # --- TTS/STT libs (optional)
54
  try:
55
  from gtts import gTTS # type: ignore
56
  GTTS_AVAILABLE = True
57
  except Exception:
58
  GTTS_AVAILABLE = False
59
 
 
60
  try:
61
  import speech_recognition as sr # type: ignore
62
  from pydub import AudioSegment # type: ignore
 
67
  STT_AVAILABLE = False
68
 
69
  # -------------------------
70
+ # Config from env (HF New secret)
71
  # -------------------------
72
  CFG = {
73
  "GEMINI_API_KEY": os.getenv("GEMINI_API_KEY", "").strip(),
 
79
  "GOOGLE_SPEECH_LANG": os.getenv("GOOGLE_SPEECH_LANG", "vi-VN").strip(),
80
  }
81
 
82
+ # Model fallback list: prefer configured model, then alternatives
83
  MODEL_FALLBACK_LIST = [
84
  CFG.get("GEMINI_MODEL") or "gemini-1.5-pro",
85
  "gemini-1.5-flash",
 
87
  "gemini-2.5-pro",
88
  ]
89
 
90
+ # dedupe keep order
91
  seen = set()
92
  MODEL_FALLBACK = []
93
  for m in MODEL_FALLBACK_LIST:
 
99
  GEMINI_KEY = CFG.get("GEMINI_API_KEY") or ""
100
  if USE_GENAI_SDK and GEMINI_KEY:
101
  try:
102
+ # Try new SDK client style
103
  GENAI_CLIENT = genai.Client(api_key=GEMINI_KEY) # type: ignore
104
  except Exception:
105
  try:
106
+ # fallback older style configure
107
  genai.configure(api_key=GEMINI_KEY) # type: ignore
108
  GENAI_CLIENT = genai # type: ignore
109
  except Exception:
110
  GENAI_CLIENT = None
111
 
112
  # -------------------------
113
+ # Storage & logging
114
  # -------------------------
115
  BASE = pathlib.Path.cwd()
116
  DATA_DIR = BASE / "data"
 
123
  logging.basicConfig(level=logging.INFO)
124
  logger = logging.getLogger("kcrobot_vmax")
125
 
126
+ # save non-secret snapshot
127
  try:
128
  CFG_SNAPSHOT.write_text(json.dumps({k: bool(CFG.get(k)) for k in CFG}, indent=2), encoding="utf-8")
129
  except Exception:
130
  pass
131
 
132
  # -------------------------
133
+ # Helpers: json safe, usage, history
134
  # -------------------------
135
  def load_json_safe(path: pathlib.Path, default):
136
  try:
137
  if path.exists():
138
  return json.loads(path.read_text(encoding="utf-8"))
139
  except Exception as e:
140
+ logger.debug("load_json_safe error %s -> %s", path, e)
141
  return default
142
 
143
  def save_json_safe(path: pathlib.Path, data):
144
  try:
145
  path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
146
  return True
147
+ except Exception as e:
148
+ logger.exception("save_json_safe failed for %s: %s", path, e)
149
  return False
150
 
151
  def today_str():
 
173
  save_json_safe(HISTORY_FILE, h)
174
 
175
  # -------------------------
176
+ # Language detection
177
  # -------------------------
178
+ VIET_CHAR_RE = re.compile(
179
+ r"[àáạảãâầấậẩẫăằắặẳẵđèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹ]",
180
+ re.I,
181
+ )
182
+
183
  def detect_lang(text: str) -> str:
184
  if not text or not isinstance(text, str):
185
  return "en"
 
193
  return "en"
194
 
195
  # -------------------------
196
+ # Gemini single model call (SDK preferred, REST fallback)
197
  # -------------------------
198
+ def gemini_call_single(model: str, prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Tuple[bool, str, int]:
199
  """
200
+ Try calling a single model. Return (ok, text_or_error, http_status_or_0)
 
201
  """
202
+ # SDK path
203
  if GENAI_CLIENT:
204
  try:
 
205
  if hasattr(GENAI_CLIENT, "models") and hasattr(GENAI_CLIENT.models, "generate_content"):
206
  resp = GENAI_CLIENT.models.generate_content(model=model, contents=prompt,
207
  max_output_tokens=max_output_tokens, temperature=temperature) # type: ignore
 
209
  if txt:
210
  return True, txt, 200
211
  return True, str(resp), 200
 
212
  if hasattr(GENAI_CLIENT, "generate_content"):
213
  resp = GENAI_CLIENT.generate_content(prompt, max_output_tokens=max_output_tokens, temperature=temperature)
214
  if hasattr(resp, "text") and resp.text:
215
  return True, resp.text, 200
216
  return True, str(resp), 200
217
  except requests.exceptions.HTTPError as he:
 
218
  try:
219
  code = he.response.status_code
220
  except Exception:
 
222
  return False, str(he), code
223
  except Exception as e:
224
  return False, str(e), 0
225
+
226
  # REST fallback
227
  key = CFG.get("GEMINI_API_KEY") or ""
228
  if not key:
 
233
  "prompt": {
234
  "messages": [
235
  {"author": "system", "content": {"text": "You are a helpful assistant."}},
236
+ {"author": "user", "content": {"text": prompt}},
237
  ]
238
  },
239
  "maxOutputTokens": max_output_tokens,
240
+ "temperature": temperature,
241
  }
242
  try:
243
  r = requests.post(url, params={"key": key}, json=payload, headers=headers, timeout=30)
 
247
  if status >= 400:
248
  return False, f"HTTP {status}: {r.text}", status
249
  j = r.json()
250
+ # try parse candidates
251
  cand = j.get("candidates")
252
  if cand and isinstance(cand, list):
253
  c0 = cand[0]
 
273
 
274
  def call_gemini_with_fallbacks(prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Dict[str, Any]:
275
  """
276
+ Try models in MODEL_FALLBACK sequence; return dict with ok/text/model or error.
 
277
  """
278
  if not CFG.get("GEMINI_API_KEY"):
279
  return {"ok": False, "error": "Gemini API key not configured (set GEMINI_API_KEY in New secret)"}
 
281
  for model in MODEL_FALLBACK:
282
  if not model:
283
  continue
284
+ ok, text_or_err, status = gemini_call_single(model, prompt, max_output_tokens, temperature)
285
  if ok:
286
+ return {"ok": True, "text": text_or_err, "model": model}
287
+ last_error = {"model": model, "status": status, "error": text_or_err}
288
+ logger.warning("Model %s failed: %s (status=%s)", model, text_or_err, status)
289
+ # continue to next for robustness; some errors (403) may persist but try anyway
 
 
 
 
290
  return {"ok": False, "error": f"All models failed. Last: {last_error}", "last": last_error}
291
 
292
  # -------------------------
293
+ # TTS: ElevenLabs optional -> gTTS fallback
294
  # -------------------------
295
  def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
296
  url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
 
302
 
303
  def tts_gtts_bytes(text: str, lang: str = "vi") -> bytes:
304
  if not GTTS_AVAILABLE:
305
+ raise RuntimeError("gTTS not available in environment")
306
  t = gTTS(text=text, lang=lang)
307
  bio = io.BytesIO()
308
  t.write_to_fp(bio)
 
330
  return False, f"TTS error: {e}"
331
 
332
  # -------------------------
333
+ # STT: server-side speech-to-text (optional)
334
  # -------------------------
335
  def speech_to_text(wav_bytes: bytes) -> Tuple[bool, str]:
336
  if not STT_AVAILABLE:
 
346
  return False, str(e)
347
 
348
  # -------------------------
349
+ # Telegram helper (optional)
350
  # -------------------------
351
  def send_telegram_message(text: str) -> bool:
352
  token = CFG.get("TELEGRAM_TOKEN") or ""
 
378
  button{padding:10px 14px;border-radius:8px;background:#0ea5ff;color:#012;border:none;cursor:pointer}
379
  #resp{white-space:pre-wrap;margin-top:12px;background:#021220;padding:12px;border-radius:6px}
380
  .small{font-size:0.9rem;color:#9fb3c8}
381
+ </style></head><body>
 
382
  <div class="container">
383
  <h1>🤖 KCrobot AI — Vmax (Voice-first)</h1>
384
  <p class="small">Model fallback: {{models}} — Gemini key: {{gemini}} — Telegram: {{tg}}</p>
385
+ <p>Giao diện chat là phụ — ưu tiên voice (ESP32 gửi audio). Bạn có thể thử "Xin chào" để nghe trả lời.</p>
386
  <textarea id="q" rows="4" placeholder="Nhập tiếng Việt / English..."></textarea>
387
  <p><label><input id="voice" type="checkbox" checked> Voice ON</label>
388
  <button onclick="send()">Gửi & Nghe</button></p>
 
448
  if ok:
449
  play_url = "/play_latest"
450
  try:
 
451
  threading.Thread(target=send_telegram_message, args=(f"Q: {q}\nA: {answer}",)).start()
452
  except Exception:
453
  pass
 
458
 
459
  @app.route("/esp/send_text", methods=["POST"])
460
  def esp_send_text():
 
461
  return chat_text()
462
 
463
  @app.route("/chat_audio", methods=["POST"])
464
  def chat_audio():
 
 
 
 
 
 
 
 
465
  wav_bytes = None
466
  if 'file' in request.files:
467
  f = request.files['file']
 
470
  wav_bytes = request.get_data()
471
  if not wav_bytes:
472
  return jsonify({"error": "no audio provided"}), 400
 
473
  try:
474
  ts = int(time.time())
475
  (DATA_DIR / f"uploaded_{ts}.wav").write_bytes(wav_bytes)
 
531
  })
532
 
533
  # -------------------------
534
+ # Start server
535
  # -------------------------
536
  if __name__ == "__main__":
 
537
  load_json_safe(HISTORY_FILE, [])
538
  load_json_safe(USAGE_FILE, {})
539
  logger.info("KCrobot Vmax starting. Gemini key present: %s, SDK present: %s, STT available: %s",