kcrobot25 commited on
Commit
024fd07
·
verified ·
1 Parent(s): a6dcb29
Files changed (1) hide show
  1. app.py +283 -313
app.py CHANGED
@@ -1,152 +1,105 @@
1
 
2
- # app.py — KC Robot AI v5.5 FINAL
3
- # Flask server for Hugging Face Space
4
- # - Requirements: see requirements.txt
5
- # - Secrets expected: HF_API_TOKEN (required), optional: HF_MODEL, HF_TTS_MODEL, HF_STT_MODEL, TELEGRAM_TOKEN, TELEGRAM_CHATID
 
 
 
 
6
 
7
  import os
8
  import io
9
  import time
10
  import json
11
- import base64
12
  import threading
13
  import logging
14
- from typing import Optional
15
  from pathlib import Path
 
16
 
17
  import requests
18
- from flask import Flask, request, jsonify, render_template_string
19
 
20
- # Fallback TTS
21
- from gtts import gTTS
22
-
23
- # Logging
24
  logging.basicConfig(level=logging.INFO)
25
- logger = logging.getLogger("kcrobot.v5.5.final")
26
 
27
  app = Flask(__name__)
28
 
29
- # Config / Secrets (set in Space -> Settings -> Secrets)
 
 
 
30
  HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
31
- HF_MODEL = os.getenv("HF_MODEL", "bkai-foundation-models/vietnamese-llama2-7b").strip()
32
- HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "doanthang/vietTTS-southern-female").strip() # optional public HF TTS
33
- HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip() # optional
 
34
  TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
35
  TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip()
36
 
37
- HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
38
-
39
- # Temp storage for history
40
- TMP_DIR = Path("/tmp/kcrobot")
41
- TMP_DIR.mkdir(parents=True, exist_ok=True)
42
- HISTORY_FILE = TMP_DIR / "history.json"
43
-
44
def read_history():
    """Load the saved chat history (a list of records) from HISTORY_FILE.

    Returns [] when the file is absent or unreadable; errors are logged,
    never raised, so callers always get a list.
    """
    try:
        if not HISTORY_FILE.exists():
            return []
        return json.loads(HISTORY_FILE.read_text(encoding="utf-8"))
    except Exception:
        logger.exception("read_history")
        return []
52
 
53
def append_history(user_text, bot_text):
    """Append one user/bot exchange (with a timestamp) to the history file.

    Best effort: a failed write is logged and swallowed so chat handling
    never breaks on disk errors.
    """
    records = read_history()
    records.append({"user": user_text, "bot": bot_text, "ts": time.time()})
    try:
        HISTORY_FILE.write_text(
            json.dumps(records, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
    except Exception:
        logger.exception("append_history")
62
 
63
def clear_history():
    """Delete the history file if it exists; log (don't raise) on failure."""
    try:
        # missing_ok avoids a separate exists() check (Python 3.8+).
        HISTORY_FILE.unlink(missing_ok=True)
    except Exception:
        logger.exception("clear_history")
69
 
70
# Language detection heuristic: any Vietnamese-specific letter => "vi".
# Includes the base letters AND every tonal variant; the previous table was
# missing all tones of ê, ô, ơ and ư, so common words such as "Việt",
# "tiếng" or "được" were mis-detected as English.
VI_CHARS = set(
    "ăâđêôơư"          # base modified letters
    "áàảãạ" "ắằẳẵặ" "ấầẩẫậ"   # a / ă / â tones
    "éèẻẽẹ" "ếềểễệ"            # e / ê tones
    "íìỉĩị"                     # i tones
    "óòỏõọ" "ốồổỗộ" "ớờởỡợ"   # o / ô / ơ tones
    "úùủũụ" "ứừửữự"            # u / ư tones
    "ýỳỷỹỵ"                     # y tones
)

def detect_lang(text: str) -> str:
    """Return "vi" if *text* contains any Vietnamese letter, else "en"."""
    if not text:
        return "en"
    return "vi" if any(ch in VI_CHARS for ch in text.lower()) else "en"
80
 
81
- # ---------------- Hugging Face helpers ----------------
82
  def hf_post_json(model_id: str, payload: dict, timeout: int = 120):
83
  if not HF_API_TOKEN:
84
- raise RuntimeError("HF_API_TOKEN not set in Space Secrets.")
85
  url = f"https://api-inference.huggingface.co/models/{model_id}"
86
- headers = {**HF_HEADERS, "Content-Type": "application/json"}
87
- r = requests.post(url, headers=headers, json=payload, timeout=timeout)
88
  if not r.ok:
89
- logger.warning("HF json POST %s returned %s: %s", model_id, r.status_code, r.text[:300])
90
  r.raise_for_status()
91
  try:
92
  return r.json()
93
  except Exception:
94
  return r.content
95
 
96
- def hf_post_bytes(model_id: str, bytes_data: bytes, content_type: str = "application/octet-stream", timeout: int = 180):
97
  if not HF_API_TOKEN:
98
- raise RuntimeError("HF_API_TOKEN not set in Space Secrets.")
99
  url = f"https://api-inference.huggingface.co/models/{model_id}"
100
  headers = dict(HF_HEADERS)
101
  headers["Content-Type"] = content_type
102
- r = requests.post(url, headers=headers, data=bytes_data, timeout=timeout)
103
  if not r.ok:
104
- logger.warning("HF bytes POST %s returned %s: %s", model_id, r.status_code, r.text[:300])
105
  r.raise_for_status()
106
  return r
107
 
108
  def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
109
  model = model or HF_MODEL
110
- payload = {
111
- "inputs": prompt,
112
- "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
113
- "options": {"wait_for_model": True}
114
- }
115
  out = hf_post_json(model, payload, timeout=120)
116
- # parse common shapes
117
- try:
118
- if isinstance(out, list) and len(out) and isinstance(out[0], dict):
119
- return out[0].get("generated_text") or out[0].get("text") or str(out[0])
120
- if isinstance(out, dict):
121
- if "generated_text" in out:
122
- return out.get("generated_text")
123
- if "text" in out:
124
- return out.get("text")
125
- # some models return choices...
126
- if "choices" in out and isinstance(out["choices"], list) and out["choices"]:
127
- c = out["choices"][0]
128
- return c.get("text") or c.get("message", {}).get("content", "") or str(c)
129
- return str(out)
130
- except Exception:
131
- logger.exception("hf_text_generate parse")
132
- return str(out)
133
-
134
- def hf_tts_bytes(text: str, model: Optional[str] = None) -> Optional[bytes]:
135
  model = model or HF_TTS_MODEL
136
- if not model:
137
- return None
138
- try:
139
- payload = {"inputs": text}
140
- url = f"https://api-inference.huggingface.co/models/{model}"
141
- r = requests.post(url, headers={**HF_HEADERS, "Content-Type": "application/json"}, json=payload, timeout=120)
142
- if r.ok:
143
- return r.content
144
- else:
145
- logger.warning("hf_tts_bytes returned %s: %s", r.status_code, r.text[:200])
146
- return None
147
- except Exception:
148
- logger.exception("hf_tts_bytes")
149
- return None
150
 
151
  def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
152
  model = model or HF_STT_MODEL
@@ -155,41 +108,18 @@ def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
155
  j = r.json()
156
  if isinstance(j, dict) and "text" in j:
157
  return j["text"]
 
158
  if isinstance(j, list) and len(j) and isinstance(j[0], dict) and "text" in j[0]:
159
  return j[0]["text"]
160
  return str(j)
161
  except Exception:
 
162
  return r.text if hasattr(r, "text") else ""
163
 
164
# ---------------- TTS fallback using gTTS ----------------
def tts_gtts_base64(text: str, lang: str = "vi") -> str:
    """Synthesize *text* with gTTS and return the MP3 as a base64 string.

    Returns "" on any failure (network, unsupported language, ...), which
    callers treat as "no audio available".
    """
    try:
        buf = io.BytesIO()
        gTTS(text=text, lang=lang).write_to_fp(buf)
        return base64.b64encode(buf.getvalue()).decode("ascii")
    except Exception:
        logger.exception("tts_gtts_base64 failed")
        return ""
175
-
176
def tts_get_audio_for_text(text: str, detected_lang: str = "vi"):
    """Return {"audio_base64", "mime"} for *text*.

    Tries the configured Hugging Face TTS model first, then falls back to
    gTTS (vi or en). Both fields are "" when no synthesis succeeded.
    """
    # 1) configured Hugging Face TTS model
    if HF_TTS_MODEL:
        raw = hf_tts_bytes(text, HF_TTS_MODEL)
        if raw:
            return {
                "audio_base64": base64.b64encode(raw).decode("ascii"),
                "mime": "audio/mpeg",
            }
    # 2) gTTS fallback — only vi/en are supported here
    encoded = tts_gtts_base64(text, lang="vi" if detected_lang == "vi" else "en")
    if encoded:
        return {"audio_base64": encoded, "mime": "audio/mpeg"}
    return {"audio_base64": "", "mime": ""}
189
-
190
- # ---------------- Telegram ----------------
191
  def send_telegram_message(text: str):
192
- if not (TELEGRAM_TOKEN and TELEGRAM_CHATID):
 
193
  return False
194
  try:
195
  url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
@@ -198,16 +128,31 @@ def send_telegram_message(text: str):
198
  logger.warning("Telegram send failed: %s %s", r.status_code, r.text[:200])
199
  return r.ok
200
  except Exception:
201
- logger.exception("send_telegram_message")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  return False
203
 
204
  def telegram_poll_loop():
205
  if not TELEGRAM_TOKEN:
206
- logger.info("telegram poll disabled")
207
  return
208
  base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"
209
  offset = None
210
- logger.info("Starting telegram poller")
211
  while True:
212
  try:
213
  params = {"timeout": 30}
@@ -234,16 +179,15 @@ def telegram_poll_loop():
234
  try:
235
  requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
236
  except Exception:
237
- logger.exception("telegram reply failed")
238
  elif low.startswith("/say "):
239
  phrase = text[5:].strip()
240
- # TTS and send audio
241
  try:
242
- audio = hf_tts_bytes(phrase) or base64.b64decode(tts_gtts_base64(phrase, lang="vi" if detect_lang(phrase)=="vi" else "en"))
243
- files = {"audio": ("say.mp3", audio, "audio/mpeg")}
244
- requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
245
  except Exception:
246
- logger.exception("telegram say failed")
247
  elif low.startswith("/status"):
248
  try:
249
  requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running."}, timeout=10)
@@ -255,231 +199,257 @@ def telegram_poll_loop():
255
  except Exception:
256
  pass
257
  except Exception:
258
- logger.exception("telegram poll loop error")
259
  time.sleep(3)
260
 
261
- # start telegram poller thread
262
  if TELEGRAM_TOKEN:
263
  try:
264
  t = threading.Thread(target=telegram_poll_loop, daemon=True)
265
  t.start()
266
  except Exception:
267
- logger.exception("start telegram thread failed")
268
-
269
- # ---------------- Web UI HTML ----------------
270
- INDEX_HTML = """
271
- <!doctype html><html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
272
- <title>KC Robot AI v5.5 Final</title>
273
- <style>
274
- body{font-family:Arial;background:#06111a;color:#dff; padding:12px}
275
- .container{max-width:980px;margin:auto}
276
- #chat{background:#04101a;padding:10px;border-radius:8px;height:420px;overflow:auto;border:1px solid #223344}
277
- .user{color:#bfe7ff;text-align:right;margin:6px}
278
- .bot{color:#dfffdc;text-align:left;margin:6px}
279
- .controls{display:flex;gap:8px;margin-top:8px}
280
- input[type=text]{flex:1;padding:10px;border-radius:8px;border:1px solid #223344;background:#021427;color:#e6eef6}
281
- button{padding:10px 12px;border-radius:8px;border:none;background:#0ea5a4;color:#fff;cursor:pointer}
282
- small{color:#99a0b0}
283
- </style></head><body>
284
- <div class="container">
285
- <h2>🤖 KC Robot AI v5.5 — Final (Miền Nam voice, song ngữ)</h2>
286
- <div id="chat"></div>
287
- <div class="controls">
288
- <input id="txt" placeholder="Gõ câu hỏi (VN/EN) hoặc bấm Ghi..." type="text"/>
289
- <button id="sendBtn">Gửi</button>
290
- <button id="recBtn">🎙 Ghi</button>
291
- <button id="greetBtn">▶ Chào</button>
292
- <button id="historyBtn">🗂 Lịch sử</button>
293
- </div>
294
- <audio id="player" controls style="width:100%;margin-top:10px"></audio>
295
- <p><small>Secrets: HF_API_TOKEN (required). Optionals: HF_MODEL, HF_TTS_MODEL, TELEGRAM_TOKEN, TELEGRAM_CHATID</small></p>
296
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  <script>
298
  let mediaRecorder, audioChunks=[];
299
- const chat=document.getElementById('chat'), player=document.getElementById('player');
 
300
 
301
- function appendUser(t){ chat.innerHTML += '<div class="user"><b>You:</b> '+escapeHtml(t)+'</div>'; chat.scrollTop = chat.scrollHeight; }
302
- function appendBot(t){ chat.innerHTML += '<div class="bot"><b>Robot:</b> '+escapeHtml(t)+'</div>'; chat.scrollTop = chat.scrollHeight; }
303
  function escapeHtml(s){ return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
304
 
305
  document.getElementById('sendBtn').onclick = async ()=>{
306
- const v = document.getElementById('txt').value.trim(); if(!v) return;
 
307
  appendUser(v); document.getElementById('txt').value='';
308
  const res = await fetch('/ask',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text:v})});
309
  const j = await res.json();
310
  const ans = j.answer || j.error || 'No answer';
311
  appendBot(ans);
312
- if(j.audio_base64){
313
- const blob = base64ToBlob(j.audio_base64, j.mime || 'audio/mpeg');
314
- const url = URL.createObjectURL(blob); player.src = url; player.play();
 
315
  }
316
  };
317
 
318
- document.getElementById('greetBtn').onclick = async ()=>{
319
- const r = await fetch('/presence',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({note:'Xin chào chủ nhân'})});
320
- const j = await r.json();
321
- appendBot(j.greeting || j.error || '');
322
- if(j.audio_base64){ const blob = base64ToBlob(j.audio_base64, j.mime||'audio/mpeg'); player.src = URL.createObjectURL(blob); player.play(); }
323
- else if(j.music_url){ player.src = j.music_url; player.play(); }
324
- };
325
-
326
- document.getElementById('historyBtn').onclick = async ()=>{
327
- const r = await fetch('/history'); const j = await r.json(); chat.innerHTML=''; j.forEach(it=>{ appendUser(it.user); appendBot(it.bot); });
328
- };
329
-
330
  document.getElementById('recBtn').onclick = async ()=>{
331
  if(mediaRecorder && mediaRecorder.state === 'recording'){ mediaRecorder.stop(); return; }
332
- if(!navigator.mediaDevices) return alert('No mic support');
333
  try{
334
  const stream = await navigator.mediaDevices.getUserMedia({audio:true});
335
  mediaRecorder = new MediaRecorder(stream);
336
- audioChunks=[];
337
  mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
338
- mediaRecorder.onstop = async ()=>{
339
  const blob = new Blob(audioChunks, {type:'audio/webm'});
340
  const fd = new FormData(); fd.append('file', blob, 'rec.webm');
341
  const r = await fetch('/stt',{method:'POST', body: fd});
342
  const j = await r.json();
343
  if(j.text){
344
  appendUser('[voice] '+ j.text);
 
345
  const res = await fetch('/ask',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text: j.text})});
346
  const aj = await res.json(); const ans = aj.answer || aj.error || 'No answer';
347
  appendBot(ans);
348
- if(aj.audio_base64){ const blob2 = base64ToBlob(aj.audio_base64, aj.mime||'audio/mpeg'); player.src = URL.createObjectURL(blob2); player.play();}
349
- } else { appendBot('[STT error] '+JSON.stringify(j)); }
 
 
 
350
  };
351
- mediaRecorder.start(); document.getElementById('recBtn').textContent='■ Dừng';
352
- } catch(e){ alert('Mic error: '+e); }
 
 
 
353
  };
354
 
355
- function base64ToBlob(b64, mime){ const bytes = atob(b64); let len = bytes.length; const buf = new Uint8Array(len); for(let i=0;i<len;i++) buf[i]=bytes.charCodeAt(i); return new Blob([buf], {type:mime}); }
 
 
 
 
 
 
 
 
356
  </script>
357
- </body></html>
 
358
  """
359
 
360
- # ---------------- Endpoints ----------------
361
  @app.route("/", methods=["GET"])
362
  def index():
363
  return render_template_string(INDEX_HTML)
364
 
365
- @app.route("/config", methods=["GET"])
366
- def get_config():
367
- return jsonify({
368
- "hf_token": bool(HF_API_TOKEN),
369
- "hf_model": HF_MODEL,
370
- "hf_tts_model": HF_TTS_MODEL,
371
- "hf_stt_model": HF_STT_MODEL,
372
- "telegram": bool(TELEGRAM_TOKEN and TELEGRAM_CHATID)
373
- })
374
-
375
- @app.route("/ask", methods=["POST"])
376
- def ask_route():
377
- data = request.get_json(force=True, silent=True) or {}
378
- text = (data.get("text") or "").strip()
379
- if not text:
380
- return jsonify({"error":"no text"}), 400
381
- lang = detect_lang(text)
382
- if lang == "vi":
383
- prompt = f"Bạn là trợ lý thông minh, trả lời bằng tiếng Việt, rõ ràng và ngắn gọn:\\n\\n{text}"
384
- else:
385
- prompt = f"You are a helpful assistant. Answer in clear English:\\n\\n{text}"
386
- try:
387
- answer = hf_text_generate(prompt)
388
- except Exception as e:
389
- logger.exception("hf_text_generate error")
390
- return jsonify({"error": str(e)}), 500
391
- append_history(text, answer)
392
- # prepare audio
393
- tts = tts_get_audio_for_text(answer, detected_lang=lang)
394
- result = {"answer": answer}
395
- result.update(tts)
396
- return jsonify(result)
397
-
398
- @app.route("/tts", methods=["POST"])
399
- def tts_route():
400
- data = request.get_json(force=True, silent=True) or {}
401
- text = (data.get("text") or "").strip()
402
- if not text:
403
- return jsonify({"error":"no text"}), 400
404
- lang = detect_lang(text)
405
- return jsonify(tts_get_audio_for_text(text, detected_lang=lang))
406
-
407
- @app.route("/stt", methods=["POST"])
408
- def stt_route():
409
- try:
410
- if "file" in request.files:
411
- f = request.files["file"]
412
- audio_bytes = f.read()
413
- else:
414
- audio_bytes = request.get_data() or b""
415
- if not audio_bytes:
416
- return jsonify({"error":"no audio"}), 400
417
  try:
418
- txt = hf_stt_from_bytes(audio_bytes)
419
- except Exception as e:
420
- logger.exception("hf_stt failed")
421
- return jsonify({"error": str(e)}), 500
422
- return jsonify({"text": txt})
423
- except Exception:
424
- logger.exception("stt_route")
425
- return jsonify({"error":"stt internal error"}), 500
426
-
427
- @app.route("/presence", methods=["POST"])
428
- def presence_route():
429
- data = request.get_json(force=True, silent=True) or {}
430
- note = (data.get("note") or "Có người đến gần robot").strip()
431
- greeting_vi = f"Xin chào! {note}"
432
- greeting_en = "Hello! Someone is near the robot."
433
- combined = f"{greeting_vi}\\n{greeting_en}"
434
- append_history("__presence__", combined)
435
- # prepare greeting audio
436
- tts = tts_get_audio_for_text(greeting_vi, detected_lang="vi")
437
- # telegram notify
438
- if TELEGRAM_TOKEN and TELEGRAM_CHATID:
439
- try:
440
- send_telegram_message("⚠️ Robot phát hiện: " + note)
441
  except Exception:
442
- logger.exception("telegram notify failed")
443
- resp = {"greeting": combined}
444
- if tts.get("audio_base64"):
445
- resp.update(tts)
446
- else:
447
- # if no TTS available, return a sample music url (client can play)
448
- resp["music_url"] = os.getenv("HF_MUSIC_URL", "https://www.soundhelix.com/examples/mp3/SoundHelix-Song-1.mp3")
449
- return jsonify(resp)
450
-
451
- @app.route("/history", methods=["GET"])
452
- def history_route():
453
- return jsonify(read_history())
454
-
455
- @app.route("/clear_history", methods=["POST"])
456
- def clear_history_route():
457
- clear_history()
458
- return jsonify({"cleared": True})
459
-
460
- # startup warmup
461
- def warmup():
462
- logger.info("Warmup: attempting lightweight calls (non-blocking)")
463
- def _w():
464
  try:
465
- if HF_API_TOKEN:
466
- try:
467
- hf_text_generate("Xin chào. Hãy trả lời ngắn gọn: Xin chào!")
468
- except Exception:
469
- pass
470
- try:
471
- if HF_TTS_MODEL:
472
- hf_tts_bytes("Xin chào chủ nhân")
473
- except Exception:
474
- pass
475
  except Exception:
476
- logger.exception("warmup errors")
477
- threading.Thread(target=_w, daemon=True).start()
478
 
479
  @app.before_first_request
480
- def before_first():
481
- warmup()
482
 
 
483
  if __name__ == "__main__":
484
- logger.info("Starting KC Robot AI v5.5 FINAL")
485
- app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
 
1
 
2
+ # app.py — KC Robot AI V5.2 (Full)
3
+ # Flask server + Hugging Face inference (LLM/STT/TTS) + Telegram poller + web UI (browser mic)
4
+ # Secrets expected in HF Space Settings:
5
+ # HF_API_TOKEN (required)
6
+ # TELEGRAM_TOKEN (optional)
7
+ # TELEGRAM_CHATID (optional)
8
+ # Optional env overrides:
9
+ # HF_MODEL, HF_TTS_MODEL, HF_STT_MODEL, PORT
10
 
11
  import os
12
  import io
13
  import time
14
  import json
 
15
  import threading
16
  import logging
 
17
  from pathlib import Path
18
+ from typing import List, Tuple, Optional
19
 
20
  import requests
21
+ from flask import Flask, request, jsonify, send_file, render_template_string
22
 
23
+ # ---------- config & logging ----------
 
 
 
24
  logging.basicConfig(level=logging.INFO)
25
+ logger = logging.getLogger("kcrobot.v5.2")
26
 
27
  app = Flask(__name__)
28
 
29
+ TMP_DIR = Path("/tmp/kcrobot")
30
+ TMP_DIR.mkdir(parents=True, exist_ok=True)
31
+
32
+ # Environment / Secrets (set in Space -> Settings -> Secrets)
33
  HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
34
+ HF_MODEL = os.getenv("HF_MODEL", "google/flan-t5-large").strip()
35
+ HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "doanthang/vietTTS-southern-female").strip()
36
+ HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()
37
+
38
  TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
39
  TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip()
40
 
41
+ PORT = int(os.getenv("PORT", 7860))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ if not HF_API_TOKEN:
44
+ logger.warning("⚠️ HF_API_TOKEN not set put your Hugging Face token into Space Secrets (HF_API_TOKEN).")
 
 
 
 
 
 
 
45
 
46
+ HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}
 
 
 
 
 
47
 
48
# ---------- in-memory state ----------
CONV: List[Tuple[str, str]] = []   # (user, bot) exchanges, newest last
DISPLAY_LINES: List[str] = []      # rolling text lines for small OLED displays

def push_display(line: str, limit: int = 8):
    """Append *line* to DISPLAY_LINES, keeping only the newest *limit* lines."""
    DISPLAY_LINES.append(line)
    if len(DISPLAY_LINES) > limit:
        # In-place trim so external references to the list stay valid.
        DISPLAY_LINES[:] = DISPLAY_LINES[-limit:]
 
 
 
56
 
57
# ---------- Hugging Face helpers ----------
def hf_post_json(model_id: str, payload: dict, timeout: int = 120):
    """POST *payload* as JSON to the HF inference API for *model_id*.

    Returns the decoded JSON response, or the raw bytes when the body is
    not JSON. Raises RuntimeError when no token is configured and
    requests.HTTPError on a non-2xx status.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN missing (set in Space Secrets).")
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {**HF_HEADERS, "Content-Type": "application/json"}
    resp = requests.post(url, headers=headers, json=payload, timeout=timeout)
    if not resp.ok:
        logger.error("HF JSON POST error %s: %s", resp.status_code, resp.text[:400])
    resp.raise_for_status()
    try:
        return resp.json()
    except Exception:
        return resp.content
70
 
71
def hf_post_bytes(model_id: str, data: bytes, content_type: str = "application/octet-stream", timeout: int = 180):
    """POST raw *data* bytes (e.g. audio) to the HF inference API.

    Returns the requests.Response so callers can pick .json() or .content.
    Raises RuntimeError without a token, requests.HTTPError on non-2xx.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN missing (set in Space Secrets).")
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    hdrs = dict(HF_HEADERS)
    hdrs["Content-Type"] = content_type
    resp = requests.post(url, headers=hdrs, data=data, timeout=timeout)
    if not resp.ok:
        logger.error("HF BYTES POST error %s: %s", resp.status_code, resp.text[:400])
    resp.raise_for_status()
    return resp
82
 
83
def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """Generate a completion for *prompt* via the HF inference API.

    Handles the common response shapes: [{"generated_text": ...}],
    {"generated_text"/"text": ...}, and OpenAI-style {"choices": [...]}
    (the "text"/"choices" shapes were handled by the previous version of
    this helper but dropped in the rewrite). Falls back to str() of the
    raw payload for anything else.
    """
    model = model or HF_MODEL
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
        "options": {"wait_for_model": True},
    }
    out = hf_post_json(model, payload, timeout=120)
    # parse typical shapes
    if isinstance(out, list) and out and isinstance(out[0], dict):
        first = out[0]
        return first.get("generated_text") or first.get("text") or str(first)
    if isinstance(out, dict):
        if out.get("generated_text"):
            return out["generated_text"]
        if out.get("text"):
            return out["text"]
        choices = out.get("choices")
        if isinstance(choices, list) and choices:
            c = choices[0]
            return c.get("text") or c.get("message", {}).get("content", "") or str(c)
        return str(out)
    return str(out)
93
+
94
def hf_tts_bytes(text: str, model: Optional[str] = None) -> bytes:
    """Run *text* through the HF TTS model and return the raw audio bytes.

    Raises requests.HTTPError when the inference API rejects the call;
    the error body is logged first to aid debugging.
    """
    model = model or HF_TTS_MODEL
    url = f"https://api-inference.huggingface.co/models/{model}"
    resp = requests.post(
        url,
        headers={**HF_HEADERS, "Content-Type": "application/json"},
        json={"inputs": text},
        timeout=120,
    )
    if not resp.ok:
        logger.error("HF TTS error %s: %s", resp.status_code, resp.text[:400])
    resp.raise_for_status()
    return resp.content
 
 
 
 
 
 
103
 
104
  def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
105
  model = model or HF_STT_MODEL
 
108
  j = r.json()
109
  if isinstance(j, dict) and "text" in j:
110
  return j["text"]
111
+ # external shapes
112
  if isinstance(j, list) and len(j) and isinstance(j[0], dict) and "text" in j[0]:
113
  return j[0]["text"]
114
  return str(j)
115
  except Exception:
116
+ # r may be raw string
117
  return r.text if hasattr(r, "text") else ""
118
 
119
+ # ---------- Telegram helpers ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  def send_telegram_message(text: str):
121
+ if not TELEGRAM_TOKEN or not TELEGRAM_CHATID:
122
+ logger.debug("Telegram not configured.")
123
  return False
124
  try:
125
  url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
 
128
  logger.warning("Telegram send failed: %s %s", r.status_code, r.text[:200])
129
  return r.ok
130
  except Exception:
131
+ logger.exception("send_telegram_message failed")
132
+ return False
133
+
134
def telegram_send_audio(chat_id: str, audio_bytes: bytes, filename: str = "reply.mp3"):
    """Send an MP3 to *chat_id* via the Telegram Bot API.

    Returns True on success, False when the bot token is unset or the
    request failed; never raises.
    """
    if not TELEGRAM_TOKEN:
        return False
    try:
        resp = requests.post(
            f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendAudio",
            files={"audio": (filename, audio_bytes, "audio/mpeg")},
            data={"chat_id": chat_id},
            timeout=30,
        )
        if not resp.ok:
            logger.warning("Telegram sendAudio failed: %s %s", resp.status_code, resp.text[:200])
        return resp.ok
    except Exception:
        logger.exception("telegram_send_audio failed")
        return False
148
 
149
  def telegram_poll_loop():
150
  if not TELEGRAM_TOKEN:
151
+ logger.info("Telegram poller disabled (no TELEGRAM_TOKEN).")
152
  return
153
  base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"
154
  offset = None
155
+ logger.info("Telegram poller started.")
156
  while True:
157
  try:
158
  params = {"timeout": 30}
 
179
  try:
180
  requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
181
  except Exception:
182
+ logger.exception("tg reply failed")
183
  elif low.startswith("/say "):
184
  phrase = text[5:].strip()
185
+ # try TTS then send audio
186
  try:
187
+ audio = hf_tts_bytes(phrase)
188
+ telegram_send_audio(chat_id, audio, filename="say.mp3")
 
189
  except Exception:
190
+ logger.exception("tg say failed")
191
  elif low.startswith("/status"):
192
  try:
193
  requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running."}, timeout=10)
 
199
  except Exception:
200
  pass
201
  except Exception:
202
+ logger.exception("telegram_poll_loop crashed, sleeping 3s")
203
  time.sleep(3)
204
 
205
+ # start telegram poller thread if token exists
206
  if TELEGRAM_TOKEN:
207
  try:
208
  t = threading.Thread(target=telegram_poll_loop, daemon=True)
209
  t.start()
210
  except Exception:
211
+ logger.exception("Failed to start telegram poller thread")
212
+
213
# ---------- Routes (ESP32 & web) ----------
@app.route("/health", methods=["GET"])
def health():
    """Liveness/config probe: reports which models are configured.

    Only booleans and model ids are exposed — never the secrets themselves.
    """
    return jsonify({
        "ok": True,
        "hf_token": bool(HF_API_TOKEN),
        "hf_model": HF_MODEL,
        "tts_model": HF_TTS_MODEL,
        "stt_model": HF_STT_MODEL,
        "telegram": bool(TELEGRAM_TOKEN),
    })
+
225
@app.route("/ask", methods=["POST"])
def route_ask():
    """Answer a chat question.

    Expects JSON {"text": ..., "lang": "vi"|"en"|"auto"} and returns
    {"answer": ...} (or {"error": ...} with 400/500). Side effects:
    records the exchange in CONV, pushes display lines, and mirrors the
    exchange to Telegram when configured.
    """
    body = request.get_json(force=True, silent=True) or {}
    text = (body.get("text") or "").strip()
    lang = (body.get("lang") or "auto").strip().lower()
    if not text:
        return jsonify({"error": "no text"}), 400
    # Build an instruction-style prompt matching the requested language.
    if lang == "vi":
        prompt = "Bạn là trợ lý thông minh, trả lời bằng tiếng Việt, rõ ràng và ngắn gọn:\n\n" + text
    elif lang == "en":
        prompt = "You are a helpful assistant. Answer in clear English:\n\n" + text
    else:
        prompt = "You are a bilingual assistant (Vietnamese/English). Answer in the language of the user.\n\n" + text
    try:
        ans = hf_text_generate(prompt)
    except Exception as e:
        logger.exception("hf_text_generate failed")
        return jsonify({"error": str(e)}), 500
    CONV.append((text, ans))
    push_display("YOU: " + text[:80])
    push_display("BOT: " + ans[:80])
    # Best-effort Telegram summary; never let it break the HTTP response.
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        try:
            send_telegram_message(f"You: {text}\nBot: {ans}")
        except Exception:
            logger.exception("telegram notify failed")
    return jsonify({"answer": ans})
+
255
@app.route("/tts", methods=["POST"])
def route_tts():
    """Synthesize JSON {"text": ...} and stream the audio back.

    Responds with the raw audio served as audio/mpeg, or a JSON error
    with 400 (no text) / 500 (TTS failure).
    """
    body = request.get_json(force=True, silent=True) or {}
    text = (body.get("text") or "").strip()
    if not text:
        return jsonify({"error": "no text"}), 400
    try:
        audio = hf_tts_bytes(text)
    except Exception as e:
        logger.exception("hf_tts failed")
        return jsonify({"error": str(e)}), 500
    # Model output is usually mp3 or wav; we serve it uniformly as audio/mpeg.
    return send_file(io.BytesIO(audio), mimetype="audio/mpeg", as_attachment=False, download_name="tts.mp3")
+
269
@app.route("/stt", methods=["POST"])
def route_stt():
    """Transcribe audio sent as multipart field 'file' or as the raw body.

    Returns {"text": ...} on success; {"error": ...} with 400 when no
    audio was supplied or 500 on any processing failure.
    """
    try:
        if "file" in request.files:
            audio_bytes = request.files["file"].read()
        else:
            audio_bytes = request.get_data() or b""
        if not audio_bytes:
            return jsonify({"error": "no audio"}), 400
        txt = hf_stt_from_bytes(audio_bytes)
        # txt may be a non-str fallback shape; coerce defensively for display.
        push_display("STT: " + (txt[:80] if isinstance(txt, str) else str(txt)))
        return jsonify({"text": txt})
    except Exception as e:
        logger.exception("route_stt failed")
        return jsonify({"error": str(e)}), 500
+
287
@app.route("/presence", methods=["POST"])
def route_presence():
    """Handle a proximity event reported by the robot (e.g. radar trigger).

    Records the event, pushes a display line, pre-warms the greeting TTS
    in the background, and pings Telegram when configured. Returns the
    bilingual greeting text.
    """
    body = request.get_json(force=True, silent=True) or {}
    note = (body.get("note") or "Có người đến gần").strip()
    greeting_vi = f"Xin chào! {note}"
    greeting_en = "Hello! Someone is near the robot."
    combined = f"{greeting_vi}\n{greeting_en}"
    CONV.append(("__presence__", combined))
    push_display("RADAR: " + note[:80])

    # Pre-warm the HF TTS model asynchronously so a later /tts call is fast.
    def gen_greeting():
        try:
            hf_tts_bytes(greeting_vi)
        except Exception:
            logger.exception("generate greeting vi failed")
        try:
            hf_tts_bytes(greeting_en)
        except Exception:
            logger.exception("generate greeting en failed")

    threading.Thread(target=gen_greeting, daemon=True).start()
    # Telegram alert (send_telegram_message itself never raises).
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        send_telegram_message("⚠️ Robot: Phát hiện người - " + note)
    return jsonify({"greeting": combined})
+
312
@app.route("/display", methods=["GET"])
def route_display():
    """Expose the newest OLED lines plus conversation length for the ESP32."""
    return jsonify({"lines": DISPLAY_LINES[-8:], "conv_len": len(CONV)})
+
316
+ # ---------- Simple Web UI with browser STT & TTS (for testing) ----------
317
# Single-page browser test UI served at "/".  Uses the browser's
# MediaRecorder for voice capture and round-trips through this server's
# /stt, /ask, /tts and /presence endpoints.  This is a runtime string
# (rendered via render_template_string) — do not localize or reformat it
# casually; every byte is served to the client.
INDEX_HTML = r"""
<!doctype html>
<html lang="vi">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width,initial-scale=1"/>
<title>KC Robot AI V5.2</title>
<style>
body{font-family:Arial,Helvetica,sans-serif;background:#0f1720;color:#e6eef6;margin:12px}
.wrap{max-width:980px;margin:auto}
h1{color:#7ee787}
#chat{background:#08111a;border-radius:8px;padding:10px;height:420px;overflow:auto;border:1px solid #223344}
.user{color:#bfe7ff; text-align:right}
.bot{color:#dfffdc; text-align:left}
.controls{display:flex;gap:8px;margin-top:8px}
input[type=text]{flex:1;padding:10px;border-radius:8px;border:1px solid #223344;background:#021427;color:#e6eef6}
button{padding:10px 12px;border-radius:8px;border:none;background:#0ea5a4;color:#fff;cursor:pointer}
audio{width:100%;margin-top:8px}
.small{font-size:12px;color:#99a0b0}
</style>
</head>
<body>
<div class="wrap">
<h1>🤖 KC Robot AI V5.2 — Full</h1>
<div id="chat"></div>
<div class="controls">
<input id="txt" type="text" placeholder="Gõ câu hỏi (VN/EN) hoặc bấm Ghi để nói..." />
<button id="sendBtn">Gửi</button>
<button id="recBtn">🎙 Ghi</button>
<button id="greetBtn">▶ Chào</button>
</div>
<audio id="player" controls></audio>
<p class="small">Cần HF_API_TOKEN trong Secrets để STT/TTS/LLM hoạt động.</p>
</div>

<script>
let mediaRecorder, audioChunks=[];
const chat = document.getElementById('chat');
const player = document.getElementById('player');

function appendUser(t){ chat.innerHTML += `<div class="user"><b>You:</b> ${escapeHtml(t)}</div>`; chat.scrollTop = chat.scrollHeight; }
function appendBot(t){ chat.innerHTML += `<div class="bot"><b>Robot:</b> ${escapeHtml(t)}</div>`; chat.scrollTop = chat.scrollHeight; }
function escapeHtml(s){ return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }

document.getElementById('sendBtn').onclick = async ()=>{
const v = document.getElementById('txt').value.trim();
if(!v) return;
appendUser(v); document.getElementById('txt').value='';
const res = await fetch('/ask',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text:v})});
const j = await res.json();
const ans = j.answer || j.error || 'No answer';
appendBot(ans);
// play TTS of answer
const tts = await fetch('/tts',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text:ans})});
if(tts.ok){
const blob = await tts.blob(); const url = URL.createObjectURL(blob); player.src = url; player.play();
}
};

document.getElementById('recBtn').onclick = async ()=>{
if(mediaRecorder && mediaRecorder.state === 'recording'){ mediaRecorder.stop(); return; }
if(!navigator.mediaDevices) return alert('Trình duyệt không hỗ trợ microphone.');
try{
const stream = await navigator.mediaDevices.getUserMedia({audio:true});
mediaRecorder = new MediaRecorder(stream);
audioChunks = [];
mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
mediaRecorder.onstop = async () => {
const blob = new Blob(audioChunks, {type:'audio/webm'});
const fd = new FormData(); fd.append('file', blob, 'rec.webm');
const r = await fetch('/stt',{method:'POST', body: fd});
const j = await r.json();
if(j.text){
appendUser('[voice] '+ j.text);
// auto ask
const res = await fetch('/ask',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text: j.text})});
const aj = await res.json(); const ans = aj.answer || aj.error || 'No answer';
appendBot(ans);
const tts = await fetch('/tts',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text:ans})});
if(tts.ok){ const b = await tts.blob(); const url = URL.createObjectURL(b); player.src = url; player.play(); }
} else {
appendBot('[STT lỗi] ' + JSON.stringify(j));
}
};
mediaRecorder.start();
document.getElementById('recBtn').textContent = ' Dừng';
} catch(err){
alert('Không thể truy cập microphone: ' + err);
}
};

document.getElementById('greetBtn').onclick = async ()=>{
const r = await fetch('/presence',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({note: 'Xin chào chủ nhân'})});
const j = await r.json();
const text = j.greeting || '';
appendBot(text);
// play tts
const tts = await fetch('/tts',{method:'POST',headers:{'Content-Type':'application/json'},body: JSON.stringify({text:text})});
if(tts.ok){ const b = await tts.blob(); const url = URL.createObjectURL(b); player.src = url; player.play(); }
};
</script>
</body>
</html>
"""
421
 
 
422
@app.route("/", methods=["GET"])
def index():
    """Serve the embedded single-page test UI (browser STT/TTS console)."""
    page = INDEX_HTML
    return render_template_string(page)
425
 
426
+ # ---------- startup: warm models + greeting ----------
427
def startup_actions():
    """One-time startup work: log, update the display, pre-warm TTS, notify.

    The TTS warm-up runs in a daemon thread so server startup is never
    blocked by slow Hugging Face inference calls.  Telegram notification
    is best-effort and must never abort startup.
    """
    logger.info("KC Robot AI V5.2 starting up.")
    greeting_vi = "Xin chào chủ nhân! KC Robot đã sẵn sàng."
    greeting_en = "Hello master! KC Robot is ready."
    push_display("SYSTEM: Robot online")

    # Warm up: request TTS generation asynchronously (not blocking).
    def gen():
        if not HF_API_TOKEN:
            return
        # Warm each greeting independently so a failure on one clip does
        # not prevent caching the other (mirrors the /presence handler).
        for text in (greeting_vi, greeting_en):
            try:
                _ = hf_tts_bytes(text)
            except Exception:
                logger.exception("warmup tts failed for %r", text)

    threading.Thread(target=gen, daemon=True).start()

    # Optional: notify Telegram about startup (best-effort).
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        try:
            send_telegram_message("KC Robot brain is online.")
        except Exception:
            logger.exception("telegram startup notify failed")
 
447
 
448
# Flask 2.3 removed ``Flask.before_first_request`` (deprecated since 2.2),
# so registering it crashes on current Flask.  Emulate its run-once
# semantics with a thread-safe flag checked at the start of every request.
_startup_done = threading.Event()

@app.before_request
def _before_first():
    """Run startup_actions() exactly once, before the first handled request."""
    if not _startup_done.is_set():
        _startup_done.set()
        startup_actions()
451
 
452
+ # ---------- run ----------
453
if __name__ == "__main__":
    # Development entry point; a Hugging Face Space typically launches the
    # app via its configured runner instead of this branch.
    # NOTE(review): PORT is not defined in this section — presumably set
    # earlier in the file (e.g. from an env var); verify before deploying.
    logger.info("Starting KC Robot AI V5.2 on port %s", PORT)
    app.run(host="0.0.0.0", port=PORT)