kcrobot20 committed on
Commit
a35fb27
·
verified ·
1 Parent(s): 8a65d0e

initial commit

Browse files
Files changed (1) hide show
  1. app.py +164 -670
app.py CHANGED
@@ -1,698 +1,192 @@
1
 
2
- # app.py — KC Robot AI V7.2 MAX FINAL
3
- # Flask main app + optional Gradio UI launched in background (for convenience).
4
- # Requires Secrets:
5
- # HF_TOKEN, HF_MODEL, TELEGRAM_TOKEN (optional), TELEGRAM_CHAT_ID (optional)
6
- #
7
- # Endpoints:
8
- # - GET / -> main HTML UI (chat + audio play)
9
- # - GET /health -> status json
10
- # - POST /ask -> {text, lang?} -> {"answer": "..."}
11
- # - POST /tts -> {text} -> audio/mp3
12
- # - POST /stt -> upload file or raw bytes -> {"text":"..."}
13
- # - POST /presence -> {note?} -> greets + notify telegram, returns mp3 if possible
14
- # - GET /display -> display buffer (for ESP32)
15
- # - POST /config -> change runtime config (optional)
16
- #
17
- # How to use on HF Spaces:
18
- # - Upload this file and requirements.txt
19
- # - Set Secrets: HF_TOKEN, HF_MODEL, TELEGRAM_TOKEN (opt), TELEGRAM_CHAT_ID (opt)
20
- # - Start Space (Flask runtime). Visit the Space URL to test.
21
- #
22
- # Notes:
23
- # - Calls to Hugging Face inference API have robust fallback and clear error messages.
24
- # - TTS: tries HF TTS model (HF_TTS_MODEL if set), otherwise fallback to gTTS.
25
- # - STT: uses HF_STT_MODEL if set (default openai/whisper-small).
26
- #
27
- # Author: KC Robot helper (generated)
28
- # Version: v7.2-max-final
29
 
30
- import os
31
- import io
32
- import sys
33
- import time
34
- import json
35
- import uuid
36
- import logging
37
- import threading
38
- from typing import List, Tuple, Optional, Any
39
- from pathlib import Path
40
 
 
 
41
  import requests
42
- from flask import Flask, request, jsonify, send_file, render_template_string, abort, Response
43
-
44
- # Optional gTTS fallback
45
- try:
46
- from gtts import gTTS
47
- _HAS_GTTS = True
48
- except Exception:
49
- _HAS_GTTS = False
50
-
51
- # Optional Gradio UI
52
- try:
53
- import gradio as gr
54
- _HAS_GRADIO = True
55
- except Exception:
56
- _HAS_GRADIO = False
57
-
58
- # Logging config
59
- logging.basicConfig(stream=sys.stdout, level=logging.INFO,
60
- format="%(asctime)s %(levelname)s %(name)s: %(message)s")
61
- logger = logging.getLogger("kcrobot.v7.2")
62
-
63
- # --------- Load env / secrets ----------
64
- HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
65
- HF_MODEL = os.getenv("HF_MODEL", "").strip() # primary model id
66
- HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "").strip() # optional TTS model
67
- HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()
68
-
69
- TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
70
- TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID", "").strip()
71
-
72
- # server ports - HF spaces provides PORT env; default 7860
73
- PORT = int(os.environ.get("PORT", 7860))
74
- GRADIO_PORT = 7861 # gradio web ui runs here if enabled
75
-
76
- HF_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
77
-
78
- # Create data dir for tmp audio
79
- TMPDIR = Path("/tmp/kcrobot") if os.name != "nt" else Path.cwd() / "tmp_kcrobot"
80
- TMPDIR.mkdir(parents=True, exist_ok=True)
81
-
82
- # --------- in-memory state ----------
83
- CONV: List[Tuple[str, str]] = [] # (user, bot)
84
- DISPLAY_BUFFER: List[str] = []
85
- DISPLAY_LIMIT = 8
86
-
87
- def push_display(line: str):
88
- global DISPLAY_BUFFER
89
- DISPLAY_BUFFER.append(line)
90
- if len(DISPLAY_BUFFER) > DISPLAY_LIMIT:
91
- DISPLAY_BUFFER = DISPLAY_BUFFER[-DISPLAY_LIMIT:]
92
-
93
- # ---------- language detector ----------
94
- VI_CHARS = set("ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụứừửữựýỳỷỹỵ")
95
- def detect_vi_or_en(text: str) -> str:
96
- for ch in text.lower():
97
- if ch in VI_CHARS:
98
- return "vi"
99
- return "en"
100
-
101
- # ---------- HF helpers with robust fallback ----------
102
- def hf_post_json_single(model_id: str, payload: dict, timeout: int = 90) -> requests.Response:
103
- """Post JSON to HF inference endpoint for a single model."""
104
- if not HF_TOKEN:
105
- raise RuntimeError("HF_TOKEN not configured in environment/secrets.")
106
- url = f"https://api-inference.huggingface.co/models/{model_id}"
107
- headers = dict(HF_HEADERS)
108
- headers["Content-Type"] = "application/json"
109
- logger.debug("HF JSON POST to %s payload keys: %s", model_id, list(payload.keys()))
110
- r = requests.post(url, headers=headers, json=payload, timeout=timeout)
111
- return r
112
-
113
- def hf_post_json_with_fallback(models: List[str], payload: dict, timeout: int = 90) -> Tuple[str, Any]:
114
- """Try a list of models in order. Return (model_used, parsed_json_or_text)."""
115
- last_err = None
116
- for model in models:
117
- try:
118
- r = hf_post_json_single(model, payload, timeout=timeout)
119
- except Exception as e:
120
- last_err = f"network error for {model}: {e}"
121
- logger.warning(last_err)
122
- continue
123
- if r.status_code == 200:
124
- try:
125
- j = r.json()
126
- return model, j
127
- except Exception:
128
- return model, r.content
129
- else:
130
- last_err = f"HTTP {r.status_code} from {model}: {r.text[:300]}"
131
- logger.warning(last_err)
132
- # if auth error (401/403) or not found (404) skip to next
133
- continue
134
- raise RuntimeError(f"All HF attempts failed. Last error: {last_err}")
135
-
136
- def hf_post_bytes_with_fallback(models: List[str], data: bytes, content_type: str = "application/octet-stream", timeout: int = 120) -> Tuple[str, requests.Response]:
137
- """Post bytes (STT or TTS) to HF; return model used and response object."""
138
- if not HF_TOKEN:
139
- raise RuntimeError("HF_TOKEN not configured.")
140
- headers = dict(HF_HEADERS)
141
- headers["Content-Type"] = content_type
142
- last_err = None
143
- for model in models:
144
- url = f"https://api-inference.huggingface.co/models/{model}"
145
- try:
146
- r = requests.post(url, headers=headers, data=data, timeout=timeout)
147
- except Exception as e:
148
- last_err = f"network error {e} for {model}"
149
- logger.warning(last_err)
150
- continue
151
- if r.status_code == 200:
152
- return model, r
153
- else:
154
- last_err = f"HTTP {r.status_code} for {model}: {r.text[:300]}"
155
- logger.warning(last_err)
156
- continue
157
- raise RuntimeError(f"All HF byte-post attempts failed. Last error: {last_err}")
158
 
159
- def parse_hf_text_resp(obj: Any) -> str:
160
- """Normalize HF text outputs."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  try:
162
- if isinstance(obj, dict):
163
- # various shapes: {"generated_text": "..."} or {"choices":[...]}
164
- if "generated_text" in obj:
165
- return obj.get("generated_text","")
166
- if "text" in obj:
167
- return obj.get("text","")
168
- if "choices" in obj and isinstance(obj["choices"], list) and obj["choices"]:
169
- c0 = obj["choices"][0]
170
- return c0.get("text") or c0.get("message",{}).get("content","") or str(c0)
171
- # fallback - stringify
172
- return json.dumps(obj)
173
- if isinstance(obj, list) and obj:
174
- first = obj[0]
175
- if isinstance(first, dict):
176
- for k in ("generated_text","text"):
177
- if k in first:
178
- return first.get(k,"")
179
- return str(first)
180
- if isinstance(obj, (bytes, bytearray)):
181
- try:
182
- return obj.decode('utf-8', errors='ignore')
183
- except Exception:
184
- return str(obj)
185
- return str(obj)
186
  except Exception as e:
187
- logger.exception("parse_hf_text_resp error")
188
- return f"[parse error] {e}"
189
-
190
- # ---------- High-level LLM / STT / TTS wrappers ----------
191
- def llm_generate(prompt: str, model_override: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
192
- """Generate text from HF LLM. Use HF_MODEL by default; allow override."""
193
- if model_override:
194
- models = [model_override]
195
- else:
196
- if HF_MODEL:
197
- models = [HF_MODEL]
198
- else:
199
- raise RuntimeError("HF_MODEL not configured.")
200
- payload = {
201
- "inputs": prompt,
202
- "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
203
- "options": {"wait_for_model": True}
204
- }
205
- model_used, out = hf_post_json_with_fallback(models, payload, timeout=120)
206
- res = parse_hf_text_resp(out)
207
- logger.info("LLM used model=%s len=%d", model_used, len(res))
208
- return res
209
 
210
- def tts_get_bytes_hf(text: str, model_list: Optional[List[str]] = None) -> bytes:
211
- """Try to get TTS bytes from HF models listed, else fallback to gTTS if available."""
212
- if not text:
213
- raise RuntimeError("Empty text for TTS")
214
- models = model_list or ([HF_TTS_MODEL] if HF_TTS_MODEL else [])
215
- if models:
216
- # HF TTS often expects JSON {"inputs":"..."} and returns audio bytes
217
- payload = json.dumps({"inputs": text}).encode("utf-8")
218
- try:
219
- model_used, resp = hf_post_bytes_with_fallback(models, payload, content_type="application/json", timeout=120)
220
- # return raw bytes
221
- logger.info("HF TTS used %s return bytes len=%d", model_used, len(resp.content))
222
- return resp.content
223
- except Exception as e:
224
- logger.warning("HF TTS attempts failed: %s", e)
225
- # fallback to gTTS if available
226
- if _HAS_GTTS:
227
- try:
228
- tts = gTTS(text=text, lang="vi" if detect_vi_or_en(text)=="vi" else "en")
229
- bio = io.BytesIO()
230
- tts.write_to_fp(bio)
231
- bio.seek(0)
232
- data = bio.read()
233
- logger.info("gTTS fallback created bytes len=%d", len(data))
234
- return data
235
- except Exception as e:
236
- logger.exception("gTTS fallback failed")
237
- raise RuntimeError(f"TTS failed: {e}")
238
- raise RuntimeError("No TTS method available (no HF_TTS_MODEL and gTTS missing).")
239
 
240
- def stt_from_bytes_hf(audio_bytes: bytes, model_list: Optional[List[str]] = None) -> str:
241
- """Run STT via HF; returns recognized text."""
242
- models = model_list or ([HF_STT_MODEL] if HF_STT_MODEL else [])
243
- if not models:
244
- raise RuntimeError("No STT model configured.")
245
- model_used, resp = hf_post_bytes_with_fallback(models, audio_bytes, content_type="application/octet-stream", timeout=180)
246
- # parse response
247
  try:
248
- j = resp.json()
249
- if isinstance(j, dict) and "text" in j:
250
- return j["text"]
251
- return parse_hf_text_resp(j)
252
- except Exception:
253
- try:
254
- return resp.text
255
- except Exception:
256
- return "[stt parse failed]"
257
-
258
- # ---------- Telegram helpers ----------
259
- def send_telegram_message(text: str) -> bool:
260
- if not TELEGRAM_TOKEN or not TELEGRAM_CHAT_ID:
261
- logger.debug("telegram not configured")
262
- return False
263
- try:
264
- url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
265
- r = requests.post(url, json={"chat_id": TELEGRAM_CHAT_ID, "text": text}, timeout=8)
266
- if r.status_code == 200:
267
- return True
268
- logger.warning("Telegram send failed %s %s", r.status_code, r.text[:300])
269
- return False
270
- except Exception:
271
- logger.exception("send_telegram_message exception")
272
- return False
273
-
274
- def telegram_poll_loop():
275
- """Background poller: respond to /ask, /say, /status commands"""
276
- if not TELEGRAM_TOKEN:
277
- logger.info("Telegram not configured - poll disabled")
278
- return
279
- logger.info("Starting Telegram poller")
280
- base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"
281
- offset = None
282
- while True:
283
- try:
284
- params = {"timeout": 30}
285
- if offset:
286
- params["offset"] = offset
287
- r = requests.get(base + "/getUpdates", params=params, timeout=35)
288
- if r.status_code != 200:
289
- logger.warning("Telegram getUpdates failed: %s", r.status_code)
290
- time.sleep(2)
291
- continue
292
- j = r.json()
293
- for upd in j.get("result", []):
294
- offset = upd.get("update_id", 0) + 1
295
- msg = upd.get("message") or {}
296
- chat = msg.get("chat", {})
297
- chat_id = chat.get("id")
298
- text = (msg.get("text") or "").strip()
299
- if not text:
300
- continue
301
- logger.info("TG msg from %s: %s", chat_id, text[:120])
302
- lower = text.lower()
303
- if lower.startswith("/ask "):
304
- q = text[5:].strip()
305
- try:
306
- ans = llm_generate(q)
307
- except Exception as e:
308
- ans = f"[HF error] {e}"
309
- try:
310
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
311
- except Exception:
312
- logger.exception("tg reply failed")
313
- elif lower.startswith("/say "):
314
- phrase = text[5:].strip()
315
- try:
316
- audio_bytes = tts_get_bytes_hf(phrase)
317
- files = {"audio": ("reply.mp3", audio_bytes, "audio/mpeg")}
318
- requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
319
- except Exception:
320
- logger.exception("tg say failed")
321
- elif lower.startswith("/status"):
322
- try:
323
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running"}, timeout=10)
324
- except Exception:
325
- logger.exception("tg status failed")
326
- else:
327
- try:
328
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10)
329
- except Exception:
330
- logger.exception("tg help failed")
331
- except Exception:
332
- logger.exception("telegram poller exception")
333
- time.sleep(3)
334
 
335
- # Start telegram poller thread if token present
336
- if TELEGRAM_TOKEN:
 
 
337
  try:
338
- t = threading.Thread(target=telegram_poll_loop, daemon=True)
339
- t.start()
340
- except Exception:
341
- logger.exception("Failed to start telegram thread")
342
-
343
- # ---------- Flask app & routes ----------
344
- app = Flask(__name__)
345
-
346
- # Simple HTML UI (mobile friendly) - main page includes chatbox and audio play button
347
- INDEX_HTML = """
348
- <!doctype html>
 
 
 
 
 
 
 
 
349
  <html>
350
  <head>
351
- <meta charset="utf-8">
352
- <meta name="viewport" content="width=device-width,initial-scale=1">
353
- <title>KC Robot AI v7.2</title>
354
  <style>
355
- body{font-family:Inter,Arial,Helvetica,sans-serif;margin:10px;color:#111}
356
- .container{max-width:900px;margin:auto}
357
- .header{display:flex;align-items:center;gap:12px}
358
- .h1{font-size:20px;font-weight:700}
359
- .controls{margin-top:8px}
360
- textarea{width:100%;min-height:70px;padding:10px;font-size:15px;border-radius:8px;border:1px solid #ddd}
361
- button{background:#0066cc;color:white;border:none;padding:10px 14px;border-radius:8px;font-weight:600;cursor:pointer}
362
- .chatbox{margin-top:12px;border:1px solid #eee;padding:8px;border-radius:8px;background:#fafafa;height:300px;overflow:auto}
363
- .msg-user{color:#0b63d6;margin:6px 0}
364
- .msg-bot{color:#0b8a3f;margin:6px 0}
365
- .small{font-size:13px;color:#666}
366
- .controls-row{display:flex;gap:8px;align-items:center}
367
- .select{padding:6px;border-radius:6px;border:1px solid #ddd}
368
  </style>
369
  </head>
370
  <body>
371
- <div class="container">
372
- <div class="header">
373
- <div class="h1">🤖 KC Robot AI v7.2 — Final</div>
374
- </div>
375
- <div class="small">Model: <span id="modelName">loading...</span></div>
376
- <div class="controls">
377
- <textarea id="userText" placeholder="Nhập tiếng Việt hoặc English..."></textarea>
378
- <div class="controls-row">
379
- <select id="lang" class="select"><option value="auto">Auto</option><option value="vi">Vietnamese</option><option value="en">English</option></select>
380
- <button onclick="sendMsg()">Gửi</button>
381
- <button onclick="playLast()">Phát âm</button>
382
- <button onclick="clearChat()">Xóa</button>
383
- </div>
384
- </div>
385
- <div class="chatbox" id="chatbox"></div>
386
- <div style="margin-top:12px">
387
- <input type="file" id="audioFile" accept="audio/*"><button onclick="uploadAudio()">Upload → STT</button>
388
- </div>
389
- <div style="margin-top:12px" class="small">Kết nối Telegram: <span id="tgstatus">checking...</span></div>
390
- </div>
391
 
392
  <script>
393
- let lastAnswer = "";
394
- function appendUser(t){ const cb=document.getElementById('chatbox'); cb.innerHTML += '<div class="msg-user"><b>You:</b> '+escapeHtml(t)+'</div>'; cb.scrollTop = cb.scrollHeight; }
395
- function appendBot(t){ const cb=document.getElementById('chatbox'); cb.innerHTML += '<div class="msg-bot"><b>Robot:</b> '+escapeHtml(t)+'</div>'; cb.scrollTop = cb.scrollHeight; }
396
- function escapeHtml(s){ return (s+'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
397
- async function sendMsg(){
398
- let t=document.getElementById('userText').value.trim(); if(!t) return;
399
- appendUser(t);
400
- document.getElementById('userText').value='';
401
- const lang=document.getElementById('lang').value;
402
- try {
403
- const res = await fetch('/ask', {method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify({text: t, lang: lang})});
404
- const j = await res.json();
405
- if(j.answer){
406
- lastAnswer = j.answer;
407
- appendBot(j.answer);
408
- } else {
409
- appendBot("[Error] " + JSON.stringify(j));
410
- }
411
- } catch(e){
412
- appendBot("[Network Error] " + e);
413
- }
414
- }
415
- async function playLast(){
416
- if(!lastAnswer) return alert('No answer to play');
417
- try {
418
- const r = await fetch('/tts', {method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify({text: lastAnswer})});
419
- if(!r.ok){ alert('TTS failed'); return; }
420
- const blob = await r.blob();
421
- const url = URL.createObjectURL(blob);
422
- const a = new Audio(url);
423
- a.play();
424
- } catch(e){
425
- alert('Play error: '+e);
426
  }
 
427
  }
428
- async function uploadAudio(){
429
- const f = document.getElementById('audioFile').files[0];
430
- if(!f) return alert('Chọn file audio');
431
- const fd = new FormData(); fd.append('file', f);
432
- const r = await fetch('/stt', {method:'POST', body: fd});
433
- const j = await r.json();
434
- if(j.text) { appendUser('[voice] ' + j.text); lastAnswer=''; }
435
- else appendUser('[stt error] ' + JSON.stringify(j));
436
- }
437
- async function clearChat(){ document.getElementById('chatbox').innerHTML=''; lastAnswer=''; }
438
- async function loadStatus(){
439
- try{
440
- const r=await fetch('/health'); const j=await r.json();
441
- document.getElementById('modelName').innerText = j.hf_model || '(not set)';
442
- document.getElementById('tgstatus').innerText = j.telegram ? 'enabled' : 'disabled';
443
- }catch(e){
444
- document.getElementById('modelName').innerText='(error)';
445
- document.getElementById('tgstatus').innerText='error';
446
- }
447
- }
448
- loadStatus();
449
  </script>
450
  </body>
451
  </html>
452
  """
453
 
454
- @app.route("/", methods=["GET"])
455
- def index():
456
- return render_template_string(INDEX_HTML)
457
-
458
- @app.route("/health", methods=["GET"])
459
- def health():
460
- return jsonify({
461
- "ok": True,
462
- "hf_token": bool(HF_TOKEN),
463
- "hf_model": HF_MODEL,
464
- "hf_tts_model": HF_TTS_MODEL,
465
- "hf_stt_model": HF_STT_MODEL,
466
- "telegram": bool(TELEGRAM_TOKEN and TELEGRAM_CHAT_ID),
467
- "conv_len": len(CONV),
468
- "display_len": len(DISPLAY_BUFFER)
469
- })
470
-
471
- @app.route("/ask", methods=["POST"])
472
- def route_ask():
473
- try:
474
- j = request.get_json(force=True) or {}
475
- text = (j.get("text","") or "").strip()
476
- lang = (j.get("lang","auto") or "auto")
477
- if not text:
478
- return jsonify({"error":"no text"}), 400
479
- # prepare prompt instruct
480
- if lang == "vi":
481
- prompt = f"Bạn là trợ lý thông minh, trả lời bằng tiếng Việt, rõ ràng và lịch sự. Trả lời ngắn gọn:\n\n{text}"
482
- elif lang == "en":
483
- prompt = f"You are a helpful assistant. Answer in clear English, concise:\n\n{text}"
484
- else:
485
- prompt = f"You are a bilingual assistant. Answer in the same language as the user, clearly and concisely:\n\n{text}"
486
- try:
487
- ans = llm_generate(prompt)
488
- except Exception as e:
489
- logger.exception("LLM error")
490
- return jsonify({"error": f"LLM error: {e}"}), 500
491
- CONV.append((text, ans))
492
- push_display("YOU: " + (text[:60]))
493
- push_display("BOT: " + (ans[:60] if isinstance(ans, str) else str(ans)[:60]))
494
- # notify telegram optionally (short)
495
- if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
496
- try:
497
- send_telegram_message(f"You: {text}\nBot: {ans[:300]}")
498
- except Exception:
499
- logger.exception("telegram notify failed")
500
- return jsonify({"answer": ans})
501
- except Exception as e:
502
- logger.exception("route_ask failed")
503
- return jsonify({"error": str(e)}), 500
504
-
505
- @app.route("/tts", methods=["POST"])
506
- def route_tts():
507
- try:
508
- j = request.get_json(force=True) or {}
509
- text = (j.get("text","") or "").strip()
510
- if not text:
511
- return jsonify({"error":"no text"}), 400
512
- # try HF TTS model first
513
- try:
514
- # prefer HF_TTS_MODEL if set
515
- models = [HF_TTS_MODEL] if HF_TTS_MODEL else []
516
- audio_bytes = tts_get_bytes_hf(text, model_list=models if models else None)
517
- except Exception as e:
518
- logger.warning("TTS HF failed: %s", e)
519
- # fallback to gTTS if possible
520
- if _HAS_GTTS:
521
- try:
522
- audio_bytes = tts_get_bytes_hf(text, model_list=[])
523
- except Exception as e2:
524
- logger.exception("gTTS fallback also failed")
525
- return jsonify({"error": f"TTS failed: {e2}"}), 500
526
- else:
527
- return jsonify({"error": f"TTS failed: {e}"}), 500
528
- # return mp3
529
- return Response(audio_bytes, mimetype="audio/mpeg")
530
- except Exception as e:
531
- logger.exception("route_tts exception")
532
- return jsonify({"error": str(e)}), 500
533
-
534
- @app.route("/stt", methods=["POST"])
535
- def route_stt():
536
- try:
537
- if "file" in request.files:
538
- f = request.files["file"]
539
- audio_bytes = f.read()
540
- else:
541
- audio_bytes = request.get_data()
542
- if not audio_bytes:
543
- return jsonify({"error":"no audio provided"}), 400
544
- try:
545
- txt = stt_from_bytes_hf(audio_bytes)
546
- except Exception as e:
547
- logger.exception("STT failed")
548
- return jsonify({"error": str(e)}), 500
549
- CONV.append((f"[voice] {txt}", ""))
550
- push_display("VOICE: " + (txt[:60] if isinstance(txt,str) else str(txt)))
551
- return jsonify({"text": txt})
552
- except Exception as e:
553
- logger.exception("route_stt exception")
554
- return jsonify({"error": str(e)}), 500
555
-
556
- @app.route("/presence", methods=["POST"])
557
- def route_presence():
558
- try:
559
- j = request.get_json(force=True) or {}
560
- note = (j.get("note","Có người phía trước") or "Có người phía trước")
561
- greeting = f"Xin chào! {note}"
562
- CONV.append(("__presence__", greeting))
563
- push_display("RADAR: " + note[:60])
564
- # Telegram notify
565
- if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
566
- try:
567
- send_telegram_message(f"⚠️ Robot: Phát hiện người - {note}")
568
- except Exception:
569
- logger.exception("telegram notify error")
570
- # Return greeting audio if possible
571
- try:
572
- audio_bytes = tts_get_bytes_hf(greeting, model_list=[HF_TTS_MODEL] if HF_TTS_MODEL else None)
573
- return Response(audio_bytes, mimetype="audio/mpeg")
574
- except Exception:
575
- # fallback to text only
576
- return jsonify({"greeting": greeting})
577
- except Exception as e:
578
- logger.exception("presence error")
579
- return jsonify({"error": str(e)}), 500
580
-
581
- @app.route("/display", methods=["GET"])
582
- def route_display():
583
- return jsonify({"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONV)})
584
-
585
- @app.route("/config", methods=["GET","POST"])
586
- def route_config():
587
- # quick config view/change via JSON (NOT secure — for debug only)
588
- if request.method == "GET":
589
- return jsonify({
590
- "hf_token_set": bool(HF_TOKEN),
591
- "hf_model": HF_MODEL,
592
- "hf_tts_model": HF_TTS_MODEL,
593
- "hf_stt_model": HF_STT_MODEL,
594
- "telegram": bool(TELEGRAM_TOKEN and TELEGRAM_CHAT_ID)
595
- })
596
- else:
597
- try:
598
- j = request.get_json(force=True) or {}
599
- # we cannot change secrets here; just accept display config changes
600
- return jsonify({"ok": True, "received": j})
601
- except Exception as e:
602
- return jsonify({"error": str(e)}), 400
603
-
604
- # ---------- Gradio UI (optional) ----------
605
- def start_gradio_in_thread():
606
- if not _HAS_GRADIO:
607
- logger.info("Gradio not installed - skipping Gradio UI")
608
- return
609
- try:
610
- import gradio as gr
611
- def gradio_chat(audio, text, temp, max_tokens, model_override):
612
- user_text = (text or "").strip()
613
- if audio:
614
- # read path (gradio returns path)
615
- try:
616
- with open(audio, "rb") as f:
617
- b = f.read()
618
- stt = stt_from_bytes_hf(b)
619
- if stt and not stt.startswith("[ERROR]"):
620
- user_text = stt
621
- except Exception:
622
- logger.exception("gradio stt failed")
623
- if not user_text:
624
- return None, ""
625
- prompt = f"You are KC Robot AI, bilingual assistant. Answer in the same language as the user.\\nUser: {user_text}\\nAssistant:"
626
- model = model_override.strip() if model_override else HF_MODEL
627
- try:
628
- ans = llm_generate(prompt, model_override=model, max_new_tokens=int(max_tokens), temperature=float(temp))
629
- except Exception as e:
630
- ans = f"[LLM error] {e}"
631
- # generate tts bytes for preview
632
- try:
633
- audio_bytes = tts_get_bytes_hf(ans, model_list=[HF_TTS_MODEL] if HF_TTS_MODEL else None)
634
- except Exception:
635
- audio_bytes = None
636
- chat_history = [( "You", user_text ), ( "Bot", ans )]
637
- if audio_bytes:
638
- return (chat_history, (audio_bytes, "audio/mpeg"))
639
- return (chat_history, None)
640
-
641
- with gr.Blocks(title="KC Robot AI v7.2 (Gradio)") as demo:
642
- gr.Markdown("## KC Robot AI v7.2 Gradio UI")
643
- with gr.Row():
644
- with gr.Column(scale=2):
645
- chatbot = gr.Chatbot([], elem_id="chatbot").style(height=420)
646
- txt = gr.Textbox(lines=2, placeholder="Nhập câu (VN/EN)...", label="Text input")
647
- mic = gr.Audio(source="microphone", type="filepath", label="Record voice")
648
- btn = gr.Button("Send")
649
- with gr.Row():
650
- temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
651
- tokens = gr.Slider(32, 1024, value=256, step=16, label="Max tokens")
652
- model_override = gr.Textbox(label="Model override (optional)", placeholder=HF_MODEL)
653
- with gr.Column(scale=1):
654
- gr.Markdown("### TTS / STT")
655
- tts_in = gr.Textbox(lines=2, label="Text → TTS")
656
- tts_btn = gr.Button("Create TTS")
657
- tts_audio = gr.Audio(label="TTS audio", interactive=False)
658
- up = gr.Audio(source="upload", type="filepath", label="Upload audio")
659
- stt_btn = gr.Button("Transcribe")
660
- stt_out = gr.Textbox(label="Transcription")
661
- def send_click(audio_file, typed, temp_v, max_toks, model_o, chat_history):
662
- # reuse gradio_chat
663
- result = gradio_chat(audio_file, typed, temp_v, max_toks, model_o)
664
- if result is None:
665
- return chat_history or [], ""
666
- (chat_hist, audio_blob) = result
667
- history = chat_history or []
668
- # append
669
- for item in chat_hist:
670
- history.append(item)
671
- return history, ""
672
- btn.click(send_click, inputs=[mic, txt, temp, tokens, model_override, chatbot], outputs=[chatbot, txt])
673
- tts_btn.click(lambda txt_in: tts_get_bytes_hf(txt_in) if txt_in else None, inputs=[tts_in], outputs=[tts_audio])
674
-
675
- # Run gradio in new thread
676
- def _run():
677
- try:
678
- demo.launch(server_name="0.0.0.0", server_port=GRADIO_PORT, share=False, prevent_thread_lock=True)
679
- except Exception:
680
- logger.exception("Gradio failed to launch")
681
- t = threading.Thread(target=_run, daemon=True)
682
- t.start()
683
- logger.info("Gradio thread started on port %s", GRADIO_PORT)
684
- except Exception:
685
- logger.exception("start_gradio_in_thread failed")
686
-
687
- # start gradio if available
688
- start_gradio_in_thread()
689
-
690
- # ---------- Run app ----------
691
  if __name__ == "__main__":
692
- logger.info("KC Robot AI v7.2 starting on port %s", PORT)
693
- if not HF_TOKEN:
694
- logger.warning("HF_TOKEN not set. Add HF_TOKEN to Space Secrets.")
695
- if not HF_MODEL:
696
- logger.warning("HF_MODEL not set. Add HF_MODEL to Space Secrets.")
697
- # start flask
698
- app.run(host="0.0.0.0", port=PORT)
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ # ==========================================================
4
+ # KC ROBOT AI - APP.PY (V2.0 MAX FINAL)
5
+ # Cloud AI Robot with Gemini 2.5 Flash + ESP32 + Telegram
6
+ # ==========================================================
 
 
 
 
 
 
7
 
8
+ from flask import Flask, request, jsonify, render_template_string
9
+ from google import genai
10
  import requests
11
+ import os
12
+ import time
13
+ from gtts import gTTS
14
+ from langdetect import detect
15
+ import tempfile
16
+ import base64
17
+
18
+ # ==========================================================
19
+ # CONFIGURATION
20
+ # ==========================================================
21
+
22
# Runtime settings come from environment variables, which the hosting
# platform (Cloud Run or Hugging Face) fills in from its configured secrets.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID")

# The Flask application object.
app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # ==========================================================
32
+ # SETUP GEMINI CLIENT
33
+ # ==========================================================
34
# Build the Gemini client only when an API key is present; otherwise leave
# ``client`` as None so the missing key can be detected later.
client = genai.Client(api_key=GEMINI_API_KEY) if GEMINI_API_KEY else None
if client is None:
    print("❌ ERROR: No Gemini API Key found. Please add GEMINI_API_KEY in Secrets.")
39
+
40
+ # ==========================================================
41
+ # TELEGRAM UTILITIES
42
+ # ==========================================================
43
def send_telegram_message(text):
    """Send ``text`` to the configured Telegram chat (best effort).

    Prints a warning and does nothing when ``TELEGRAM_TOKEN`` or
    ``TELEGRAM_CHAT_ID`` is unset. Delivery problems are printed and never
    raised, so a Telegram outage cannot break the caller.

    Args:
        text: Message body to deliver.

    Returns:
        None, in every case.
    """
    if not TELEGRAM_TOKEN or not TELEGRAM_CHAT_ID:
        print("⚠️ Telegram not configured.")
        return
    url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
    payload = {"chat_id": TELEGRAM_CHAT_ID, "text": text}
    try:
        response = requests.post(url, json=payload, timeout=5)
        # requests does not raise on 4xx/5xx replies, so a rejected request
        # would otherwise go unnoticed; report it explicitly.
        if response.status_code != 200:
            print("Telegram Error:", response.status_code, response.text[:300])
    except Exception as e:
        # Deliberately broad: notification is optional and must never raise.
        print("Telegram Error:", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ # ==========================================================
55
+ # GEMINI AI RESPONSE
56
+ # ==========================================================
57
def ask_gemini(prompt: str):
    """Ask the configured Gemini model for a reply to ``prompt``.

    Args:
        prompt: Text passed as ``contents`` to ``generate_content``.

    Returns:
        The stripped reply text, or a human-readable warning string when the
        client is not configured, the response carries no text, or the call
        raises. This function never raises.
    """
    if not client:
        return "⚠️ Gemini API key missing. Please configure in Secrets."
    try:
        response = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=prompt,
        )
        # Prefer the ``text`` attribute; fall back to a mapping-style
        # response. The membership test is only attempted on real dicts
        # because ``"text" in obj`` raises TypeError on plain objects.
        text = getattr(response, "text", None)
        if text is None and isinstance(response, dict):
            text = response.get("text")
        # ``text`` can be None (presumably when the response has no text
        # part); report that explicitly instead of failing on ``.strip()``.
        if text is None:
            return "⚠️ No response text from Gemini."
        return str(text).strip()
    except Exception as e:
        print("Gemini Error:", e)
        return f"⚠️ Gemini Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
+ # ==========================================================
77
+ # LANGUAGE DETECTION & TTS
78
+ # ==========================================================
79
def text_to_speech(text):
    """Synthesize ``text`` with gTTS and return the MP3 as a base64 string.

    The language is detected from the text; anything other than ``"vi"`` or
    ``"en"`` (or a failed detection) falls back to ``"en"``. Audio is built
    in memory, so no temporary file is created or left behind.

    Args:
        text: Text to speak.

    Returns:
        Base64-encoded MP3 data as ``str``, or ``None`` when ``text`` is
        empty / not a string or synthesis fails.
    """
    import io  # local import: not part of this module's import block

    # Nothing to speak: skip language detection and the network call.
    if not isinstance(text, str) or not text.strip():
        return None
    try:
        try:
            lang = detect(text)
        except Exception:
            # Detection can fail on input it cannot classify (e.g. digits
            # only, presumably); speaking in English beats returning no audio.
            lang = "en"
        if lang not in ("vi", "en"):
            lang = "en"
        buffer = io.BytesIO()
        gTTS(text=text, lang=lang).write_to_fp(buffer)
        return base64.b64encode(buffer.getvalue()).decode("utf-8")
    except Exception as e:
        print("TTS Error:", e)
        return None
94
+
95
+ # ==========================================================
96
+ # SIMPLE HTML INTERFACE (for testing)
97
+ # ==========================================================
98
# Minimal browser test page served at "/". Chat text is inserted with
# textContent / text nodes (never innerHTML), so neither the user's input nor
# the model's reply can inject markup or script into the page.
HTML_PAGE = """
<!DOCTYPE html>
<html>
<head>
<title>KC Robot AI v2.0</title>
<style>
body { font-family: Arial; text-align: center; background-color: #101010; color: white; }
input, button { padding: 10px; font-size: 16px; margin: 5px; }
#chat { max-width: 700px; margin: auto; text-align: left; background: #202020; padding: 20px; border-radius: 10px; }
.msg-user { color: #4af; }
.msg-bot { color: #fa4; margin-left: 20px; }
audio { margin-top: 10px; }
</style>
</head>
<body>
<h1>🤖 KC Robot AI v2.0 MAX FINAL</h1>
<div id="chat"></div>
<br>
<input id="user_input" placeholder="Nói gì đó..." style="width:60%">
<button onclick="sendMessage()">Gửi</button>

<script>
function appendMessage(chat, cssClass, label, text) {
    const row = document.createElement("div");
    row.className = cssClass;
    const who = document.createElement("b");
    who.textContent = label;
    row.appendChild(who);
    row.appendChild(document.createTextNode(" " + text));
    chat.appendChild(row);
}

async function sendMessage() {
    const box = document.getElementById("user_input");
    const input = box.value;
    if (!input) return;
    const chat = document.getElementById("chat");
    appendMessage(chat, "msg-user", "Bạn:", input);
    box.value = "";
    try {
        const res = await fetch("/api/chat", {
            method: "POST",
            headers: {"Content-Type": "application/json"},
            body: JSON.stringify({message: input})
        });
        const data = await res.json();
        appendMessage(chat, "msg-bot", "Robot:", data.reply || data.error || "");
        if (data.audio) {
            const audio = document.createElement("audio");
            audio.src = "data:audio/mp3;base64," + data.audio;
            audio.controls = true;
            chat.appendChild(audio);
        }
    } catch (err) {
        appendMessage(chat, "msg-bot", "Robot:", "⚠️ " + err);
    }
    chat.scrollTop = chat.scrollHeight;
}
</script>
</body>
</html>
"""
145
 
146
@app.route("/")
def home():
    """Serve the built-in chat test page rendered from ``HTML_PAGE``."""
    page = render_template_string(HTML_PAGE)
    return page
149
+
150
+ # ==========================================================
151
+ # API ENDPOINTS
152
+ # ==========================================================
153
+
154
@app.route("/api/chat", methods=["POST"])
def api_chat():
    """Answer one chat message with a Gemini reply and optional speech.

    Expects a JSON object body with a non-empty string ``"message"``. The
    user message and the reply are both forwarded to Telegram.

    Returns:
        JSON ``{"reply": <text>, "audio": <value from text_to_speech>}`` on
        success, or JSON ``{"error": ...}`` with HTTP 400 when the body is
        not a JSON object or ``"message"`` is missing, blank, or not a string.
    """
    # silent=True yields None for a wrong content type or malformed JSON, so
    # the client always receives this endpoint's JSON error instead of a
    # framework-generated error page.
    data = request.get_json(silent=True)
    # A JSON string or list would pass a bare ``in`` check and then fail on
    # indexing, so require an object explicitly.
    if not isinstance(data, dict) or "message" not in data:
        return jsonify({"error": "Missing 'message'"}), 400

    user_message = data["message"]
    if not isinstance(user_message, str) or not user_message.strip():
        return jsonify({"error": "'message' must be a non-empty string"}), 400

    print(f"🧠 User said: {user_message}")
    send_telegram_message(f"User: {user_message}")

    ai_reply = ask_gemini(user_message)
    send_telegram_message(f"Robot: {ai_reply}")

    audio_b64 = text_to_speech(ai_reply)
    return jsonify({"reply": ai_reply, "audio": audio_b64})
169
+
170
+ # ESP32 sensor endpoint
171
@app.route("/api/sensor", methods=["POST"])
def sensor_data():
    """Receive a sensor update as JSON and forward it to Telegram.

    Returns:
        JSON ``{"status": "received"}`` on success, or JSON
        ``{"error": "No data"}`` with HTTP 400 when the body is missing,
        empty, or not valid JSON.
    """
    # silent=True turns a wrong content type or malformed JSON into None, so
    # the device gets the JSON error below rather than a framework error page.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({"error": "No data"}), 400
    msg = f"👁️ ESP32 Sensor update: {data}"
    send_telegram_message(msg)
    return jsonify({"status": "received"})
179
+
180
+ # Health check
181
@app.route("/ping")
def ping():
    """Return a JSON liveness report that includes the Gemini model name."""
    status = {"status": "ok", "model": GEMINI_MODEL}
    return jsonify(status)
184
+
185
+ # ==========================================================
186
+ # MAIN ENTRY POINT
187
+ # ==========================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
if __name__ == "__main__":
    # Listen on the port named by the PORT environment variable, falling back
    # to 8080 when it is unset, and accept connections on all interfaces.
    listen_port = int(os.environ.get("PORT", 8080))
    print(f"🚀 KC Robot AI v2.0 running on port {listen_port}")
    app.run(host="0.0.0.0", port=listen_port)
192
+