kcrobot25 commited on
Commit
04c3137
·
verified ·
1 Parent(s): d1eb143

initial commit

Browse files
Files changed (1) hide show
  1. app.py +176 -315
app.py CHANGED
@@ -1,88 +1,57 @@
1
 
2
  # app.py — KC Robot AI V7.4 MAX FINAL
3
- # 2025 Final build for ESP32 robot brain (Hugging Face inference + Telegram + Flask UI)
4
- #
5
- # REQUIRED Secrets / Environment variables:
6
- # HF_TOKEN -> Hugging Face API token (read/write ideally)
7
- # HF_MODEL -> Model id (e.g. kcrobot40/kc40ai or mistralai/Mistral-7B-Instruct-v0.3)
8
- # TELEGRAM_TOKEN -> Telegram bot token (optional but recommended)
9
- # TELEGRAM_CHAT_ID -> Telegram chat id (optional)
10
  # Optional:
11
- # HF_TTS_MODEL -> HF TTS model id (if available)
12
- # HF_STT_MODEL -> HF STT model id (default openai/whisper-small)
13
- #
14
- # Endpoints:
15
- # GET / -> UI
16
- # GET /health -> status
17
- # POST /ask -> JSON {text, lang?} -> {"answer": "..."}
18
- # POST /tts -> JSON {text} -> audio/mp3
19
- # POST /stt -> multipart/form-data file or raw bytes -> {"text": "..."}
20
- # POST /presence -> JSON {note?} -> greeting (and audio if TTS available)
21
- # GET /display -> JSON lines for OLED/HUB75
22
- # GET /model_check -> check HF token & model (help debug 403/404)
23
- #
24
- # Notes:
25
- # - This file aims to be robust: good error messages for 403/404/400 and helpful logs.
26
- # - TTS fallback uses gTTS when HF TTS isn't configured or fails.
27
- # - Telegram integration uses direct requests to Telegram API (no extra dependency).
28
- # - Saves conversation to conversation_log.jsonl for future analysis.
29
- #
30
- # Author: KC Robot helper (assistant)
31
- # Version: v7.4-max-final
32
 
33
  import os
34
  import io
35
  import sys
36
- import json
37
  import time
 
38
  import uuid
39
- import queue
40
  import logging
41
  import threading
42
- from typing import Any, Dict, List, Tuple, Optional
43
  from pathlib import Path
44
 
45
  import requests
46
- from flask import Flask, request, jsonify, render_template_string, Response, send_file
47
 
48
- # Optional TTS fallback
49
  try:
50
  from gtts import gTTS
51
  _HAS_GTTS = True
52
  except Exception:
53
  _HAS_GTTS = False
54
 
55
- # Optional Gradio (we won't force it)
56
- try:
57
- import gradio as gr # noqa: F401
58
- _HAS_GRADIO = True
59
- except Exception:
60
- _HAS_GRADIO = False
61
-
62
  # Logging
63
- logging.basicConfig(stream=sys.stdout, level=logging.INFO,
 
64
  format="%(asctime)s %(levelname)s %(name)s: %(message)s")
65
  logger = logging.getLogger("kcrobot.v7.4")
66
 
67
- # ===== Environment / secrets =====
68
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
69
- HF_MODEL = os.getenv("HF_MODEL", "").strip()
70
- HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "").strip() # optional
71
  HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()
72
 
73
  TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
74
  TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID", "").strip()
75
 
76
  PORT = int(os.getenv("PORT", 7860))
77
- # HF headers
78
  HF_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
79
 
80
- # tmp dir and conversation log
81
  TMPDIR = Path("/tmp/kcrobot") if os.name != "nt" else Path.cwd() / "tmp_kcrobot"
82
  TMPDIR.mkdir(parents=True, exist_ok=True)
83
  CONV_LOG = TMPDIR / "conversation_log.jsonl"
84
 
85
- # ===== in-memory buffers =====
86
  CONVERSATION: List[Tuple[str, str]] = []
87
  DISPLAY_BUFFER: List[str] = []
88
  DISPLAY_LIMIT = 6
@@ -93,241 +62,190 @@ def push_display(line: str):
93
  if len(DISPLAY_BUFFER) > DISPLAY_LIMIT:
94
  DISPLAY_BUFFER = DISPLAY_BUFFER[-DISPLAY_LIMIT:]
95
 
96
- def save_conversation(user: str, bot: str):
97
  try:
98
  with open(CONV_LOG, "a", encoding="utf-8") as f:
99
  f.write(json.dumps({"time": time.time(), "user": user, "bot": bot}, ensure_ascii=False) + "\n")
100
  except Exception:
101
- logger.exception("Failed to write conversation log")
102
 
103
- # ===== language detection & cleaning =====
104
- VI_CHARS = set("ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụứừửữựýỳỷỹỵ")
105
-
106
- def detect_language(text: str) -> str:
107
- """Simple heuristic: returns 'vi' or 'en'."""
108
- if not text:
109
- return "en"
110
- for ch in text.lower():
111
- if ch in VI_CHARS:
112
- return "vi"
113
- return "en"
114
-
115
- import re
116
-
117
- def clean_input_text(text: str) -> str:
118
- """Normalize whitespace and remove control chars; keep punctuation."""
119
  if not isinstance(text, str):
120
  text = str(text)
121
- # remove undesirable control characters
 
122
  text = re.sub(r'[\x00-\x08\x0b-\x0c\x0e-\x1f]+', ' ', text)
123
- # normalize whitespace
124
  text = re.sub(r'\s+', ' ', text).strip()
125
  return text
126
 
127
- def tts_postprocess_text_for_prosody(text: str) -> str:
128
- """
129
- Improve TTS naturalness for gTTS fallback by inserting small pauses:
130
- - ensure sentences end with a period/exclamation/question
131
- - add commas in long sentences to encourage small pauses
132
- This is a lightweight heuristic; HF TTS models may produce better prosody.
133
- """
134
- text = text.strip()
135
- if not text:
136
- return text
137
- # ensure terminal punctuation
138
- if text[-1] not in ".!?":
139
- text = text + "."
140
- # insert commas for long stretches (every ~12-16 words) to help gTTS
141
- words = text.split()
142
- if len(words) > 16:
143
- chunks = []
144
- for i in range(0, len(words), 12):
145
- chunks.append(" ".join(words[i:i+12]))
146
- text = ", ".join(chunks)
147
- if text[-1] not in ".!?":
148
- text = text + "."
149
- return text
150
 
151
- # ===== Hugging Face helpers =====
152
  def hf_post_json(model_id: str, payload: dict, timeout: int = 90) -> requests.Response:
153
  if not HF_TOKEN:
154
  raise RuntimeError("HF_TOKEN not configured")
155
  url = f"https://api-inference.huggingface.co/models/{model_id}"
156
  headers = dict(HF_HEADERS)
157
  headers["Content-Type"] = "application/json"
158
- logger.debug("HF POST JSON -> %s payload keys: %s", model_id, list(payload.keys()))
159
- r = requests.post(url, headers=headers, json=payload, timeout=timeout)
160
- return r
161
-
162
- def hf_try_models_json(models: List[str], payload: dict, timeout: int = 90) -> Tuple[str, Any]:
163
- """Try each model in models; return (model_used, parsed_json_or_bytes)."""
164
- last_err = None
165
- for m in models:
166
- try:
167
- r = hf_post_json(m, payload, timeout=timeout)
168
- except Exception as e:
169
- last_err = f"network error for {m}: {e}"
170
- logger.warning(last_err)
171
- continue
172
- logger.debug("HF status for %s = %s", m, r.status_code)
173
- if r.status_code == 200:
174
- try:
175
- return m, r.json()
176
- except Exception:
177
- return m, r.content
178
- else:
179
- last_err = f"HTTP {r.status_code} for {m}: {r.text[:300]}"
180
- logger.warning(last_err)
181
- # continue to next model
182
- raise RuntimeError(f"All HF attempts failed. Last error: {last_err}")
183
 
184
- def hf_post_bytes(model_id: str, data: bytes, content_type: str = "application/octet-stream", timeout: int = 120) -> requests.Response:
185
  if not HF_TOKEN:
186
  raise RuntimeError("HF_TOKEN not configured")
187
  url = f"https://api-inference.huggingface.co/models/{model_id}"
188
  headers = dict(HF_HEADERS)
189
  headers["Content-Type"] = content_type
190
- r = requests.post(url, headers=headers, data=data, timeout=timeout)
191
- return r
192
-
193
- def hf_try_models_bytes(models: List[str], data: bytes, content_type: str = "application/octet-stream", timeout: int = 120) -> Tuple[str, requests.Response]:
194
- last_err = None
195
- for m in models:
196
- try:
197
- r = hf_post_bytes(m, data, content_type=content_type, timeout=timeout)
198
- except Exception as e:
199
- last_err = f"network error for {m}: {e}"
200
- logger.warning(last_err)
201
- continue
202
- if r.status_code == 200:
203
- return m, r
204
- else:
205
- last_err = f"HTTP {r.status_code} for {m}: {r.text[:300]}"
206
- logger.warning(last_err)
207
- raise RuntimeError(f"All HF byte-post attempts failed. Last error: {last_err}")
208
 
209
  def parse_hf_text_output(obj: Any) -> str:
210
- """Normalize HF text output shapes."""
211
  try:
212
  if isinstance(obj, dict):
213
  if "generated_text" in obj:
214
- return obj.get("generated_text", "")
215
  if "text" in obj:
216
- return obj.get("text", "")
217
  if "choices" in obj and isinstance(obj["choices"], list) and obj["choices"]:
218
  c = obj["choices"][0]
219
- return c.get("text") or c.get("message", {}).get("content", "") or str(c)
220
- # fallback
221
  return json.dumps(obj, ensure_ascii=False)
222
  if isinstance(obj, list) and obj:
223
  first = obj[0]
224
  if isinstance(first, dict):
225
- for k in ("generated_text", "text"):
226
  if k in first:
227
- return first.get(k, "")
228
  return str(first)
229
  return str(obj)
230
  except Exception:
231
- logger.exception("parse_hf_text_output error")
232
  return str(obj)
233
 
234
- # High-level wrappers
235
  def hf_text_generate(prompt: str, model_override: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
236
- models = []
237
- if model_override:
238
- models.append(model_override)
239
- elif HF_MODEL:
240
- models.append(HF_MODEL)
241
- else:
242
  raise RuntimeError("HF_MODEL not configured")
243
  payload = {
244
  "inputs": prompt,
245
  "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
246
  "options": {"wait_for_model": True}
247
  }
248
- model_used, out = hf_try_models_json(models, payload, timeout=120)
249
- return parse_hf_text_output(out)
250
-
251
- def hf_stt_from_bytes(bytes_data: bytes, model_override: Optional[str] = None) -> str:
252
- models = [model_override] if model_override else ([HF_STT_MODEL] if HF_STT_MODEL else [])
253
- if not models:
254
- raise RuntimeError("STT model not configured")
255
- model_used, resp = hf_try_models_bytes(models, bytes_data, content_type="application/octet-stream", timeout=180)
256
- try:
257
- j = resp.json()
258
- if isinstance(j, dict) and "text" in j:
259
- return j["text"]
260
- return parse_hf_text_output(j)
261
- except Exception:
262
- return resp.text or ""
263
-
264
- def hf_tts_get_bytes(text: str, model_list: Optional[List[str]] = None) -> bytes:
265
- """Try HF TTS models (if set) then fallback to gTTS."""
266
- txt = text.strip()
267
- if not txt:
268
- raise RuntimeError("Empty text for TTS")
269
- models = model_list if model_list else ([HF_TTS_MODEL] if HF_TTS_MODEL else [])
270
- if models:
271
- payload = {"inputs": txt}
272
  try:
273
- model_used, resp = hf_try_models_json(models, payload, timeout=120)
274
- # some HF TTS returns binary audio directly; requests gives .content
275
- if isinstance(resp, (bytes, bytearray)):
276
- return bytes(resp)
277
- # if json with base64? unlikely; try to detect
278
- if isinstance(resp, dict) and "error" in resp:
279
- raise RuntimeError(f"HF TTS model error: {resp.get('error')}")
280
- # if response is binary streamed we would have returned earlier; fallback:
281
- except Exception as e:
282
- logger.warning("HF TTS failed: %s", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # fallback gTTS
284
  if _HAS_GTTS:
285
  try:
286
- prosody = tts_postprocess_text_for_prosody(txt)
287
- tts = gTTS(text=prosody, lang="vi" if detect_language(txt) == "vi" else "en")
 
 
 
 
 
 
 
 
 
288
  bio = io.BytesIO()
289
  tts.write_to_fp(bio)
290
  bio.seek(0)
291
  return bio.read()
292
- except Exception as e:
293
  logger.exception("gTTS fallback failed")
294
- raise RuntimeError(f"TTS fallback failed: {e}")
295
  raise RuntimeError("No TTS available (no HF_TTS_MODEL and gTTS not installed)")
296
 
297
- # ===== Telegram helpers (simple requests) =====
298
  def telegram_send_message(chat_id: str, text: str) -> bool:
299
  if not TELEGRAM_TOKEN or not chat_id:
300
- logger.debug("telegram not configured")
301
  return False
302
- url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
303
  try:
 
304
  r = requests.post(url, json={"chat_id": chat_id, "text": text}, timeout=8)
305
  if r.status_code != 200:
306
  logger.warning("Telegram sendMessage failed %s: %s", r.status_code, r.text[:300])
307
  return False
308
  return True
309
  except Exception:
310
- logger.exception("telegram_send_message exception")
311
  return False
312
 
313
  def telegram_send_audio(chat_id: str, audio_bytes: bytes, filename: str = "reply.mp3") -> bool:
314
  if not TELEGRAM_TOKEN or not chat_id:
315
- logger.debug("telegram not configured")
316
  return False
317
- url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendAudio"
318
- files = {"audio": (filename, io.BytesIO(audio_bytes), "audio/mpeg")}
319
- data = {"chat_id": chat_id}
320
  try:
 
 
 
321
  r = requests.post(url, files=files, data=data, timeout=30)
322
  if r.status_code != 200:
323
  logger.warning("Telegram sendAudio failed %s: %s", r.status_code, r.text[:300])
324
  return False
325
  return True
326
  except Exception:
327
- logger.exception("telegram_send_audio exception")
328
  return False
329
 
330
- # Telegram poller (long-polling)
331
  def telegram_poller_loop():
332
  if not TELEGRAM_TOKEN:
333
  logger.info("Telegram token not set; poller disabled")
@@ -359,11 +277,11 @@ def telegram_poller_loop():
359
  if lower.startswith("/ask "):
360
  q = text[5:].strip()
361
  try:
362
- ans = hf_text_generate(q)
363
  except Exception as e:
364
- ans = f"[HF error] {e}"
365
  try:
366
- requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
367
  except Exception:
368
  logger.exception("tg reply failed")
369
  elif lower.startswith("/say "):
@@ -384,21 +302,19 @@ def telegram_poller_loop():
384
  except Exception:
385
  pass
386
  except Exception:
387
- logger.exception("telegram poller loop crashed, sleeping 3s")
388
  time.sleep(3)
389
 
390
- # Start telegram thread if token exists
391
  if TELEGRAM_TOKEN:
392
  try:
393
  t = threading.Thread(target=telegram_poller_loop, daemon=True)
394
  t.start()
395
  except Exception:
396
- logger.exception("Failed to start telegram poller thread")
397
 
398
- # ===== Flask app & routes =====
399
  app = Flask(__name__)
400
 
401
- # Simple responsive UI (mobile friendly) - shows model & buttons
402
  INDEX_HTML = """
403
  <!doctype html>
404
  <html>
@@ -409,7 +325,7 @@ INDEX_HTML = """
409
  <style>
410
  body{font-family:Arial,Helvetica,sans-serif;margin:12px;color:#111}
411
  .box{max-width:900px;margin:auto}
412
- textarea{width:100%;height:80px;padding:10px;font-size:16px;border-radius:8px;border:1px solid #ddd}
413
  button{padding:10px 14px;margin:6px 4px;border-radius:8px;background:#0b74de;color:white;border:none;cursor:pointer;font-weight:700}
414
  #chat{border:1px solid #eee;padding:10px;height:320px;overflow:auto;background:#fafafa;border-radius:8px}
415
  .you{color:#0b63d6;margin:6px 0}
@@ -494,7 +410,7 @@ def health():
494
  def route_ask():
495
  try:
496
  j = request.get_json(force=True) or {}
497
- text = clean_input_text(j.get("text","") or "")
498
  lang = (j.get("lang","auto") or "auto")
499
  if not text:
500
  return jsonify({"error":"no text"}), 400
@@ -510,10 +426,10 @@ def route_ask():
510
  logger.exception("hf_text_generate failed")
511
  return jsonify({"error": str(e)}), 500
512
  CONVERSATION.append((text, ans))
513
- save_conversation(text, ans)
514
  push_display("YOU: " + (text[:60]))
515
  push_display("BOT: " + (ans[:60] if isinstance(ans,str) else str(ans)[:60]))
516
- # send short notification to telegram optionally (not full conv)
517
  if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
518
  try:
519
  telegram_send_message(TELEGRAM_CHAT_ID, f"You: {text}\nBot: {ans[:300]}")
@@ -528,14 +444,13 @@ def route_ask():
528
  def route_tts():
529
  try:
530
  j = request.get_json(force=True) or {}
531
- text = clean_input_text(j.get("text","") or "")
532
  if not text:
533
  return jsonify({"error":"no text"}), 400
534
  try:
535
- models = [HF_TTS_MODEL] if HF_TTS_MODEL else None
536
- audio_bytes = hf_tts_get_bytes(text, model_list=models)
537
  except Exception as e:
538
- logger.warning("TTS error, returning textual error: %s", e)
539
  return jsonify({"error": str(e)}), 500
540
  return Response(audio_bytes, mimetype="audio/mpeg")
541
  except Exception as e:
@@ -558,7 +473,7 @@ def route_stt():
558
  logger.exception("STT failed")
559
  return jsonify({"error": str(e)}), 500
560
  CONVERSATION.append((f"[voice] {txt}", ""))
561
- save_conversation(f"[voice] {txt}", "")
562
  push_display("VOICE: " + (txt[:60] if isinstance(txt,str) else str(txt)))
563
  return jsonify({"text": txt})
564
  except Exception as e:
@@ -567,25 +482,27 @@ def route_stt():
567
 
568
  @app.route("/presence", methods=["POST"])
569
  def route_presence():
 
 
 
 
 
570
  try:
571
  j = request.get_json(force=True) or {}
572
- note = clean_input_text(j.get("note","Có người phía trước") or "Có người phía trước")
573
  greeting = f"Xin chào! {note}"
574
  CONVERSATION.append(("__presence__", greeting))
575
- save_conversation("__presence__", greeting)
576
  push_display("RADAR: " + note[:60])
577
- # Telegram notify
578
  if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
579
  try:
580
  telegram_send_message(TELEGRAM_CHAT_ID, f"⚠️ Robot: Phát hiện người - {note}")
581
  except Exception:
582
  logger.exception("telegram notify failed")
583
- # return greeting audio if possible
584
  try:
585
- audio_bytes = hf_tts_get_bytes(greeting, model_list=[HF_TTS_MODEL] if HF_TTS_MODEL else None)
586
  return Response(audio_bytes, mimetype="audio/mpeg")
587
  except Exception:
588
- # fallback: return text greeting
589
  return jsonify({"greeting": greeting})
590
  except Exception as e:
591
  logger.exception("route_presence exception")
@@ -596,110 +513,54 @@ def route_display():
596
  return jsonify({"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONVERSATION)})
597
 
598
  @app.route("/model_check", methods=["GET"])
599
- def route_model_check():
600
- """Quick check for HF_TOKEN + HF_MODEL: calls model with a tiny ping prompt."""
 
 
 
601
  if not HF_TOKEN:
602
  return jsonify({"ok": False, "error": "HF_TOKEN not set"}), 400
603
  if not HF_MODEL:
604
  return jsonify({"ok": False, "error": "HF_MODEL not set"}), 400
605
- test_prompt = "Hello model. Respond with 'OK' only."
 
606
  try:
607
- payload = {"inputs": test_prompt, "parameters": {"max_new_tokens": 8}, "options": {"wait_for_model": True}}
608
- r = requests.post(f"https://api-inference.huggingface.co/models/{HF_MODEL}", headers={**HF_HEADERS, "Content-Type": "application/json"}, json=payload, timeout=30)
609
- if r.status_code == 200:
610
- try:
611
- j = r.json()
612
- txt = parse_hf_text_output(j)
613
- except Exception:
614
- txt = r.text[:400]
615
- return jsonify({"ok": True, "model": HF_MODEL, "response_preview": txt})
616
- else:
617
- return jsonify({"ok": False, "status_code": r.status_code, "text": r.text[:800]}), 500
618
  except Exception as e:
619
  logger.exception("model_check failed")
620
  return jsonify({"ok": False, "error": str(e)}), 500
621
 
622
- # ===== Optional: start a Gradio UI in background to give extra interface (if installed) =====
623
- def start_gradio_thread():
624
- if not _HAS_GRADIO:
625
- logger.info("Gradio not installed; skipping Gradio UI")
626
- return
 
 
 
 
 
627
  try:
628
- import gradio as gr
629
- def gr_chat(audio, text, temp, max_tokens, model_override):
630
- user_text = (text or "").strip()
631
- if audio:
632
- # audio is path to file
633
- try:
634
- with open(audio, "rb") as f:
635
- b = f.read()
636
- stt = hf_stt_from_bytes(b)
637
- if stt and not stt.startswith("[ERROR]"):
638
- user_text = stt
639
- except Exception:
640
- logger.exception("gradio stt failed")
641
- if not user_text:
642
- return None, ""
643
- prompt = f"You are KC Robot AI, bilingual assistant. Answer in same language. User: {user_text}\nAssistant:"
644
- model = model_override.strip() if model_override else HF_MODEL
645
- try:
646
- ans = hf_text_generate(prompt, model_override=model, max_new_tokens=int(max_tokens), temperature=float(temp))
647
- except Exception as e:
648
- ans = f"[LLM error] {e}"
649
- # tts preview
650
- try:
651
- audio_bytes = hf_tts_get_bytes(ans, model_list=[HF_TTS_MODEL] if HF_TTS_MODEL else None)
652
- except Exception:
653
- audio_bytes = None
654
- history = [("You", user_text), ("Bot", ans)]
655
- return history, (audio_bytes, "audio/mpeg") if audio_bytes else None
656
-
657
- with gr.Blocks(title="KC Robot AI v7.4 Gradio UI") as demo:
658
- gr.Markdown("## KC Robot AI v7.4 — Gradio")
659
- with gr.Row():
660
- with gr.Column():
661
- chatbot = gr.Chatbot([], elem_id="chatbot").style(height=420)
662
- txt = gr.Textbox(lines=2, placeholder="Nhập câu (VN/EN)...")
663
- mic = gr.Audio(source="microphone", type="filepath")
664
- btn = gr.Button("Send")
665
- with gr.Row():
666
- temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
667
- tokens = gr.Slider(32, 1024, value=256, step=16, label="Max tokens")
668
- model_override = gr.Textbox(label="Model override (optional)", placeholder=HF_MODEL)
669
- with gr.Column():
670
- gr.Markdown("### TTS / STT")
671
- tts_in = gr.Textbox(lines=2, label="Text → TTS")
672
- tts_btn = gr.Button("Create TTS")
673
- tts_audio = gr.Audio(label="TTS audio", interactive=False)
674
- up = gr.Audio(source="upload", type="filepath")
675
- stt_btn = gr.Button("Transcribe")
676
- stt_out = gr.Textbox(label="Transcription")
677
- def send_click(audio_file, typed, temp_v, max_toks, model_o, chat_history):
678
- result = gr_chat(audio_file, typed, temp_v, max_toks, model_o)
679
- if result is None:
680
- return chat_history or [], ""
681
- chat_hist, audio_blob = result
682
- history = chat_history or []
683
- for it in chat_hist:
684
- history.append(it)
685
- return history, ""
686
- btn.click(send_click, inputs=[mic, txt, temp, tokens, model_override, chatbot], outputs=[chatbot, txt])
687
- tts_btn.click(lambda t: hf_tts_get_bytes(t) if t else None, inputs=[tts_in], outputs=[tts_audio])
688
-
689
- def _run():
690
- try:
691
- demo.launch(server_name="0.0.0.0", server_port=7861, share=False, prevent_thread_lock=True)
692
- except Exception:
693
- logger.exception("Gradio launch failed")
694
- t = threading.Thread(target=_run, daemon=True)
695
- t.start()
696
- logger.info("Gradio UI started on port 7861")
697
- except Exception:
698
- logger.exception("start_gradio_thread failed")
699
-
700
- start_gradio_thread()
701
 
702
- # ===== main =====
703
  if __name__ == "__main__":
704
  logger.info("KC Robot AI v7.4 starting. HF_MODEL=%s HF_TTS=%s HF_STT=%s Telegram=%s",
705
  HF_MODEL or "(not set)", HF_TTS_MODEL or "(not set)", HF_STT_MODEL or "(not set)", bool(TELEGRAM_TOKEN and TELEGRAM_CHAT_ID))
@@ -707,5 +568,5 @@ if __name__ == "__main__":
707
  logger.warning("HF_TOKEN not set — set Secret HF_TOKEN to use Hugging Face inference.")
708
  if not HF_MODEL:
709
  logger.warning("HF_MODEL not set — set Secret HF_MODEL to a model id.")
710
- # run Flask
711
  app.run(host="0.0.0.0", port=PORT)
 
1
 
2
  # app.py — KC Robot AI V7.4 MAX FINAL
3
+ # Flask + Hugging Face inference + gTTS fallback + Telegram poller + ESP32 endpoints
4
+ # Secrets expected (in HF Space Settings -> Secrets):
5
+ # HF_TOKEN, HF_MODEL, TELEGRAM_TOKEN (optional), TELEGRAM_CHAT_ID (optional)
 
 
 
 
6
  # Optional:
7
+ # HF_TTS_MODEL, HF_STT_MODEL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  import os
10
  import io
11
  import sys
 
12
  import time
13
+ import json
14
  import uuid
 
15
  import logging
16
  import threading
17
+ from typing import Any, List, Tuple, Optional
18
  from pathlib import Path
19
 
20
  import requests
21
+ from flask import Flask, request, jsonify, Response, render_template_string, send_file
22
 
23
+ # Optional tts fallback
24
  try:
25
  from gtts import gTTS
26
  _HAS_GTTS = True
27
  except Exception:
28
  _HAS_GTTS = False
29
 
 
 
 
 
 
 
 
30
  # Logging
31
+ logging.basicConfig(stream=sys.stdout,
32
+ level=logging.INFO,
33
  format="%(asctime)s %(levelname)s %(name)s: %(message)s")
34
  logger = logging.getLogger("kcrobot.v7.4")
35
 
36
+ # Environment / Secrets
37
  HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
38
+ HF_MODEL = os.getenv("HF_MODEL", "").strip() # required
39
+ HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "").strip() # optional
40
  HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()
41
 
42
  TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
43
  TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID", "").strip()
44
 
45
  PORT = int(os.getenv("PORT", 7860))
46
+
47
  HF_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
48
 
49
+ # tmp dir & logs
50
  TMPDIR = Path("/tmp/kcrobot") if os.name != "nt" else Path.cwd() / "tmp_kcrobot"
51
  TMPDIR.mkdir(parents=True, exist_ok=True)
52
  CONV_LOG = TMPDIR / "conversation_log.jsonl"
53
 
54
+ # in-memory buffers
55
  CONVERSATION: List[Tuple[str, str]] = []
56
  DISPLAY_BUFFER: List[str] = []
57
  DISPLAY_LIMIT = 6
 
62
  if len(DISPLAY_BUFFER) > DISPLAY_LIMIT:
63
  DISPLAY_BUFFER = DISPLAY_BUFFER[-DISPLAY_LIMIT:]
64
 
65
def save_conv(user: str, bot: str):
    """Append one user/bot exchange to the JSONL conversation log (best-effort)."""
    try:
        with open(CONV_LOG, "a", encoding="utf-8") as fh:
            record = json.dumps({"time": time.time(), "user": user, "bot": bot},
                                ensure_ascii=False)
            fh.write(record + "\n")
    except Exception:
        # logging must never crash the caller
        logger.exception("save_conv failed")
71
 
72
+ # small text helpers
73
def clean_text(text: str) -> str:
    """Coerce *text* to str, strip ASCII control characters, collapse whitespace."""
    import re
    s = text if isinstance(text, str) else str(text)
    # control chars (excluding \t, \n, \r, which \s+ normalizes) become spaces
    s = re.sub(r'[\x00-\x08\x0b-\x0c\x0e-\x1f]+', ' ', s)
    return re.sub(r'\s+', ' ', s).strip()
81
 
82
# Vietnamese diacritic characters used for the language heuristic below.
VI_CHARS = set("ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụứừửữựýỳỷỹỵ")

def detect_language(text: str) -> str:
    """Heuristic: 'vi' if any Vietnamese diacritic appears, otherwise 'en'."""
    lowered = (text or "").lower()
    return "vi" if any(ch in VI_CHARS for ch in lowered) else "en"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ # ---------------- Hugging Face helpers ----------------
91
def hf_post_json(model_id: str, payload: dict, timeout: int = 90) -> requests.Response:
    """POST a JSON payload to the HF inference API for *model_id*.

    Raises RuntimeError when HF_TOKEN is missing; otherwise returns the
    raw requests.Response (status handling is the caller's job).
    """
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN not configured")
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}"
    return requests.post(
        endpoint,
        headers={**HF_HEADERS, "Content-Type": "application/json"},
        json=payload,
        timeout=timeout,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
def hf_post_bytes(model_id: str, data: bytes, content_type: str = "application/octet-stream", timeout: int = 180) -> requests.Response:
    """POST raw bytes (e.g. audio) to the HF inference API for *model_id*.

    Raises RuntimeError when HF_TOKEN is missing; returns the raw response.
    """
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN not configured")
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}"
    return requests.post(
        endpoint,
        headers={**HF_HEADERS, "Content-Type": content_type},
        data=data,
        timeout=timeout,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
def parse_hf_text_output(obj: Any) -> str:
    """Normalize the various HF text-endpoint response shapes into a string.

    Handles dicts ({'generated_text'|'text'|'choices'}), lists of dicts, and
    falls back to str()/json.dumps() representations. Never raises.
    """
    try:
        if isinstance(obj, dict):
            if "generated_text" in obj:
                return obj.get("generated_text", "")
            if "text" in obj:
                return obj.get("text", "")
            if "choices" in obj and isinstance(obj["choices"], list) and obj["choices"]:
                top = obj["choices"][0]
                return top.get("text") or top.get("message", {}).get("content", "") or str(top)
            return json.dumps(obj, ensure_ascii=False)
        if isinstance(obj, list) and obj:
            head = obj[0]
            if not isinstance(head, dict):
                return str(head)
            for key in ("generated_text", "text"):
                if key in head:
                    return head.get(key, "")
            return str(head)
        return str(obj)
    except Exception:
        logger.exception("parse_hf_text_output")
        return str(obj)
129
 
 
130
def hf_text_generate(prompt: str, model_override: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """Run text generation on the HF inference API and return the parsed text.

    Uses *model_override* when given, else HF_MODEL. Raises RuntimeError when
    no model is configured or the API answers non-200 (403/404 get hints).
    """
    model = model_override or HF_MODEL
    if not model:
        raise RuntimeError("HF_MODEL not configured")
    request_body = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
        "options": {"wait_for_model": True},
    }
    resp = hf_post_json(model, request_body, timeout=120)
    # guard clauses for the error statuses, success path last
    if resp.status_code == 403:
        raise RuntimeError("HF returned 403 token may lack access rights.")
    if resp.status_code == 404:
        raise RuntimeError("HF returned 404 — model not found (check HF_MODEL).")
    if resp.status_code != 200:
        raise RuntimeError(f"HF returned {resp.status_code}: {resp.text[:300]}")
    try:
        return parse_hf_text_output(resp.json())
    except Exception:
        return resp.text
151
+
152
def hf_stt_from_bytes(audio_bytes: bytes, model_override: Optional[str] = None) -> str:
    """Transcribe raw audio bytes via the HF STT model and return the text.

    Raises RuntimeError when no STT model is configured or on non-200 status.
    """
    model = model_override or HF_STT_MODEL
    if not model:
        raise RuntimeError("HF_STT_MODEL not configured")
    resp = hf_post_bytes(model, audio_bytes, content_type="application/octet-stream", timeout=180)
    if resp.status_code != 200:
        raise RuntimeError(f"HF STT returned {resp.status_code}: {resp.text[:300]}")
    try:
        parsed = resp.json()
    except Exception:
        # body was not JSON — return it verbatim
        return resp.text
    if isinstance(parsed, dict) and "text" in parsed:
        return parsed["text"]
    return parse_hf_text_output(parsed)
167
+
168
def hf_tts_get_bytes(text: str, model_override: Optional[str] = None) -> bytes:
    """
    Synthesize *text* to speech and return raw audio bytes (mp3).

    Order of attempts:
      1. HF TTS model (model_override or HF_TTS_MODEL), if configured.
      2. gTTS fallback, if installed.

    Raises RuntimeError when the text is empty or no TTS backend succeeds.
    """
    text = text.strip()
    if not text:
        raise RuntimeError("TTS text empty")
    # try HF TTS if provided
    model = model_override or HF_TTS_MODEL
    if model:
        # Bug fix: a network/request exception from hf_post_json used to
        # propagate out of this function and skip the gTTS fallback entirely;
        # any HF failure now falls through to the fallback instead.
        try:
            r = hf_post_json(model, {"inputs": text}, timeout=120)
            if r.status_code == 200:
                # Some TTS models return raw audio bytes; others JSON with
                # textual content — return whichever is available.
                if r.content:
                    return r.content
                try:
                    j = r.json()
                    return parse_hf_text_output(j).encode("utf-8")
                except Exception:
                    return r.content
            else:
                logger.warning("HF TTS model returned %s: %s", r.status_code, r.text[:300])
        except Exception as e:
            logger.warning("HF TTS request failed: %s", e)
    # fallback gTTS
    if _HAS_GTTS:
        try:
            lang = "vi" if detect_language(text) == "vi" else "en"
            prosody = text
            # insert light pauses in long sentences to help gTTS prosody
            if len(prosody.split()) > 18:
                words = prosody.split()
                prosody = ", ".join(
                    " ".join(words[i:i + 12]) for i in range(0, len(words), 12))
            tts = gTTS(text=prosody, lang=lang)
            bio = io.BytesIO()
            tts.write_to_fp(bio)
            bio.seek(0)
            return bio.read()
        except Exception:
            logger.exception("gTTS fallback failed")
            raise RuntimeError("gTTS fallback failed")
    raise RuntimeError("No TTS available (no HF_TTS_MODEL and gTTS not installed)")
216
 
217
+ # ---------------- Telegram helpers (simple HTTP API) ----------------
218
def telegram_send_message(chat_id: str, text: str) -> bool:
    """Best-effort sendMessage via the Telegram Bot API; True on HTTP 200."""
    if not TELEGRAM_TOKEN or not chat_id:
        return False
    try:
        endpoint = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
        payload = {"chat_id": chat_id, "text": text}
        reply = requests.post(endpoint, json=payload, timeout=8)
        if reply.status_code != 200:
            logger.warning("Telegram sendMessage failed %s: %s", reply.status_code, reply.text[:300])
            return False
        return True
    except Exception:
        logger.exception("telegram_send_message")
        return False
231
 
232
def telegram_send_audio(chat_id: str, audio_bytes: bytes, filename: str = "reply.mp3") -> bool:
    """Best-effort sendAudio via the Telegram Bot API; True on HTTP 200."""
    if not TELEGRAM_TOKEN or not chat_id:
        return False
    try:
        endpoint = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendAudio"
        attachment = {"audio": (filename, io.BytesIO(audio_bytes), "audio/mpeg")}
        payload = {"chat_id": chat_id}
        reply = requests.post(endpoint, files=attachment, data=payload, timeout=30)
        if reply.status_code != 200:
            logger.warning("Telegram sendAudio failed %s: %s", reply.status_code, reply.text[:300])
            return False
        return True
    except Exception:
        logger.exception("telegram_send_audio")
        return False
247
 
248
+ # Telegram poller thread
249
  def telegram_poller_loop():
250
  if not TELEGRAM_TOKEN:
251
  logger.info("Telegram token not set; poller disabled")
 
277
  if lower.startswith("/ask "):
278
  q = text[5:].strip()
279
  try:
280
+ answer = hf_text_generate(q)
281
  except Exception as e:
282
+ answer = f"[HF error] {e}"
283
  try:
284
+ requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": answer}, timeout=10)
285
  except Exception:
286
  logger.exception("tg reply failed")
287
  elif lower.startswith("/say "):
 
302
  except Exception:
303
  pass
304
  except Exception:
305
+ logger.exception("telegram poller crashed, sleeping 3s")
306
  time.sleep(3)
307
 
 
308
# Start the Telegram long-poll worker as a daemon thread so it dies with
# the process. Only started when a bot token is configured; startup
# failures are logged but never block the web server from coming up.
if TELEGRAM_TOKEN:
    try:
        t = threading.Thread(target=telegram_poller_loop, daemon=True)
        t.start()
    except Exception:
        logger.exception("start telegram thread failed")
314
 
315
# ---------------- Flask app ----------------
# Single Flask instance; every HTTP route below registers on it.
app = Flask(__name__)
317
 
 
318
  INDEX_HTML = """
319
  <!doctype html>
320
  <html>
 
325
  <style>
326
  body{font-family:Arial,Helvetica,sans-serif;margin:12px;color:#111}
327
  .box{max-width:900px;margin:auto}
328
+ textarea{width:100%;height:90px;padding:10px;font-size:16px;border-radius:8px;border:1px solid #ddd}
329
  button{padding:10px 14px;margin:6px 4px;border-radius:8px;background:#0b74de;color:white;border:none;cursor:pointer;font-weight:700}
330
  #chat{border:1px solid #eee;padding:10px;height:320px;overflow:auto;background:#fafafa;border-radius:8px}
331
  .you{color:#0b63d6;margin:6px 0}
 
410
  def route_ask():
411
  try:
412
  j = request.get_json(force=True) or {}
413
+ text = clean_text(j.get("text","") or "")
414
  lang = (j.get("lang","auto") or "auto")
415
  if not text:
416
  return jsonify({"error":"no text"}), 400
 
426
  logger.exception("hf_text_generate failed")
427
  return jsonify({"error": str(e)}), 500
428
  CONVERSATION.append((text, ans))
429
+ save_conv(text, ans)
430
  push_display("YOU: " + (text[:60]))
431
  push_display("BOT: " + (ans[:60] if isinstance(ans,str) else str(ans)[:60]))
432
+ # notify telegram
433
  if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
434
  try:
435
  telegram_send_message(TELEGRAM_CHAT_ID, f"You: {text}\nBot: {ans[:300]}")
 
444
  def route_tts():
445
  try:
446
  j = request.get_json(force=True) or {}
447
+ text = clean_text(j.get("text","") or "")
448
  if not text:
449
  return jsonify({"error":"no text"}), 400
450
  try:
451
+ audio_bytes = hf_tts_get_bytes(text)
 
452
  except Exception as e:
453
+ logger.exception("tts generation failed")
454
  return jsonify({"error": str(e)}), 500
455
  return Response(audio_bytes, mimetype="audio/mpeg")
456
  except Exception as e:
 
473
  logger.exception("STT failed")
474
  return jsonify({"error": str(e)}), 500
475
  CONVERSATION.append((f"[voice] {txt}", ""))
476
+ save_conv(f"[voice] {txt}", "")
477
  push_display("VOICE: " + (txt[:60] if isinstance(txt,str) else str(txt)))
478
  return jsonify({"text": txt})
479
  except Exception as e:
 
482
 
483
  @app.route("/presence", methods=["POST"])
484
  def route_presence():
485
+ """
486
+ ESP32 radar should POST JSON {"note":"..."}.
487
+ Server returns greeting audio (if TTS available) or JSON greeting.
488
+ Also sends telegram notification if configured.
489
+ """
490
  try:
491
  j = request.get_json(force=True) or {}
492
+ note = clean_text(j.get("note","Có người phía trước") or "Có người phía trước")
493
  greeting = f"Xin chào! {note}"
494
  CONVERSATION.append(("__presence__", greeting))
495
+ save_conv("__presence__", greeting)
496
  push_display("RADAR: " + note[:60])
 
497
  if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
498
  try:
499
  telegram_send_message(TELEGRAM_CHAT_ID, f"⚠️ Robot: Phát hiện người - {note}")
500
  except Exception:
501
  logger.exception("telegram notify failed")
 
502
  try:
503
+ audio_bytes = hf_tts_get_bytes(greeting)
504
  return Response(audio_bytes, mimetype="audio/mpeg")
505
  except Exception:
 
506
  return jsonify({"greeting": greeting})
507
  except Exception as e:
508
  logger.exception("route_presence exception")
 
513
  return jsonify({"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONVERSATION)})
514
 
515
@app.route("/model_check", methods=["GET"])
def model_check():
    """Ping the configured HF model once to surface auth/model problems.

    Sends a tiny generation request and echoes Hugging Face's raw
    status code and body — handy for debugging 403/404 from the UI.
    Returns a 400 JSON error when HF_TOKEN or HF_MODEL is missing,
    and a 500 JSON error when the request itself fails.
    """
    if not HF_TOKEN:
        return jsonify({"ok": False, "error": "HF_TOKEN not set"}), 400
    if not HF_MODEL:
        return jsonify({"ok": False, "error": "HF_MODEL not set"}), 400
    payload = {
        "inputs": "Please reply 'OK' only.",
        "parameters": {"max_new_tokens": 6},  # keep the probe cheap
        "options": {"wait_for_model": True},  # tolerate cold-start loading
    }
    headers = {**HF_HEADERS, "Content-Type": "application/json"}
    try:
        resp = requests.post(
            f"https://api-inference.huggingface.co/models/{HF_MODEL}",
            headers=headers,
            json=payload,
            timeout=30,
        )
    except Exception as e:
        logger.exception("model_check failed")
        return jsonify({"ok": False, "error": str(e)}), 500
    # Pass HF's response through untouched so the caller sees the real error.
    return jsonify({"status_code": resp.status_code, "text": resp.text})
535
 
536
@app.route("/config", methods=["GET", "POST"])
def config():
    """Inspect or override the active model ids at runtime.

    GET  -> current hf_model / hf_tts_model / hf_stt_model values.
    POST -> JSON such as {"hf_model": "...", "hf_tts_model": "..."};
            returns {"changed": {...}} with the keys actually applied.
    Changes are process-local only and are lost on restart.
    """
    global HF_MODEL, HF_TTS_MODEL, HF_STT_MODEL
    if request.method == "GET":
        return jsonify({
            "hf_model": HF_MODEL,
            "hf_tts_model": HF_TTS_MODEL,
            "hf_stt_model": HF_STT_MODEL,
        })
    try:
        body = request.get_json(force=True) or {}
        changed = {}
        # Map JSON keys to the module-level globals they control.
        for field, gname in (
            ("hf_model", "HF_MODEL"),
            ("hf_tts_model", "HF_TTS_MODEL"),
            ("hf_stt_model", "HF_STT_MODEL"),
        ):
            if field in body:
                globals()[gname] = body[field]
                changed[field] = body[field]
        return jsonify({"changed": changed})
    except Exception as e:
        logger.exception("config post failed")
        return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
 
563
+ # --------------- run ---------------
564
  if __name__ == "__main__":
565
  logger.info("KC Robot AI v7.4 starting. HF_MODEL=%s HF_TTS=%s HF_STT=%s Telegram=%s",
566
  HF_MODEL or "(not set)", HF_TTS_MODEL or "(not set)", HF_STT_MODEL or "(not set)", bool(TELEGRAM_TOKEN and TELEGRAM_CHAT_ID))
 
568
  logger.warning("HF_TOKEN not set — set Secret HF_TOKEN to use Hugging Face inference.")
569
  if not HF_MODEL:
570
  logger.warning("HF_MODEL not set — set Secret HF_MODEL to a model id.")
571
+ # start Flask
572
  app.run(host="0.0.0.0", port=PORT)