kcrobot25 commited on
Commit
59642c9
·
verified ·
1 Parent(s): 9a12789

initial commit

Browse files
Files changed (1) hide show
  1. app.py +670 -164
app.py CHANGED
@@ -1,192 +1,698 @@
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- # ==========================================================
4
- # KC ROBOT AI - APP.PY (V2.0 MAX FINAL)
5
- # Cloud AI Robot with Gemini 2.5 Flash + ESP32 + Telegram
6
- # ==========================================================
7
-
8
- from flask import Flask, request, jsonify, render_template_string
9
- from google import genai
10
- import requests
11
  import os
 
 
12
  import time
13
- from gtts import gTTS
14
- from langdetect import detect
15
- import tempfile
16
- import base64
17
-
18
- # ==========================================================
19
- # CONFIGURATION
20
- # ==========================================================
21
-
22
- # Load environment variables from secrets (Cloud Run or Hugging Face)
23
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
24
- GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
25
- TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN")
26
- TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID")
27
-
28
- # Create Flask app
29
- app = Flask(__name__)
30
 
31
- # ==========================================================
32
- # SETUP GEMINI CLIENT
33
- # ==========================================================
34
- if not GEMINI_API_KEY:
35
- print("❌ ERROR: No Gemini API Key found. Please add GEMINI_API_KEY in Secrets.")
36
- client = None
37
- else:
38
- client = genai.Client(api_key=GEMINI_API_KEY)
39
-
40
- # ==========================================================
41
- # TELEGRAM UTILITIES
42
- # ==========================================================
43
- def send_telegram_message(text):
44
- if not TELEGRAM_TOKEN or not TELEGRAM_CHAT_ID:
45
- print("⚠️ Telegram not configured.")
46
- return
47
- url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
48
- payload = {"chat_id": TELEGRAM_CHAT_ID, "text": text}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  try:
50
- requests.post(url, json=payload, timeout=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except Exception as e:
52
- print("Telegram Error:", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- # ==========================================================
55
- # GEMINI AI RESPONSE
56
- # ==========================================================
57
- def ask_gemini(prompt: str):
58
- if not client:
59
- return "⚠️ Gemini API key missing. Please configure in Secrets."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
 
 
 
 
61
  try:
62
- response = client.models.generate_content(
63
- model=GEMINI_MODEL,
64
- contents=prompt
65
- )
66
- if hasattr(response, "text"):
67
- return response.text.strip()
68
- elif "text" in response:
69
- return response["text"].strip()
70
- else:
71
- return "⚠️ No response text from Gemini."
72
- except Exception as e:
73
- print("Gemini Error:", e)
74
- return f"⚠️ Gemini Error: {e}"
75
 
76
- # ==========================================================
77
- # LANGUAGE DETECTION & TTS
78
- # ==========================================================
79
- def text_to_speech(text):
 
80
  try:
81
- lang = detect(text)
82
- if lang not in ["vi", "en"]:
83
- lang = "en"
84
- tts = gTTS(text=text, lang=lang)
85
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
86
- tts.save(tmp.name)
87
- with open(tmp.name, "rb") as f:
88
- audio_b64 = base64.b64encode(f.read()).decode("utf-8")
89
- os.unlink(tmp.name)
90
- return audio_b64
91
- except Exception as e:
92
- print("TTS Error:", e)
93
- return None
94
-
95
- # ==========================================================
96
- # SIMPLE HTML INTERFACE (for testing)
97
- # ==========================================================
98
- HTML_PAGE = """
99
- <!DOCTYPE html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  <html>
101
  <head>
102
- <title>KC Robot AI v2.0</title>
 
 
103
  <style>
104
- body { font-family: Arial; text-align: center; background-color: #101010; color: white; }
105
- input, button { padding: 10px; font-size: 16px; margin: 5px; }
106
- #chat { max-width: 700px; margin: auto; text-align: left; background: #202020; padding: 20px; border-radius: 10px; }
107
- .msg-user { color: #4af; }
108
- .msg-bot { color: #fa4; margin-left: 20px; }
109
- audio { margin-top: 10px; }
 
 
 
 
 
 
 
110
  </style>
111
  </head>
112
  <body>
113
- <h1>🤖 KC Robot AI v2.0 MAX FINAL</h1>
114
- <div id="chat"></div>
115
- <br>
116
- <input id="user_input" placeholder="Nói gì đó..." style="width:60%">
117
- <button onclick="sendMessage()">Gửi</button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  <script>
120
- async function sendMessage() {
121
- const input = document.getElementById("user_input").value;
122
- if (!input) return;
123
- const chat = document.getElementById("chat");
124
- chat.innerHTML += `<div class='msg-user'><b>Bạn:</b> ${input}</div>`;
125
- document.getElementById("user_input").value = "";
126
- const res = await fetch("/api/chat", {
127
- method: "POST",
128
- headers: {"Content-Type": "application/json"},
129
- body: JSON.stringify({message: input})
130
- });
131
- const data = await res.json();
132
- chat.innerHTML += `<div class='msg-bot'><b>Robot:</b> ${data.reply}</div>`;
133
- if (data.audio) {
134
- const audio = document.createElement("audio");
135
- audio.src = "data:audio/mp3;base64," + data.audio;
136
- audio.controls = true;
137
- chat.appendChild(audio);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  }
139
- chat.scrollTop = chat.scrollHeight;
140
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  </script>
142
  </body>
143
  </html>
144
  """
145
 
146
- @app.route("/")
147
- def home():
148
- return render_template_string(HTML_PAGE)
149
-
150
- # ==========================================================
151
- # API ENDPOINTS
152
- # ==========================================================
153
-
154
- @app.route("/api/chat", methods=["POST"])
155
- def api_chat():
156
- data = request.get_json()
157
- if not data or "message" not in data:
158
- return jsonify({"error": "Missing 'message'"}), 400
159
-
160
- user_message = data["message"]
161
- print(f"🧠 User said: {user_message}")
162
- send_telegram_message(f"User: {user_message}")
163
-
164
- ai_reply = ask_gemini(user_message)
165
- send_telegram_message(f"Robot: {ai_reply}")
166
-
167
- audio_b64 = text_to_speech(ai_reply)
168
- return jsonify({"reply": ai_reply, "audio": audio_b64})
169
-
170
- # ESP32 sensor endpoint
171
- @app.route("/api/sensor", methods=["POST"])
172
- def sensor_data():
173
- data = request.get_json()
174
- if not data:
175
- return jsonify({"error": "No data"}), 400
176
- msg = f"👁️ ESP32 Sensor update: {data}"
177
- send_telegram_message(msg)
178
- return jsonify({"status": "received"})
179
-
180
- # Health check
181
- @app.route("/ping")
182
- def ping():
183
- return jsonify({"status": "ok", "model": GEMINI_MODEL})
184
-
185
- # ==========================================================
186
- # MAIN ENTRY POINT
187
- # ==========================================================
188
- if __name__ == "__main__":
189
- port = int(os.getenv("PORT", 8080))
190
- print(f"🚀 KC Robot AI v2.0 running on port {port}")
191
- app.run(host="0.0.0.0", port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ # app.py — KC Robot AI V7.2 MAX FINAL
3
+ # Flask main app + optional Gradio UI launched in background (for convenience).
4
+ # Requires Secrets:
5
+ # HF_TOKEN, HF_MODEL, TELEGRAM_TOKEN (optional), TELEGRAM_CHAT_ID (optional)
6
+ #
7
+ # Endpoints:
8
+ # - GET / -> main HTML UI (chat + audio play)
9
+ # - GET /health -> status json
10
+ # - POST /ask -> {text, lang?} -> {"answer": "..."}
11
+ # - POST /tts -> {text} -> audio/mp3
12
+ # - POST /stt -> upload file or raw bytes -> {"text":"..."}
13
+ # - POST /presence -> {note?} -> greets + notify telegram, returns mp3 if possible
14
+ # - GET /display -> display buffer (for ESP32)
15
+ # - POST /config -> change runtime config (optional)
16
+ #
17
+ # How to use on HF Spaces:
18
+ # - Upload this file and requirements.txt
19
+ # - Set Secrets: HF_TOKEN, HF_MODEL, TELEGRAM_TOKEN (opt), TELEGRAM_CHAT_ID (opt)
20
+ # - Start Space (Flask runtime). Visit the Space URL to test.
21
+ #
22
+ # Notes:
23
+ # - Calls to Hugging Face inference API have robust fallback and clear error messages.
24
+ # - TTS: tries HF TTS model (HF_TTS_MODEL if set), otherwise fallback to gTTS.
25
+ # - STT: uses HF_STT_MODEL if set (default openai/whisper-small).
26
+ #
27
+ # Author: KC Robot helper (generated)
28
+ # Version: v7.2-max-final
29
 
 
 
 
 
 
 
 
 
30
  import os
31
+ import io
32
+ import sys
33
  import time
34
+ import json
35
+ import uuid
36
+ import logging
37
+ import threading
38
+ from typing import List, Tuple, Optional, Any
39
+ from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ import requests
42
+ from flask import Flask, request, jsonify, send_file, render_template_string, abort, Response
43
+
44
+ # Optional gTTS fallback
45
+ try:
46
+ from gtts import gTTS
47
+ _HAS_GTTS = True
48
+ except Exception:
49
+ _HAS_GTTS = False
50
+
51
+ # Optional Gradio UI
52
+ try:
53
+ import gradio as gr
54
+ _HAS_GRADIO = True
55
+ except Exception:
56
+ _HAS_GRADIO = False
57
+
58
# Logging config
# Log to stdout so HF Spaces / Cloud Run container logs capture everything.
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(name)s: %(message)s")
# Single module-wide logger used by all helpers and routes below.
logger = logging.getLogger("kcrobot.v7.2")
62
+
63
# --------- Load env / secrets ----------
# All secrets come from environment variables (HF Spaces "Secrets" or Cloud Run env).
# Empty string means "not configured"; downstream helpers check truthiness.
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
HF_MODEL = os.getenv("HF_MODEL", "").strip()  # primary model id
HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "").strip()  # optional TTS model
HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small").strip()

# Telegram integration is optional; both values must be set for it to activate.
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
TELEGRAM_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID", "").strip()

# server ports - HF spaces provides PORT env; default 7860
PORT = int(os.environ.get("PORT", 7860))
GRADIO_PORT = 7861  # gradio web ui runs here if enabled

# Authorization header attached to every HF inference call (empty dict when no token).
HF_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

# Create data dir for tmp audio
# /tmp on POSIX containers; a local folder on Windows where /tmp does not exist.
TMPDIR = Path("/tmp/kcrobot") if os.name != "nt" else Path.cwd() / "tmp_kcrobot"
TMPDIR.mkdir(parents=True, exist_ok=True)
81
+
82
# --------- in-memory state ----------
CONV: List[Tuple[str, str]] = []   # conversation history as (user, bot) pairs
DISPLAY_BUFFER: List[str] = []     # rolling text lines served to the ESP32 display
DISPLAY_LIMIT = 8                  # maximum lines retained in the display buffer

def push_display(line: str):
    """Append one line to the display buffer, keeping only the newest DISPLAY_LIMIT lines."""
    global DISPLAY_BUFFER
    DISPLAY_BUFFER.append(line)
    overflow = len(DISPLAY_BUFFER) - DISPLAY_LIMIT
    if overflow > 0:
        # Rebind (rather than mutate) so the buffer is trimmed in one slice.
        DISPLAY_BUFFER = DISPLAY_BUFFER[overflow:]
92
+
93
# ---------- language detector ----------
# Every lowercase Vietnamese letter that carries a diacritic.
VI_CHARS = set("ăâđêôơưáàảãạắằẳẵặấầẩẫậéèẻẽẹíìỉĩịóòỏõọúùủũụứừửữựýỳỷỹỵ")

def detect_vi_or_en(text: str) -> str:
    """Cheap language guess: 'vi' if any Vietnamese diacritic appears, else 'en'."""
    lowered = text.lower()
    if any(ch in VI_CHARS for ch in lowered):
        return "vi"
    return "en"
100
+
101
# ---------- HF helpers with robust fallback ----------
def hf_post_json_single(model_id: str, payload: dict, timeout: int = 90) -> requests.Response:
    """Post JSON to HF inference endpoint for a single model."""
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN not configured in environment/secrets.")
    # Copy the shared auth headers so the module-level dict is never mutated.
    headers = {**HF_HEADERS, "Content-Type": "application/json"}
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    logger.debug("HF JSON POST to %s payload keys: %s", model_id, list(payload.keys()))
    return requests.post(url, headers=headers, json=payload, timeout=timeout)
112
+
113
def hf_post_json_with_fallback(models: List[str], payload: dict, timeout: int = 90) -> Tuple[str, Any]:
    """Try a list of models in order. Return (model_used, parsed_json_or_text)."""
    last_err = None
    for candidate in models:
        try:
            resp = hf_post_json_single(candidate, payload, timeout=timeout)
        except Exception as exc:
            last_err = f"network error for {candidate}: {exc}"
            logger.warning(last_err)
            continue
        if resp.status_code != 200:
            # Auth errors (401/403) and 404s also land here: record and try the next model.
            last_err = f"HTTP {resp.status_code} from {candidate}: {resp.text[:300]}"
            logger.warning(last_err)
            continue
        # Prefer decoded JSON; hand back raw bytes when the body is not JSON.
        try:
            return candidate, resp.json()
        except Exception:
            return candidate, resp.content
    raise RuntimeError(f"All HF attempts failed. Last error: {last_err}")
135
+
136
def hf_post_bytes_with_fallback(models: List[str], data: bytes, content_type: str = "application/octet-stream", timeout: int = 120) -> Tuple[str, requests.Response]:
    """Post bytes (STT or TTS) to HF; return model used and response object."""
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN not configured.")
    headers = dict(HF_HEADERS)
    headers["Content-Type"] = content_type
    last_err = None
    for candidate in models:
        endpoint = f"https://api-inference.huggingface.co/models/{candidate}"
        try:
            resp = requests.post(endpoint, headers=headers, data=data, timeout=timeout)
        except Exception as exc:
            last_err = f"network error {exc} for {candidate}"
            logger.warning(last_err)
            continue
        if resp.status_code != 200:
            last_err = f"HTTP {resp.status_code} for {candidate}: {resp.text[:300]}"
            logger.warning(last_err)
            continue
        return candidate, resp
    raise RuntimeError(f"All HF byte-post attempts failed. Last error: {last_err}")
158
+
159
def parse_hf_text_resp(obj: Any) -> str:
    """Flatten the many response shapes the HF inference API returns into plain text."""
    try:
        if isinstance(obj, dict):
            # Known single-dict shapes, checked in priority order.
            if "generated_text" in obj:
                return obj.get("generated_text", "")
            if "text" in obj:
                return obj.get("text", "")
            choices = obj.get("choices")
            if isinstance(choices, list) and choices:
                head = choices[0]
                return head.get("text") or head.get("message", {}).get("content", "") or str(head)
            # Unknown dict shape - stringify so the caller still gets something readable.
            return json.dumps(obj)
        if isinstance(obj, list) and obj:
            head = obj[0]
            if isinstance(head, dict):
                for key in ("generated_text", "text"):
                    if key in head:
                        return head.get(key, "")
                return str(head)
        if isinstance(obj, (bytes, bytearray)):
            try:
                return obj.decode("utf-8", errors="ignore")
            except Exception:
                return str(obj)
        return str(obj)
    except Exception as e:
        logger.exception("parse_hf_text_resp error")
        return f"[parse error] {e}"
189
+
190
+ # ---------- High-level LLM / STT / TTS wrappers ----------
191
def llm_generate(prompt: str, model_override: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """Generate text from HF LLM. Use HF_MODEL by default; allow override."""
    chosen = model_override or HF_MODEL
    if not chosen:
        raise RuntimeError("HF_MODEL not configured.")
    request_body = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": int(max_new_tokens), "temperature": float(temperature)},
        "options": {"wait_for_model": True},  # block while HF cold-starts the model
    }
    model_used, raw = hf_post_json_with_fallback([chosen], request_body, timeout=120)
    answer = parse_hf_text_resp(raw)
    logger.info("LLM used model=%s len=%d", model_used, len(answer))
    return answer
209
 
210
def tts_get_bytes_hf(text: str, model_list: Optional[List[str]] = None) -> bytes:
    """Synthesize speech for *text*: try HF TTS model(s) first, then gTTS.

    Raises RuntimeError when text is empty or every synthesis path fails.
    """
    if not text:
        raise RuntimeError("Empty text for TTS")
    candidates = model_list or ([HF_TTS_MODEL] if HF_TTS_MODEL else [])
    if candidates:
        # HF TTS often expects JSON {"inputs": ...} and returns raw audio bytes.
        body = json.dumps({"inputs": text}).encode("utf-8")
        try:
            model_used, resp = hf_post_bytes_with_fallback(
                candidates, body, content_type="application/json", timeout=120)
            logger.info("HF TTS used %s return bytes len=%d", model_used, len(resp.content))
            return resp.content
        except Exception as e:
            # Fall through to gTTS below rather than failing outright.
            logger.warning("HF TTS attempts failed: %s", e)
    if _HAS_GTTS:
        try:
            voice = gTTS(text=text, lang="vi" if detect_vi_or_en(text) == "vi" else "en")
            buf = io.BytesIO()
            voice.write_to_fp(buf)
            payload = buf.getvalue()
            logger.info("gTTS fallback created bytes len=%d", len(payload))
            return payload
        except Exception as e:
            logger.exception("gTTS fallback failed")
            raise RuntimeError(f"TTS failed: {e}")
    raise RuntimeError("No TTS method available (no HF_TTS_MODEL and gTTS missing).")
239
 
240
def stt_from_bytes_hf(audio_bytes: bytes, model_list: Optional[List[str]] = None) -> str:
    """Run STT via HF; returns recognized text."""
    candidates = model_list or ([HF_STT_MODEL] if HF_STT_MODEL else [])
    if not candidates:
        raise RuntimeError("No STT model configured.")
    model_used, resp = hf_post_bytes_with_fallback(
        candidates, audio_bytes, content_type="application/octet-stream", timeout=180)
    # Whisper-style responses are JSON {"text": ...}; anything else goes through
    # the generic normalizer, with raw response text as the last resort.
    try:
        parsed = resp.json()
    except Exception:
        try:
            return resp.text
        except Exception:
            return "[stt parse failed]"
    if isinstance(parsed, dict) and "text" in parsed:
        return parsed["text"]
    return parse_hf_text_resp(parsed)
 
 
 
 
257
 
258
# ---------- Telegram helpers ----------
def send_telegram_message(text: str) -> bool:
    """Send *text* to the configured Telegram chat; True on HTTP 200, False otherwise."""
    if not (TELEGRAM_TOKEN and TELEGRAM_CHAT_ID):
        logger.debug("telegram not configured")
        return False
    try:
        endpoint = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
        resp = requests.post(endpoint, json={"chat_id": TELEGRAM_CHAT_ID, "text": text}, timeout=8)
    except Exception:
        logger.exception("send_telegram_message exception")
        return False
    if resp.status_code == 200:
        return True
    logger.warning("Telegram send failed %s %s", resp.status_code, resp.text[:300])
    return False
273
+
274
def telegram_poll_loop() -> None:
    """Background poller: respond to /ask, /say, /status commands.

    Runs forever in a daemon thread (see module-level startup below) and uses
    Telegram long-polling (getUpdates with timeout=30) instead of webhooks,
    which works without a public inbound URL.
    """
    if not TELEGRAM_TOKEN:
        logger.info("Telegram not configured - poll disabled")
        return
    logger.info("Starting Telegram poller")
    base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"
    offset = None  # last acknowledged update_id + 1; None on first poll
    while True:
        try:
            params = {"timeout": 30}  # long-poll window on the Telegram side
            if offset:
                params["offset"] = offset
            # Client timeout slightly above the server-side long-poll window.
            r = requests.get(base + "/getUpdates", params=params, timeout=35)
            if r.status_code != 200:
                logger.warning("Telegram getUpdates failed: %s", r.status_code)
                time.sleep(2)
                continue
            j = r.json()
            for upd in j.get("result", []):
                # Advance the offset so this update is not redelivered.
                offset = upd.get("update_id", 0) + 1
                msg = upd.get("message") or {}
                chat = msg.get("chat", {})
                chat_id = chat.get("id")
                text = (msg.get("text") or "").strip()
                if not text:
                    continue  # skip stickers/photos/etc.
                logger.info("TG msg from %s: %s", chat_id, text[:120])
                lower = text.lower()
                if lower.startswith("/ask "):
                    # /ask <question> -> LLM answer as a text reply
                    q = text[5:].strip()
                    try:
                        ans = llm_generate(q)
                    except Exception as e:
                        ans = f"[HF error] {e}"
                    try:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
                    except Exception:
                        logger.exception("tg reply failed")
                elif lower.startswith("/say "):
                    # /say <text> -> synthesize speech and send it back as audio
                    phrase = text[5:].strip()
                    try:
                        audio_bytes = tts_get_bytes_hf(phrase)
                        files = {"audio": ("reply.mp3", audio_bytes, "audio/mpeg")}
                        requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
                    except Exception:
                        logger.exception("tg say failed")
                elif lower.startswith("/status"):
                    try:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running"}, timeout=10)
                    except Exception:
                        logger.exception("tg status failed")
                else:
                    # Unknown command: reply with usage help.
                    try:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10)
                    except Exception:
                        logger.exception("tg help failed")
        except Exception:
            logger.exception("telegram poller exception")
            # NOTE(review): diff indentation is ambiguous here; read as an
            # error back-off (sleep only after a poller exception) — confirm.
            time.sleep(3)
334
+
335
# Start telegram poller thread if token present
# Daemon thread: it must not keep the process alive when Flask shuts down.
if TELEGRAM_TOKEN:
    try:
        t = threading.Thread(target=telegram_poll_loop, daemon=True)
        t.start()
    except Exception:
        logger.exception("Failed to start telegram thread")
342
+
343
# ---------- Flask app & routes ----------
# Single Flask application serving the UI plus all JSON/audio endpoints below.
app = Flask(__name__)
345
+
346
+ # Simple HTML UI (mobile friendly) - main page includes chatbox and audio play button
347
+ INDEX_HTML = """
348
+ <!doctype html>
349
  <html>
350
  <head>
351
+ <meta charset="utf-8">
352
+ <meta name="viewport" content="width=device-width,initial-scale=1">
353
+ <title>KC Robot AI v7.2</title>
354
  <style>
355
+ body{font-family:Inter,Arial,Helvetica,sans-serif;margin:10px;color:#111}
356
+ .container{max-width:900px;margin:auto}
357
+ .header{display:flex;align-items:center;gap:12px}
358
+ .h1{font-size:20px;font-weight:700}
359
+ .controls{margin-top:8px}
360
+ textarea{width:100%;min-height:70px;padding:10px;font-size:15px;border-radius:8px;border:1px solid #ddd}
361
+ button{background:#0066cc;color:white;border:none;padding:10px 14px;border-radius:8px;font-weight:600;cursor:pointer}
362
+ .chatbox{margin-top:12px;border:1px solid #eee;padding:8px;border-radius:8px;background:#fafafa;height:300px;overflow:auto}
363
+ .msg-user{color:#0b63d6;margin:6px 0}
364
+ .msg-bot{color:#0b8a3f;margin:6px 0}
365
+ .small{font-size:13px;color:#666}
366
+ .controls-row{display:flex;gap:8px;align-items:center}
367
+ .select{padding:6px;border-radius:6px;border:1px solid #ddd}
368
  </style>
369
  </head>
370
  <body>
371
+ <div class="container">
372
+ <div class="header">
373
+ <div class="h1">🤖 KC Robot AI v7.2 — Final</div>
374
+ </div>
375
+ <div class="small">Model: <span id="modelName">loading...</span></div>
376
+ <div class="controls">
377
+ <textarea id="userText" placeholder="Nhập tiếng Việt hoặc English..."></textarea>
378
+ <div class="controls-row">
379
+ <select id="lang" class="select"><option value="auto">Auto</option><option value="vi">Vietnamese</option><option value="en">English</option></select>
380
+ <button onclick="sendMsg()">Gửi</button>
381
+ <button onclick="playLast()">Phát âm</button>
382
+ <button onclick="clearChat()">Xóa</button>
383
+ </div>
384
+ </div>
385
+ <div class="chatbox" id="chatbox"></div>
386
+ <div style="margin-top:12px">
387
+ <input type="file" id="audioFile" accept="audio/*"><button onclick="uploadAudio()">Upload → STT</button>
388
+ </div>
389
+ <div style="margin-top:12px" class="small">Kết nối Telegram: <span id="tgstatus">checking...</span></div>
390
+ </div>
391
 
392
  <script>
393
+ let lastAnswer = "";
394
+ function appendUser(t){ const cb=document.getElementById('chatbox'); cb.innerHTML += '<div class="msg-user"><b>You:</b> '+escapeHtml(t)+'</div>'; cb.scrollTop = cb.scrollHeight; }
395
+ function appendBot(t){ const cb=document.getElementById('chatbox'); cb.innerHTML += '<div class="msg-bot"><b>Robot:</b> '+escapeHtml(t)+'</div>'; cb.scrollTop = cb.scrollHeight; }
396
+ function escapeHtml(s){ return (s+'').replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;'); }
397
+ async function sendMsg(){
398
+ let t=document.getElementById('userText').value.trim(); if(!t) return;
399
+ appendUser(t);
400
+ document.getElementById('userText').value='';
401
+ const lang=document.getElementById('lang').value;
402
+ try {
403
+ const res = await fetch('/ask', {method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify({text: t, lang: lang})});
404
+ const j = await res.json();
405
+ if(j.answer){
406
+ lastAnswer = j.answer;
407
+ appendBot(j.answer);
408
+ } else {
409
+ appendBot("[Error] " + JSON.stringify(j));
410
+ }
411
+ } catch(e){
412
+ appendBot("[Network Error] " + e);
413
+ }
414
+ }
415
+ async function playLast(){
416
+ if(!lastAnswer) return alert('No answer to play');
417
+ try {
418
+ const r = await fetch('/tts', {method:'POST', headers:{'Content-Type':'application/json'}, body: JSON.stringify({text: lastAnswer})});
419
+ if(!r.ok){ alert('TTS failed'); return; }
420
+ const blob = await r.blob();
421
+ const url = URL.createObjectURL(blob);
422
+ const a = new Audio(url);
423
+ a.play();
424
+ } catch(e){
425
+ alert('Play error: '+e);
426
  }
 
427
  }
428
+ async function uploadAudio(){
429
+ const f = document.getElementById('audioFile').files[0];
430
+ if(!f) return alert('Chọn file audio');
431
+ const fd = new FormData(); fd.append('file', f);
432
+ const r = await fetch('/stt', {method:'POST', body: fd});
433
+ const j = await r.json();
434
+ if(j.text) { appendUser('[voice] ' + j.text); lastAnswer=''; }
435
+ else appendUser('[stt error] ' + JSON.stringify(j));
436
+ }
437
+ async function clearChat(){ document.getElementById('chatbox').innerHTML=''; lastAnswer=''; }
438
+ async function loadStatus(){
439
+ try{
440
+ const r=await fetch('/health'); const j=await r.json();
441
+ document.getElementById('modelName').innerText = j.hf_model || '(not set)';
442
+ document.getElementById('tgstatus').innerText = j.telegram ? 'enabled' : 'disabled';
443
+ }catch(e){
444
+ document.getElementById('modelName').innerText='(error)';
445
+ document.getElementById('tgstatus').innerText='error';
446
+ }
447
+ }
448
+ loadStatus();
449
  </script>
450
  </body>
451
  </html>
452
  """
453
 
454
+ @app.route("/", methods=["GET"])
455
+ def index():
456
+ return render_template_string(INDEX_HTML)
457
+
458
+ @app.route("/health", methods=["GET"])
459
+ def health():
460
+ return jsonify({
461
+ "ok": True,
462
+ "hf_token": bool(HF_TOKEN),
463
+ "hf_model": HF_MODEL,
464
+ "hf_tts_model": HF_TTS_MODEL,
465
+ "hf_stt_model": HF_STT_MODEL,
466
+ "telegram": bool(TELEGRAM_TOKEN and TELEGRAM_CHAT_ID),
467
+ "conv_len": len(CONV),
468
+ "display_len": len(DISPLAY_BUFFER)
469
+ })
470
+
471
+ @app.route("/ask", methods=["POST"])
472
+ def route_ask():
473
+ try:
474
+ j = request.get_json(force=True) or {}
475
+ text = (j.get("text","") or "").strip()
476
+ lang = (j.get("lang","auto") or "auto")
477
+ if not text:
478
+ return jsonify({"error":"no text"}), 400
479
+ # prepare prompt instruct
480
+ if lang == "vi":
481
+ prompt = f"Bạn là trợ lý thông minh, trả lời bằng tiếng Việt, rõ ràng và lịch sự. Trả lời ngắn gọn:\n\n{text}"
482
+ elif lang == "en":
483
+ prompt = f"You are a helpful assistant. Answer in clear English, concise:\n\n{text}"
484
+ else:
485
+ prompt = f"You are a bilingual assistant. Answer in the same language as the user, clearly and concisely:\n\n{text}"
486
+ try:
487
+ ans = llm_generate(prompt)
488
+ except Exception as e:
489
+ logger.exception("LLM error")
490
+ return jsonify({"error": f"LLM error: {e}"}), 500
491
+ CONV.append((text, ans))
492
+ push_display("YOU: " + (text[:60]))
493
+ push_display("BOT: " + (ans[:60] if isinstance(ans, str) else str(ans)[:60]))
494
+ # notify telegram optionally (short)
495
+ if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
496
+ try:
497
+ send_telegram_message(f"You: {text}\nBot: {ans[:300]}")
498
+ except Exception:
499
+ logger.exception("telegram notify failed")
500
+ return jsonify({"answer": ans})
501
+ except Exception as e:
502
+ logger.exception("route_ask failed")
503
+ return jsonify({"error": str(e)}), 500
504
+
505
+ @app.route("/tts", methods=["POST"])
506
+ def route_tts():
507
+ try:
508
+ j = request.get_json(force=True) or {}
509
+ text = (j.get("text","") or "").strip()
510
+ if not text:
511
+ return jsonify({"error":"no text"}), 400
512
+ # try HF TTS model first
513
+ try:
514
+ # prefer HF_TTS_MODEL if set
515
+ models = [HF_TTS_MODEL] if HF_TTS_MODEL else []
516
+ audio_bytes = tts_get_bytes_hf(text, model_list=models if models else None)
517
+ except Exception as e:
518
+ logger.warning("TTS HF failed: %s", e)
519
+ # fallback to gTTS if possible
520
+ if _HAS_GTTS:
521
+ try:
522
+ audio_bytes = tts_get_bytes_hf(text, model_list=[])
523
+ except Exception as e2:
524
+ logger.exception("gTTS fallback also failed")
525
+ return jsonify({"error": f"TTS failed: {e2}"}), 500
526
+ else:
527
+ return jsonify({"error": f"TTS failed: {e}"}), 500
528
+ # return mp3
529
+ return Response(audio_bytes, mimetype="audio/mpeg")
530
+ except Exception as e:
531
+ logger.exception("route_tts exception")
532
+ return jsonify({"error": str(e)}), 500
533
 
534
+ @app.route("/stt", methods=["POST"])
535
+ def route_stt():
536
+ try:
537
+ if "file" in request.files:
538
+ f = request.files["file"]
539
+ audio_bytes = f.read()
540
+ else:
541
+ audio_bytes = request.get_data()
542
+ if not audio_bytes:
543
+ return jsonify({"error":"no audio provided"}), 400
544
+ try:
545
+ txt = stt_from_bytes_hf(audio_bytes)
546
+ except Exception as e:
547
+ logger.exception("STT failed")
548
+ return jsonify({"error": str(e)}), 500
549
+ CONV.append((f"[voice] {txt}", ""))
550
+ push_display("VOICE: " + (txt[:60] if isinstance(txt,str) else str(txt)))
551
+ return jsonify({"text": txt})
552
+ except Exception as e:
553
+ logger.exception("route_stt exception")
554
+ return jsonify({"error": str(e)}), 500
555
+
556
+ @app.route("/presence", methods=["POST"])
557
+ def route_presence():
558
+ try:
559
+ j = request.get_json(force=True) or {}
560
+ note = (j.get("note","Có người phía trước") or "Có người phía trước")
561
+ greeting = f"Xin chào! {note}"
562
+ CONV.append(("__presence__", greeting))
563
+ push_display("RADAR: " + note[:60])
564
+ # Telegram notify
565
+ if TELEGRAM_TOKEN and TELEGRAM_CHAT_ID:
566
+ try:
567
+ send_telegram_message(f"⚠️ Robot: Phát hiện người - {note}")
568
+ except Exception:
569
+ logger.exception("telegram notify error")
570
+ # Return greeting audio if possible
571
+ try:
572
+ audio_bytes = tts_get_bytes_hf(greeting, model_list=[HF_TTS_MODEL] if HF_TTS_MODEL else None)
573
+ return Response(audio_bytes, mimetype="audio/mpeg")
574
+ except Exception:
575
+ # fallback to text only
576
+ return jsonify({"greeting": greeting})
577
+ except Exception as e:
578
+ logger.exception("presence error")
579
+ return jsonify({"error": str(e)}), 500
580
+
581
+ @app.route("/display", methods=["GET"])
582
+ def route_display():
583
+ return jsonify({"lines": DISPLAY_BUFFER.copy(), "conv_len": len(CONV)})
584
+
585
+ @app.route("/config", methods=["GET","POST"])
586
+ def route_config():
587
+ # quick config view/change via JSON (NOT secure — for debug only)
588
+ if request.method == "GET":
589
+ return jsonify({
590
+ "hf_token_set": bool(HF_TOKEN),
591
+ "hf_model": HF_MODEL,
592
+ "hf_tts_model": HF_TTS_MODEL,
593
+ "hf_stt_model": HF_STT_MODEL,
594
+ "telegram": bool(TELEGRAM_TOKEN and TELEGRAM_CHAT_ID)
595
+ })
596
+ else:
597
+ try:
598
+ j = request.get_json(force=True) or {}
599
+ # we cannot change secrets here; just accept display config changes
600
+ return jsonify({"ok": True, "received": j})
601
+ except Exception as e:
602
+ return jsonify({"error": str(e)}), 400
603
+
604
# ---------- Gradio UI (optional) ----------
def start_gradio_in_thread():
    """Launch an optional Gradio debug UI on GRADIO_PORT in a daemon thread.

    Skips quietly when gradio is not installed; logs (but never raises)
    on any failure, so the Flask app always keeps running.
    """
    if not _HAS_GRADIO:
        logger.info("Gradio not installed - skipping Gradio UI")
        return
    try:
        import gradio as gr

        def gradio_chat(audio, text, temp, max_tokens, model_override):
            """Transcribe optional audio, query the LLM, synthesize TTS.

            Returns ``(chat_history, audio_blob)`` where ``chat_history`` is
            None when there was no usable user input and ``audio_blob`` is
            ``(bytes, mimetype)`` or None when TTS failed.
            """
            user_text = (text or "").strip()
            if audio:
                # Gradio hands us a filepath for recorded audio.
                try:
                    with open(audio, "rb") as f:
                        raw = f.read()
                    stt = stt_from_bytes_hf(raw)
                    if stt and not stt.startswith("[ERROR]"):
                        user_text = stt
                except Exception:
                    logger.exception("gradio stt failed")
            if not user_text:
                return None, ""
            # BUG FIX: original embedded "\\n" (literal backslash-n) in the
            # prompt; real newlines were clearly intended.
            prompt = (
                "You are KC Robot AI, bilingual assistant. "
                "Answer in the same language as the user.\n"
                f"User: {user_text}\nAssistant:"
            )
            model = model_override.strip() if model_override else HF_MODEL
            try:
                ans = llm_generate(
                    prompt,
                    model_override=model,
                    max_new_tokens=int(max_tokens),
                    temperature=float(temp),
                )
            except Exception as e:
                ans = f"[LLM error] {e}"
            # Best-effort TTS preview of the answer.
            try:
                audio_bytes = tts_get_bytes_hf(
                    ans, model_list=[HF_TTS_MODEL] if HF_TTS_MODEL else None
                )
            except Exception:
                audio_bytes = None
            chat_history = [("You", user_text), ("Bot", ans)]
            if audio_bytes:
                return chat_history, (audio_bytes, "audio/mpeg")
            return chat_history, None

        with gr.Blocks(title="KC Robot AI v7.2 (Gradio)") as demo:
            gr.Markdown("## KC Robot AI v7.2 Gradio UI")
            with gr.Row():
                with gr.Column(scale=2):
                    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=420)
                    txt = gr.Textbox(lines=2, placeholder="Nhập câu (VN/EN)...", label="Text input")
                    mic = gr.Audio(source="microphone", type="filepath", label="Record voice")
                    btn = gr.Button("Send")
                    with gr.Row():
                        temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
                        tokens = gr.Slider(32, 1024, value=256, step=16, label="Max tokens")
                        model_override = gr.Textbox(label="Model override (optional)", placeholder=HF_MODEL)
                with gr.Column(scale=1):
                    gr.Markdown("### TTS / STT")
                    tts_in = gr.Textbox(lines=2, label="Text → TTS")
                    tts_btn = gr.Button("Create TTS")
                    tts_audio = gr.Audio(label="TTS audio", interactive=False)
                    up = gr.Audio(source="upload", type="filepath", label="Upload audio")
                    stt_btn = gr.Button("Transcribe")
                    stt_out = gr.Textbox(label="Transcription")

            def send_click(audio_file, typed, temp_v, max_toks, model_o, chat_history):
                """Button handler: run gradio_chat and append to the chat log."""
                chat_hist, _audio_blob = gradio_chat(audio_file, typed, temp_v, max_toks, model_o)
                history = chat_history or []
                # BUG FIX: gradio_chat returns (None, "") on empty input; the
                # old code tested `result is None` (never true for a tuple)
                # and then iterated chat_hist, raising TypeError.
                if chat_hist:
                    history.extend(chat_hist)
                return history, ""

            def transcribe_click(audio_path):
                """Button handler for STT of an uploaded audio file."""
                if not audio_path:
                    return ""
                try:
                    with open(audio_path, "rb") as f:
                        return stt_from_bytes_hf(f.read())
                except Exception as e:
                    logger.exception("gradio transcribe failed")
                    return f"[ERROR] {e}"

            btn.click(
                send_click,
                inputs=[mic, txt, temp, tokens, model_override, chatbot],
                outputs=[chatbot, txt],
            )
            tts_btn.click(
                lambda txt_in: tts_get_bytes_hf(txt_in) if txt_in else None,
                inputs=[tts_in],
                outputs=[tts_audio],
            )
            # BUG FIX: stt_btn existed but was never wired to any handler.
            stt_btn.click(transcribe_click, inputs=[up], outputs=[stt_out])

        # Run Gradio in its own daemon thread so Flask keeps the main thread.
        def _run():
            try:
                demo.launch(
                    server_name="0.0.0.0",
                    server_port=GRADIO_PORT,
                    share=False,
                    prevent_thread_lock=True,
                )
            except Exception:
                logger.exception("Gradio failed to launch")

        t = threading.Thread(target=_run, daemon=True)
        t.start()
        logger.info("Gradio thread started on port %s", GRADIO_PORT)
    except Exception:
        logger.exception("start_gradio_in_thread failed")
686
+
687
# Kick off the optional Gradio UI at import time, if the library is available.
start_gradio_in_thread()
689
+
690
# ---------- Run app ----------
if __name__ == "__main__":
    logger.info("KC Robot AI v7.2 starting on port %s", PORT)
    # Warn (but do not abort) when Hugging Face secrets are missing.
    if not HF_TOKEN:
        logger.warning("HF_TOKEN not set. Add HF_TOKEN to Space Secrets.")
    if not HF_MODEL:
        logger.warning("HF_MODEL not set. Add HF_MODEL to Space Secrets.")
    # Bind to all interfaces so the containerized Space can reach Flask.
    app.run(host="0.0.0.0", port=PORT)