kcrobot102 committed on
Commit
b64c02d
·
verified ·
1 Parent(s): 5071c76
Files changed (1) hide show
  1. app.py +360 -430
app.py CHANGED
@@ -1,468 +1,398 @@
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- KCrobot AI app.py v2.0 MAX FINAL
5
- - Run on Cloud Run / HuggingFace / local
6
- - Use Gemini cloud as brain (GEMINI_API_KEY, GEMINI_MODEL)
7
- - TTS: ElevenLabs -> gTTS fallback
8
- - Endpoints for ESP32: /api/chat, /api/chat_audio, /play_latest, /notify
9
- - Save history & usage in data/
10
- """
11
- from __future__ import annotations
12
  import os
13
- import io
14
- import json
15
- import time
16
- import logging
17
- import pathlib
18
  import tempfile
19
- import base64
20
- from typing import Tuple, Optional, Dict, Any
21
- from datetime import datetime
22
- from flask import Flask, request, jsonify, send_file, render_template_string
23
 
24
  # TTS
25
  from gtts import gTTS
26
-
27
- # Optional Google Generative AI SDK (newer)
28
- GENAI_IMPORTED = False
29
- GENAI_CLIENT = None
30
  try:
31
- # try new google-genai
32
  from google import genai
33
- GENAI_IMPORTED = True
34
- # we will create client lazily with key
35
  except Exception:
36
- try:
37
- # try older google.generativeai for fallback
38
- import google.generativeai as genai_old
39
- GENAI_IMPORTED = True
40
- except Exception:
41
- GENAI_IMPORTED = False
42
 
43
  # HTTP
44
  import requests
45
 
46
- # Logging
47
- logging.basicConfig(level=logging.INFO)
48
- logger = logging.getLogger("kcrobot_v2")
49
-
50
- # -------------------------
51
- # CONFIG (via ENV / secrets)
52
- # -------------------------
53
- ADMIN_TOKEN = os.getenv("ADMIN_TOKEN", "") # optional: protect admin endpoints if used
54
- CFG = {
55
- "GEMINI_API_KEY": os.getenv("GEMINI_API_KEY", "").strip(),
56
- "GEMINI_MODEL": os.getenv("GEMINI_MODEL", "models/gemini-2.5-flash").strip(),
57
- "TELEGRAM_TOKEN": os.getenv("TELEGRAM_TOKEN", "").strip(),
58
- "TELEGRAM_CHAT_ID": os.getenv("TELEGRAM_CHAT_ID", "").strip(),
59
- "ELEVEN_API_KEY": os.getenv("ELEVEN_API_KEY", "").strip(),
60
- "ELEVEN_VOICE_ID": os.getenv("ELEVEN_VOICE_ID", "").strip(),
61
- }
62
- # Server settings
63
- PORT = int(os.getenv("PORT", 7860))
64
- HOST = os.getenv("HOST", "0.0.0.0")
65
-
66
- # -------------------------
67
- # STORAGE & FILES
68
- # -------------------------
69
- BASE = pathlib.Path.cwd()
70
- DATA_DIR = BASE / "data"
71
- DATA_DIR.mkdir(exist_ok=True)
72
- HISTORY_FILE = DATA_DIR / "history.json"
73
- USAGE_FILE = DATA_DIR / "usage.json"
74
- LATEST_MP3 = DATA_DIR / "latest_reply.mp3"
75
-
76
- # -------------------------
77
- # Helpers: JSON safe
78
- # -------------------------
79
- def load_json_safe(path: pathlib.Path, default):
80
- try:
81
- if path.exists():
82
- return json.loads(path.read_text(encoding="utf-8"))
83
- except Exception:
84
- logger.exception("load_json_safe failed for %s", path)
85
- return default
86
 
87
- def save_json_safe(path: pathlib.Path, data) -> bool:
 
 
 
 
 
 
 
88
  try:
89
- path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
90
- return True
91
- except Exception:
92
- logger.exception("save_json_safe failed for %s", path)
93
- return False
94
-
95
- # -------------------------
96
- # USAGE & HISTORY
97
- # -------------------------
98
- def today_str() -> str:
99
- return datetime.utcnow().strftime("%Y-%m-%d")
100
-
101
- def load_usage():
102
- default = {"date": today_str(), "requests_today": 0, "tokens_month": 0}
103
- return load_json_safe(USAGE_FILE, default)
104
-
105
- def save_usage(u):
106
- return save_json_safe(USAGE_FILE, u)
107
-
108
- def increment_usage(tokens=1):
109
- u = load_usage()
110
- if u.get("date") != today_str():
111
- u = {"date": today_str(), "requests_today": 0, "tokens_month": u.get("tokens_month", 0)}
112
- u["requests_today"] = u.get("requests_today", 0) + 1
113
- u["tokens_month"] = u.get("tokens_month", 0) + int(tokens)
114
- save_usage(u)
115
-
116
- def append_history(entry: dict):
117
- h = load_json_safe(HISTORY_FILE, [])
118
- h.append(entry)
119
- if len(h) > 1000:
120
- h = h[-1000:]
121
- save_json_safe(HISTORY_FILE, h)
122
-
123
- # -------------------------
124
- # Language detection (simple)
125
- # -------------------------
126
- VIET_CHAR_RE = __import__("re").compile(
127
- r"[àáạảãâầấậẩẫăằắặẳẵđèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹ]",
128
- __import__("re").I
129
  )
130
- def detect_lang(text: str) -> str:
131
- if not text or not isinstance(text, str):
132
- return "en"
133
- if VIET_CHAR_RE.search(text):
134
- return "vi"
135
- low = text.lower()
136
- en_signs = ["hello", "what", "how", "please", "thank", "today", "weather"]
137
- for w in en_signs:
138
- if w in low:
139
- return "en"
140
- return "en"
141
-
142
- # -------------------------
143
- # Gemini wrapper (new client preferred)
144
- # -------------------------
145
- def ensure_genai_client():
146
- global GENAI_CLIENT
147
- if GENAI_CLIENT:
148
- return GENAI_CLIENT
149
- key = CFG.get("GEMINI_API_KEY") or ""
150
- if not key:
151
- logger.warning("No GEMINI key configured.")
152
- return None
153
  try:
154
- # new style
155
- try:
156
- from google import genai as genai_new
157
- GENAI_CLIENT = genai_new.Client(api_key=key)
158
- logger.info("Gemini client (new) configured.")
159
- return GENAI_CLIENT
160
- except Exception:
161
- pass
162
- # older google.generativeai style
163
  try:
164
- import google.generativeai as genai_old
165
- genai_old.configure(api_key=key)
166
- GENAI_CLIENT = genai_old
167
- logger.info("Gemini client (old) configured.")
168
- return GENAI_CLIENT
169
  except Exception:
170
- pass
171
  except Exception:
172
- logger.exception("ensure_genai_client failed")
173
- return None
174
-
175
- def call_gemini(prompt: str, temperature: float = 0.2, max_tokens: int = 512) -> Dict[str, Any]:
176
- key = CFG.get("GEMINI_API_KEY") or ""
177
- model = CFG.get("GEMINI_MODEL") or "models/gemini-2.5-flash"
178
- if not key:
179
- return {"ok": False, "error": "Gemini API key not configured"}
180
- client = ensure_genai_client()
181
- # try SDK client first
182
- if client:
183
  try:
184
- # new Client API (genai.Client)
185
- if hasattr(client, "models") and hasattr(client.models, "generate_content"):
186
- resp = client.models.generate_content(model=model, contents=prompt)
187
- # Response may have text attribute
188
- text = getattr(resp, "text", None)
189
- if text:
190
- return {"ok": True, "text": text}
191
- # fallback: try output field
192
- if isinstance(resp, dict):
193
- return {"ok": True, "text": json.dumps(resp)[:2000]}
194
- return {"ok": True, "text": str(resp)}
195
- # older google.generativeai
196
- if hasattr(client, "generate_text") or hasattr(client, "generate"):
197
- # try older generate_text
198
- try:
199
- resp = client.generate_text(model=model, prompt=prompt, temperature=temperature)
200
- text = getattr(resp, "text", None) or resp.get("candidates", [{}])[0].get("content", "")
201
- return {"ok": True, "text": text}
202
- except Exception:
203
- pass
204
- except Exception:
205
- logger.exception("Gemini SDK call failed; will try REST fallback.")
206
- # REST fallback (v1beta)
207
- try:
208
- url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generate"
209
- headers = {"Content-Type": "application/json"}
210
- payload = {
211
- "prompt": {
212
- "messages": [
213
- {"author": "system", "content": {"text": "You are a helpful assistant."}},
214
- {"author": "user", "content": {"text": prompt}}
215
- ]
216
- },
217
- "maxOutputTokens": max_tokens,
218
- "temperature": temperature
219
- }
220
- r = requests.post(url, params={"key": key}, json=payload, headers=headers, timeout=30)
221
- if r.status_code >= 400:
222
- return {"ok": False, "error": f"HTTP {r.status_code}: {r.text}"}
223
- j = r.json()
224
- # parse candidate content
225
- cand = j.get("candidates")
226
- if cand and isinstance(cand, list):
227
- c0 = cand[0]
228
- content = c0.get("content")
229
- if isinstance(content, list):
230
- parts = []
231
- for c in content:
232
- if isinstance(c, dict) and "text" in c:
233
- parts.append(c["text"])
234
- if parts:
235
- return {"ok": True, "text": "".join(parts)}
236
- # fallback to output field
237
- output = j.get("output")
238
- if isinstance(output, str):
239
- return {"ok": True, "text": output}
240
- # else return truncated json
241
- return {"ok": True, "text": json.dumps(j)[:2000]}
242
- except Exception as e:
243
- logger.exception("Gemini REST error")
244
- return {"ok": False, "error": str(e)}
245
-
246
- # -------------------------
247
- # TTS (ElevenLabs -> gTTS fallback)
248
- # -------------------------
249
- def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
250
- url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
251
- headers = {"xi-api-key": api_key, "Content-Type": "application/json"}
252
- payload = {"text": text, "voice_settings": {"stability": 0.6, "similarity_boost": 0.75}}
253
- r = requests.post(url, json=payload, headers=headers, timeout=30)
254
- r.raise_for_status()
255
- return r.content
256
-
257
- def tts_gtts_bytes(text: str, lang: str = "vi") -> bytes:
258
- tts = gTTS(text=text, lang=lang)
259
- bio = io.BytesIO()
260
- tts.write_to_fp(bio)
261
- bio.seek(0)
262
- return bio.read()
263
-
264
- def synthesize_and_save(answer: str, lang_hint: str = "vi") -> Tuple[bool, str]:
265
  try:
266
- mp3_bytes = None
267
- if CFG.get("ELEVEN_API_KEY") and CFG.get("ELEVEN_VOICE_ID"):
268
- try:
269
- mp3_bytes = tts_elevenlabs_bytes(answer, CFG["ELEVEN_VOICE_ID"], CFG["ELEVEN_API_KEY"])
270
- logger.info("TTS: used ElevenLabs")
271
- except Exception:
272
- logger.exception("ElevenLabs failed -> fallback gTTS")
273
- mp3_bytes = None
274
- if mp3_bytes is None:
275
- lang_code = "vi" if lang_hint.startswith("vi") else "en"
276
- mp3_bytes = tts_gtts_bytes(answer, lang=lang_code)
277
- logger.info("TTS: used gTTS")
278
- with open(LATEST_MP3, "wb") as f:
279
- f.write(mp3_bytes)
280
- return True, str(LATEST_MP3)
281
  except Exception as e:
282
- logger.exception("synthesize_and_save failed")
283
- return False, f"TTS error: {e}"
284
-
285
- # -------------------------
286
- # Telegram notify
287
- # -------------------------
288
- def send_telegram_message(text: str) -> bool:
289
- token = CFG.get("TELEGRAM_TOKEN") or ""
290
- cid = CFG.get("TELEGRAM_CHAT_ID") or ""
291
- if not token or not cid:
292
- logger.info("Telegram not configured.")
293
- return False
294
  try:
295
- url = f"https://api.telegram.org/bot{token}/sendMessage"
296
- requests.post(url, json={"chat_id": cid, "text": text}, timeout=8)
297
- return True
 
 
298
  except Exception:
299
- logger.exception("send_telegram_message failed")
300
- return False
 
 
 
301
 
302
- # -------------------------
303
- # Flask app & UI
304
- # -------------------------
305
- app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
306
 
307
- INDEX_HTML = """
308
- <!doctype html>
309
- <html>
310
- <head><meta charset="utf-8"><title>KCrobot AI V2 Max</title>
311
- <style>
312
- body{font-family:Arial;background:#071225;color:#fff;padding:20px}
313
- .container{max-width:900px;margin:0 auto}
314
- textarea{width:100%;padding:10px;border-radius:8px;background:#0b1221;color:#fff;border:1px solid #134}
315
- button{padding:10px 14px;border-radius:8px;background:#0ea5ff;color:#012;border:none;cursor:pointer}
316
- #resp{white-space:pre-wrap;margin-top:12px;background:#071025;padding:10px;border-radius:6px}
317
- audio{margin-top:12px}
318
- .small{font-size:0.9rem;color:#9fb3c8}
319
- </style>
320
- </head>
321
- <body>
322
- <div class="container">
323
- <h1>🤖 KCrobot AI — V2 MAX</h1>
324
- <p class="small">Model: {{model}} — Gemini Key: {{gemini}} — Telegram: {{tg}}</p>
325
- <textarea id="q" rows="4" placeholder="Nhập tiếng Việt / English..."></textarea>
326
- <p>
327
- <label><input id="voice" type="checkbox" checked> Voice ON</label>
328
- <button onclick="send()">Gửi & Nghe</button>
329
- </p>
330
- <div id="resp"></div>
331
- <audio id="audio" controls style="display:none"></audio>
332
- <script>
333
- async function send(){
334
- const q = document.getElementById('q').value;
335
- const voice = document.getElementById('voice').checked;
336
- if(!q){ alert('Nhập nội dung'); return; }
337
- document.getElementById('resp').innerText = '⏳ Đang xử lý...';
338
- const res = await fetch('/api/chat', {
339
- method: 'POST', headers: {'Content-Type':'application/json'},
340
- body: JSON.stringify({q: q, voice: voice})
341
- });
342
- const j = await res.json();
343
- if(j.error){ document.getElementById('resp').innerText = 'Error: ' + j.error; return; }
344
- document.getElementById('resp').innerText = j.answer;
345
- if(j.play_url){
346
- const audio = document.getElementById('audio');
347
- audio.src = j.play_url + '?t=' + Date.now();
348
- audio.style.display='block';
349
- audio.play();
350
- }
351
- }
352
- </script>
353
- </div>
354
- </body>
355
- </html>
356
- """
357
 
 
358
  @app.route("/", methods=["GET"])
359
- def root():
360
- model = CFG.get("GEMINI_MODEL","n/a")
361
- gem = bool(CFG.get("GEMINI_API_KEY"))
362
- tg = bool(CFG.get("TELEGRAM_TOKEN") and CFG.get("TELEGRAM_CHAT_ID"))
363
- return render_template_string(INDEX_HTML, model=model, gemini=("✅" if gem else "❌"), tg=("✅" if tg else "❌"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  @app.route("/api/chat", methods=["POST"])
366
  def api_chat():
367
- data = request.get_json(silent=True) or {}
368
- q = data.get("q") or data.get("question") or ""
369
- voice_on = bool(data.get("voice", True))
370
- if not q or not str(q).strip():
371
- return jsonify({"error":"missing 'q'"}), 400
372
- lang = detect_lang(q)
373
- prompt = (f"Bạn là robot trợ lý, trả lời bằng tiếng Việt tự nhiên: {q}" if lang=="vi"
374
- else f"You are a helpful assistant. Answer in natural English: {q}")
375
- gem = call_gemini(prompt)
376
- if not gem.get("ok"):
377
- answer = f"[Gemini error] {gem.get('error')}"
378
- else:
379
- answer = gem.get("text","")
380
- increment_usage(max(1, len(answer.split())))
381
- append_history({"ts": time.time(), "q": q, "answer": answer, "lang": lang})
382
- play_url = None
383
- if voice_on:
384
- ok, path_or_err = synthesize_and_save(answer, lang_hint=lang)
385
- if ok:
386
- play_url = "/play_latest"
387
- # Telegram notify in background
388
  try:
389
- # keep simple: send summary
390
- if CFG.get("TELEGRAM_TOKEN") and CFG.get("TELEGRAM_CHAT_ID"):
391
- # non-blocking
392
- import threading
393
- threading.Thread(target=send_telegram_message, args=(f"Q: {q}\nA: {answer}",)).start()
394
- except Exception:
395
- pass
396
- resp = {"answer": answer}
397
- if play_url:
398
- resp["play_url"] = play_url
399
- return jsonify(resp)
400
-
401
- @app.route("/api/chat_audio", methods=["POST"])
402
- def api_chat_audio():
403
- # Accept wav upload as raw body or file named 'file'
404
- wav_bytes = None
405
- if 'file' in request.files:
406
- wav_bytes = request.files['file'].read()
407
- else:
408
- wav_bytes = request.get_data()
409
- if not wav_bytes:
410
- return jsonify({"error":"no audio provided"}),400
411
- # If client provides X-Text header (pre-transcribed text), use it
412
- provided_text = request.headers.get("X-Text","")
413
- if provided_text:
414
- q = provided_text
415
- else:
416
- # Server STT not implemented in this build
417
- return jsonify({"error":"STT not enabled on server. Please send text or add STT implementation."}), 501
418
- lang = detect_lang(q)
419
- prompt = (f"Bạn là robot trợ lý, trả lời bằng tiếng Việt tự nhiên: {q}" if lang=="vi"
420
- else f"You are a helpful assistant. Answer in natural English: {q}")
421
- gem = call_gemini(prompt)
422
- if not gem.get("ok"):
423
- answer = f"[Gemini error] {gem.get('error')}"
424
- else:
425
- answer = gem.get("text","")
426
- append_history({"ts": time.time(), "q": q, "answer": answer, "lang": lang})
427
- ok, path_or_err = synthesize_and_save(answer, lang_hint=lang)
428
- if ok:
429
- return jsonify({"question": q, "answer": answer, "play_url": "/play_latest"})
430
- return jsonify({"error":"tts_failed", "details": path_or_err}), 500
431
-
432
- @app.route("/play_latest", methods=["GET"])
433
- def play_latest():
434
- if not LATEST_MP3.exists():
435
- return jsonify({"error":"no audio"}), 404
436
- return send_file(str(LATEST_MP3), mimetype="audio/mpeg")
437
-
438
- @app.route("/notify", methods=["POST"])
439
- def notify():
440
- data = request.get_json(silent=True) or {}
441
- event = data.get("event","event")
442
- msg = data.get("msg","")
443
  try:
444
- import threading
445
- threading.Thread(target=send_telegram_message, args=(f"[Robot Notify] {event}: {msg}",)).start()
446
- except Exception:
447
- pass
448
- return jsonify({"sent": True})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
- @app.route("/history", methods=["GET"])
451
- def history():
452
- h = load_json_safe(HISTORY_FILE, [])
453
- return jsonify(h)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
 
455
- @app.route("/health", methods=["GET"])
456
- def health():
457
- return jsonify({"status":"ok","time":time.time(),"model":CFG.get("GEMINI_MODEL","n/a")})
 
 
 
 
 
 
 
 
 
 
 
 
458
 
459
- # -------------------------
460
- # Startup
461
- # -------------------------
462
  if __name__ == "__main__":
463
- # ensure history/usage exist
464
- load_json_safe(HISTORY_FILE, [])
465
- load_usage()
466
- logger.info("KCrobot v2 starting. Gemini: %s, Eleven: %s, Telegram: %s",
467
- bool(CFG.get("GEMINI_API_KEY")), bool(CFG.get("ELEVEN_API_KEY")), bool(CFG.get("TELEGRAM_TOKEN")))
468
- app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
 
1
  #!/usr/bin/env python3
2
+ # KC ROBOT AI - app.py (Tâm hồn) v3.0MAX PRO
3
+ # - Accepts audio uploads (/api/audio) or text (/api/chat)
4
+ # - STT via SpeechRecognition (Google Web Speech) for short WAV
5
+ # - Calls Gemini (if API key provided) to generate reply
6
+ # - Cleans reply (remove punctuation/emoji) before TTS
7
+ # - Synthesizes MP3 via gTTS and serves via /tts-file/<id>
8
+ # - device command queue: /device/commands (ESP32 polls)
9
+ # - forwards sensor events to Telegram via /api/sensor or /api/forward-telegram
 
 
10
  import os
11
+ import re
12
+ import uuid
13
+ import shutil
14
+ import traceback
 
15
  import tempfile
16
+ from pathlib import Path
17
+ from flask import Flask, request, jsonify, send_file, abort
18
+ from dotenv import load_dotenv
 
19
 
20
  # TTS
21
  from gtts import gTTS
22
+ # STT
23
+ import speech_recognition as sr
24
+ # AI client (optional)
 
25
  try:
 
26
  from google import genai
 
 
27
  except Exception:
28
+ genai = None
 
 
 
 
 
29
 
30
  # HTTP
31
  import requests
32
 
33
# Load variables from a local .env file (no-op when the file is absent).
load_dotenv()

# Config — all secrets/settings come from the environment.
GEMINI_API_KEY_ENV = os.getenv("GEMINI_API_KEY", "")
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
TELEGRAM_TOKEN_ENV = os.getenv("TELEGRAM_TOKEN", "")
TELEGRAM_CHAT_ID_ENV = os.getenv("TELEGRAM_CHAT_ID", "")
PORT = int(os.getenv("PORT", "8080"))

# Scratch directory for uploaded WAVs and generated MP3s.
TMP_DIR = Path("/tmp/kcrobot_audio")
TMP_DIR.mkdir(parents=True, exist_ok=True)

app = Flask(__name__)

# Initialize Gemini client if available (key present AND the SDK imported).
gemini_client = None
if GEMINI_API_KEY_ENV and genai is not None:
    try:
        gemini_client = genai.Client(api_key=GEMINI_API_KEY_ENV)
    except Exception as e:
        # Startup must not crash when the key is bad; handlers fall back gracefully.
        print("Gemini init error:", e)
        gemini_client = None

# In-memory commands queue per device_id (simple)
# NOTE(review): not persisted and not thread-safe — acceptable for a
# single-worker dev server; confirm before scaling workers.
DEVICE_COMMANDS = {}  # device_id -> [ {cmd}, ... ]
58
+
59
+ # Regex / utils for cleaning text (remove punctuation/emoji/digits)
60
# Matches common emoji blocks so they can be stripped before TTS.
_EMOJI_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F680-\U0001F6FF"
    "\U00002600-\U000026FF"
    "\U00002700-\U000027BF"
    "\U0001F1E6-\U0001F1FF"
    "]+", flags=re.UNICODE
)

# Spoken punctuation words (Vietnamese + English) that STT sometimes emits.
_PUNCT_WORDS = [
    r"\bdấu\s*chấm\b", r"\bchấm\b",
    r"\bdấu\s*phẩy\b", r"\bphẩy\b", r"\bphay\b",
    r"\bdấu\s*sao\b", r"\bsao\b",
    r"\bdấu\s*hỏi\b", r"\bhỏi\b",
    r"\bdấu\s*hai\s*chấm\b", r"\bcomma\b", r"\bdot\b", r"\bperiod\b"
]

def clean_text_keep_letters(text: str) -> str:
    """Strip spoken-punctuation words, emoji and non-letter characters.

    Returns a whitespace-normalized string containing only Latin/Vietnamese
    letters and spaces, suitable for feeding to the TTS engine.
    """
    if not text:
        return ""
    result = str(text)
    for word_pattern in _PUNCT_WORDS:
        result = re.sub(word_pattern, " ", result, flags=re.IGNORECASE)
    result = _EMOJI_RE.sub(" ", result)
    # Drop everything except ASCII letters, the Vietnamese letter range, and whitespace.
    result = re.sub(r"[^A-Za-zÀ-ỹ\s]", " ", result)
    return re.sub(r"\s+", " ", result).strip()
90
+
91
def detect_language(text: str) -> str:
    """Best-effort language guess: 'vi' on Vietnamese diacritics, else langdetect or 'en'."""
    try:
        # Any Vietnamese diacritic is a strong, cheap signal for Vietnamese.
        has_diacritics = re.search(
            r"[àáảãạăắằẳẵặâầấẩẫậđèéẻẽẹêềếểễệìíỉĩịòóỏõọôồốổỗộơờớởỡợùúủũụưừứửữựỳýỷỹỵ]",
            text, flags=re.IGNORECASE)
        if has_diacritics:
            return "vi"
        try:
            # langdetect is an optional dependency; default to English when
            # it is missing or fails on short/ambiguous input.
            from langdetect import detect
            return detect(text)
        except Exception:
            return "en"
    except Exception:
        # e.g. text is None -> re.search raises TypeError
        return "en"
104
+
105
def call_gemini(prompt: str, api_key_override: str = None) -> str:
    """
    Call Gemini to generate reply. If no Gemini client available, return fallback message.

    A per-request key (api_key_override) takes precedence over the
    server-wide client. This function never raises: generation failures
    are returned as an error string so request handlers stay simple.
    """
    # Prefer a throwaway client built from the caller-supplied key;
    # otherwise reuse the module-level client configured at startup.
    client = None
    if api_key_override and genai is not None:
        try:
            client = genai.Client(api_key=api_key_override)
        except Exception as e:
            print("Gemini override init error:", e)
            client = None
    elif gemini_client:
        client = gemini_client

    if client is None:
        # fallback: simple echo or canned responses
        # Keep it useful: return acknowledgement and simple help
        return "Xin chào! Mình là KCrobot. (Gemini chưa cấu hình trên server.)"

    try:
        response = client.models.generate_content(model=GEMINI_MODEL, contents=prompt)
        # The SDK response normally exposes .text; dict-shaped responses are
        # handled next, and anything else is stringified as a last resort.
        if hasattr(response, "text"):
            return response.text.strip()
        if isinstance(response, dict) and "text" in response:
            return response["text"].strip()
        return str(response)
    except Exception as e:
        # Surface the failure as reply text instead of propagating.
        traceback.print_exc()
        return f"⚠️ Gemini error: {e}"
134
+
135
def synthesize_to_mp3_file(text: str, lang_hint: str = None) -> Path:
    """Clean *text*, pick a TTS language and write an MP3 under TMP_DIR.

    Raises ValueError when nothing speakable remains after cleaning.
    Returns the path of the freshly generated file.
    """
    cleaned = clean_text_keep_letters(text)
    if not cleaned:
        raise ValueError("No text to synthesize after cleaning.")
    # Resolve the language: an explicit hint wins, otherwise detect from the text.
    try:
        if lang_hint:
            lang = lang_hint
        else:
            lang = "vi" if detect_language(cleaned).startswith("vi") else "en"
    except Exception:
        lang = "en"
    # Random hex name so concurrent requests never collide on disk.
    out_path = TMP_DIR / f"{uuid.uuid4().hex}.mp3"
    gTTS(text=cleaned, lang=lang).save(str(out_path))
    return out_path
153
 
154
def transcribe_wav_file(path: Path) -> str:
    """Transcribe a short WAV file with Google's free Web Speech API.

    Tries Vietnamese ("vi-VN") first, then falls back to the recognizer's
    default (English). Returns "" when the audio cannot be transcribed.
    Requires internet access; intended for short clips only.
    """
    r = sr.Recognizer()
    with sr.AudioFile(str(path)) as source:
        audio = r.record(source)
    try:
        # BUGFIX: the previous code evaluated detect_language(source=None) in
        # the condition — a TypeError (no such keyword) that the broad except
        # swallowed, so every call returned "". We cannot know the language
        # before transcribing, so attempt vi-VN and fall back to the default.
        try:
            return r.recognize_google(audio, language="vi-VN")
        except sr.UnknownValueError:
            return r.recognize_google(audio)
    except sr.UnknownValueError:
        return ""
    except Exception as e:
        print("STT error:", e)
        return ""
168
 
169
# Utility to save uploaded file bytes to temp wav path
def save_upload_to_tempwav(file_storage) -> Path:
    """Persist an uploaded FileStorage to a uniquely named WAV inside TMP_DIR."""
    destination = TMP_DIR / f"{uuid.uuid4().hex}.wav"
    file_storage.save(str(destination))
    return destination
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
# endpoint: health
@app.route("/", methods=["GET"])
def index():
    """Health probe: report service status and whether Gemini is configured."""
    return {
        "status": "KCrobot AI mind running",
        "gemini_configured": bool(gemini_client is not None),
    }
179
+
180
# endpoint: receive raw audio file from ESP32 (multipart/form-data name="file")
# server will STT -> call Gemini -> synthesize TTS -> return reply & audio_url
@app.route("/api/audio", methods=["POST"])
def api_audio():
    """STT the uploaded WAV, ask Gemini, synthesize the reply, queue device commands."""
    try:
        if 'file' not in request.files:
            return jsonify({"error": "missing file field"}), 400
        f = request.files['file']
        # save to temp
        wav_path = save_upload_to_tempwav(f)
        # optionally convert with pydub if not proper format (here assume WAV 16k/16bit)
        # transcribe: try Vietnamese first, then English
        transcript = ""
        try:
            r = sr.Recognizer()
            with sr.AudioFile(str(wav_path)) as source:
                audio = r.record(source)
            try:
                transcript = r.recognize_google(audio, language="vi-VN")
            except Exception:
                try:
                    transcript = r.recognize_google(audio, language="en-US")
                except Exception:
                    transcript = ""
        except Exception as e:
            print("STT pipeline error:", e)
            transcript = ""

        # 200 (not 4xx) is deliberate so the ESP32 client can read the error body.
        if not transcript:
            return jsonify({"error": "could not transcribe audio"}), 200

        # optional: a Gemini key may ride along in the form data or a JSON body.
        # BUGFIX: the old one-liner parsed as
        #   (form.get(...) or request.json.get(...)) if request.is_json else None
        # which threw the multipart form value away on every non-JSON request
        # (and /api/audio is always multipart).
        gemini_key = request.form.get("gemini_api_key")
        if not gemini_key and request.is_json:
            gemini_key = (request.get_json(silent=True) or {}).get("gemini_api_key")

        reply = call_gemini(transcript, api_key_override=gemini_key)
        # parse relay commands from reply (simple)
        commands = parse_relay_commands(reply)

        # synthesize tts and return audio_url; a TTS failure degrades to ""
        try:
            tts_file = synthesize_to_mp3_file(reply)
            file_id = tts_file.stem
            base = request.url_root.rstrip("/")
            audio_url = f"{base}/tts-file/{file_id}"
        except Exception as e:
            print("TTS error:", e)
            audio_url = ""

        # cleanup uploaded wav (best-effort)
        try:
            wav_path.unlink()
        except Exception:
            pass

        # if any device commands detected, push to DEVICE_COMMANDS queue
        if commands:
            device_id = request.form.get("device_id") or "esp32_default"
            DEVICE_COMMANDS.setdefault(device_id, []).extend(commands)

        return jsonify({"reply": reply, "clean_text": clean_text_keep_letters(reply), "audio_url": audio_url, "commands": commands}), 200

    except Exception as e:
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
246
+
247
# endpoint: accept text and return reply + audio_url
@app.route("/api/chat", methods=["POST"])
def api_chat():
    """Text chat: forward the message to Gemini, synthesize the reply, queue commands."""
    try:
        payload = request.get_json(force=True, silent=True) or {}
        message = payload.get("message", "") or ""
        if not message:
            return jsonify({"error": "missing message"}), 400
        override_key = payload.get("gemini_api_key") or None
        reply = call_gemini(message, api_key_override=override_key)
        commands = parse_relay_commands(reply)
        if commands:
            target_device = payload.get("device_id") or "esp32_default"
            DEVICE_COMMANDS.setdefault(target_device, []).extend(commands)
        # synthesize the spoken reply; a TTS failure degrades to an empty URL
        try:
            mp3_path = synthesize_to_mp3_file(reply)
            audio_url = request.url_root.rstrip("/") + f"/tts-file/{mp3_path.stem}"
        except Exception as e:
            print("TTS fail:", e)
            audio_url = ""
        return jsonify({
            "reply": reply,
            "clean_text": clean_text_keep_letters(reply),
            "audio_url": audio_url,
            "commands": commands,
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
+ return jsonify({"error": str(e)}), 500
272
+
273
+ # serve mp3
274
+ @app.route("/tts-file/<file_id>", methods=["GET"])
275
+ def tts_file(file_id):
276
+ target = None
277
+ for f in TMP_DIR.iterdir():
278
+ if f.is_file() and f.stem == file_id:
279
+ target = f
280
+ break
281
+ if not target:
282
+ return abort(404)
283
+ return send_file(str(target), mimetype="audio/mpeg")
284
+
285
+ # sensor forwarding -> telegram
286
+ @app.route("/api/sensor", methods=["POST"])
287
+ def api_sensor():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  try:
289
+ data = request.get_json(force=True, silent=True) or {}
290
+ text = data.get("text") or f"Sensor event: {data}"
291
+ token = data.get("telegram_token") or TELEGRAM_TOKEN_ENV
292
+ chat = data.get("telegram_chat_id") or TELEGRAM_CHAT_ID_ENV
293
+ if token and chat:
294
+ try:
295
+ requests.post(f"https://api.telegram.org/bot{token}/sendMessage",
296
+ json={"chat_id": chat, "text": text}, timeout=6)
297
+ except Exception as e:
298
+ print("Telegram send error:", e)
299
+ return jsonify({"status":"ok"})
300
+ except Exception as e:
301
+ return jsonify({"error": str(e)}), 500
302
+
303
# forward telegram (ESP32 can call to avoid exposing token)
@app.route("/api/forward-telegram", methods=["POST"])
def api_forward_telegram():
    """Relay a text message to Telegram on behalf of the ESP32."""
    try:
        body = request.get_json(force=True, silent=True) or {}
        bot_token = body.get("token") or TELEGRAM_TOKEN_ENV
        chat_id = body.get("chat_id") or TELEGRAM_CHAT_ID_ENV
        message = body.get("text", "")
        if not (bot_token and chat_id and message):
            return jsonify({"error": "missing token/chat/text"}), 400
        resp = requests.post(
            f"https://api.telegram.org/bot{bot_token}/sendMessage",
            json={"chat_id": chat_id, "text": message},
            timeout=6,
        )
        return jsonify({"ok": resp.ok, "resp": resp.text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
318
+
319
# device polls for commands (ESP32 calls periodically)
@app.route("/device/commands", methods=["GET"])
def device_commands_get():
    """Drain and return the pending command queue for the polling device."""
    device_id = request.args.get("device_id", "esp32_default")
    pending = DEVICE_COMMANDS.get(device_id, [])
    # return and clear queue — delivery is at-most-once
    DEVICE_COMMANDS[device_id] = []
    return jsonify({"commands": pending})
327
 
328
# simple admin endpoint to add a command to device queue (for testing)
@app.route("/device/commands", methods=["POST"])
def device_commands_post():
    """Queue a single command for a device; intended for manual testing."""
    body = request.get_json(force=True, silent=True) or {}
    device_id = body.get("device_id", "esp32_default")
    command = body.get("command")
    if not command:
        return jsonify({"error": "missing command"}), 400
    DEVICE_COMMANDS.setdefault(device_id, []).append(command)
    return jsonify({"status": "queued", "device_id": device_id, "command": command})
338
+
339
# utility: parse relay commands from text (basic heuristic)
def parse_relay_commands(text: str):
    """
    Return list of commands like {"type":"relay","relay":1,"action":"on"}
    Supports Vietnamese and English simple phrases:
    - 'bật đèn 1', 'tắt đèn 2'
    - 'turn on relay 1', 'turn off relay2'
    """
    t = text.lower()
    # One (pattern, action) pair per phrase family. The captured group is \d+,
    # so int() cannot fail — the old version's four duplicated loops with bare
    # `except: pass` guards were dead code.
    patterns = [
        (r"\b(?:bật|mở)\s+(?:đèn|relay)?\s*(\d+)", "on"),      # Vietnamese on
        (r"\b(?:tắt|đóng)\s+(?:đèn|relay)?\s*(\d+)", "off"),   # Vietnamese off
        (r"\bturn\s+on\s+(?:relay|light)?\s*(\d+)", "on"),     # English on
        (r"\bturn\s+off\s+(?:relay|light)?\s*(\d+)", "off"),   # English off
    ]
    cmds = []
    for pattern, action in patterns:
        for number in re.findall(pattern, t):
            cmds.append({"type": "relay", "relay": int(number), "action": action})
    return cmds
380
 
381
# cleanup temp files older than TTL seconds
@app.route("/_cleanup_tmp", methods=["POST"])
def cleanup_tmp():
    """Delete temp audio files whose mtime is older than `ttl` seconds (default 1h)."""
    import time  # proper import instead of the old __import__('time') hack

    data = request.get_json(force=True, silent=True) or {}
    ttl = int(data.get("ttl", 3600))
    now = time.time()
    removed = 0
    for f in TMP_DIR.iterdir():
        try:
            if f.is_file() and (f.stat().st_mtime + ttl) < now:
                f.unlink()
                removed += 1
        except OSError:
            # File may vanish between stat() and unlink(); skip and continue.
            pass
    return {"removed": removed}
396
 
 
 
 
397
if __name__ == "__main__":
    # Dev entry point; in production run behind gunicorn/uwsgi instead.
    app.run(host="0.0.0.0", port=PORT)