initial commit
Browse files
app.py
CHANGED
|
@@ -2,21 +2,21 @@
|
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
"""
|
| 4 |
KCrobot AI — Vmax Final (voice-first)
|
| 5 |
-
- Default Gemini model: gemini-1.5-pro (
|
| 6 |
-
- Read secrets from
|
| 7 |
GEMINI_API_KEY, GEMINI_MODEL, TELEGRAM_TOKEN, TELEGRAM_CHAT_ID,
|
| 8 |
ELEVEN_API_KEY, ELEVEN_VOICE_ID, GOOGLE_SPEECH_LANG
|
| 9 |
- Voice-first: ESP32 uploads audio -> /chat_audio -> STT -> Gemini -> TTS -> MP3
|
| 10 |
-
- If STT libs
|
| 11 |
- Endpoints:
|
| 12 |
-
GET /
|
| 13 |
-
POST /chat_text
|
| 14 |
-
POST /chat_audio
|
| 15 |
-
POST /esp/send_text
|
| 16 |
-
GET /play_latest
|
| 17 |
-
GET /_history
|
| 18 |
-
POST /notify
|
| 19 |
-
GET /health
|
| 20 |
"""
|
| 21 |
from __future__ import annotations
|
| 22 |
|
|
@@ -32,33 +32,31 @@ import pathlib
|
|
| 32 |
from datetime import datetime
|
| 33 |
from typing import Tuple, Dict, Any, Optional
|
| 34 |
|
| 35 |
-
import requests
|
| 36 |
from flask import Flask, request, jsonify, send_file, render_template_string
|
|
|
|
| 37 |
|
| 38 |
-
# ---
|
| 39 |
USE_GENAI_SDK = False
|
| 40 |
GENAI_CLIENT = None
|
| 41 |
try:
|
| 42 |
-
# new official: from google import genai
|
| 43 |
from google import genai # type: ignore
|
| 44 |
USE_GENAI_SDK = True
|
| 45 |
except Exception:
|
| 46 |
try:
|
| 47 |
-
# fallback: google.generativeai (older package)
|
| 48 |
import google.generativeai as genai # type: ignore
|
| 49 |
USE_GENAI_SDK = True
|
| 50 |
except Exception:
|
| 51 |
genai = None
|
| 52 |
USE_GENAI_SDK = False
|
| 53 |
|
| 54 |
-
# TTS
|
| 55 |
try:
|
| 56 |
from gtts import gTTS # type: ignore
|
| 57 |
GTTS_AVAILABLE = True
|
| 58 |
except Exception:
|
| 59 |
GTTS_AVAILABLE = False
|
| 60 |
|
| 61 |
-
# Optional STT libs
|
| 62 |
try:
|
| 63 |
import speech_recognition as sr # type: ignore
|
| 64 |
from pydub import AudioSegment # type: ignore
|
|
@@ -69,7 +67,7 @@ except Exception:
|
|
| 69 |
STT_AVAILABLE = False
|
| 70 |
|
| 71 |
# -------------------------
|
| 72 |
-
#
|
| 73 |
# -------------------------
|
| 74 |
CFG = {
|
| 75 |
"GEMINI_API_KEY": os.getenv("GEMINI_API_KEY", "").strip(),
|
|
@@ -81,7 +79,7 @@ CFG = {
|
|
| 81 |
"GOOGLE_SPEECH_LANG": os.getenv("GOOGLE_SPEECH_LANG", "vi-VN").strip(),
|
| 82 |
}
|
| 83 |
|
| 84 |
-
# Model fallback list
|
| 85 |
MODEL_FALLBACK_LIST = [
|
| 86 |
CFG.get("GEMINI_MODEL") or "gemini-1.5-pro",
|
| 87 |
"gemini-1.5-flash",
|
|
@@ -89,7 +87,7 @@ MODEL_FALLBACK_LIST = [
|
|
| 89 |
"gemini-2.5-pro",
|
| 90 |
]
|
| 91 |
|
| 92 |
-
#
|
| 93 |
seen = set()
|
| 94 |
MODEL_FALLBACK = []
|
| 95 |
for m in MODEL_FALLBACK_LIST:
|
|
@@ -101,18 +99,18 @@ for m in MODEL_FALLBACK_LIST:
|
|
| 101 |
GEMINI_KEY = CFG.get("GEMINI_API_KEY") or ""
|
| 102 |
if USE_GENAI_SDK and GEMINI_KEY:
|
| 103 |
try:
|
| 104 |
-
# new SDK style
|
| 105 |
GENAI_CLIENT = genai.Client(api_key=GEMINI_KEY) # type: ignore
|
| 106 |
except Exception:
|
| 107 |
try:
|
| 108 |
-
# older style configure
|
| 109 |
genai.configure(api_key=GEMINI_KEY) # type: ignore
|
| 110 |
GENAI_CLIENT = genai # type: ignore
|
| 111 |
except Exception:
|
| 112 |
GENAI_CLIENT = None
|
| 113 |
|
| 114 |
# -------------------------
|
| 115 |
-
#
|
| 116 |
# -------------------------
|
| 117 |
BASE = pathlib.Path.cwd()
|
| 118 |
DATA_DIR = BASE / "data"
|
|
@@ -125,29 +123,29 @@ LATEST_MP3 = DATA_DIR / "latest_reply.mp3"
|
|
| 125 |
logging.basicConfig(level=logging.INFO)
|
| 126 |
logger = logging.getLogger("kcrobot_vmax")
|
| 127 |
|
| 128 |
-
#
|
| 129 |
try:
|
| 130 |
CFG_SNAPSHOT.write_text(json.dumps({k: bool(CFG.get(k)) for k in CFG}, indent=2), encoding="utf-8")
|
| 131 |
except Exception:
|
| 132 |
pass
|
| 133 |
|
| 134 |
# -------------------------
|
| 135 |
-
# Helpers:
|
| 136 |
# -------------------------
|
| 137 |
def load_json_safe(path: pathlib.Path, default):
|
| 138 |
try:
|
| 139 |
if path.exists():
|
| 140 |
return json.loads(path.read_text(encoding="utf-8"))
|
| 141 |
except Exception as e:
|
| 142 |
-
logger.debug("load_json_safe
|
| 143 |
return default
|
| 144 |
|
| 145 |
def save_json_safe(path: pathlib.Path, data):
|
| 146 |
try:
|
| 147 |
path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
|
| 148 |
return True
|
| 149 |
-
except Exception:
|
| 150 |
-
logger.exception("save_json_safe failed for %s", path)
|
| 151 |
return False
|
| 152 |
|
| 153 |
def today_str():
|
|
@@ -175,9 +173,13 @@ def append_history(entry: dict):
|
|
| 175 |
save_json_safe(HISTORY_FILE, h)
|
| 176 |
|
| 177 |
# -------------------------
|
| 178 |
-
# Language detection
|
| 179 |
# -------------------------
|
| 180 |
-
VIET_CHAR_RE = re.compile(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
def detect_lang(text: str) -> str:
|
| 182 |
if not text or not isinstance(text, str):
|
| 183 |
return "en"
|
|
@@ -191,17 +193,15 @@ def detect_lang(text: str) -> str:
|
|
| 191 |
return "en"
|
| 192 |
|
| 193 |
# -------------------------
|
| 194 |
-
# Gemini
|
| 195 |
# -------------------------
|
| 196 |
-
def
|
| 197 |
"""
|
| 198 |
-
Try
|
| 199 |
-
Returns (ok, text_or_error, http_status_or_0)
|
| 200 |
"""
|
| 201 |
-
#
|
| 202 |
if GENAI_CLIENT:
|
| 203 |
try:
|
| 204 |
-
# new SDK method
|
| 205 |
if hasattr(GENAI_CLIENT, "models") and hasattr(GENAI_CLIENT.models, "generate_content"):
|
| 206 |
resp = GENAI_CLIENT.models.generate_content(model=model, contents=prompt,
|
| 207 |
max_output_tokens=max_output_tokens, temperature=temperature) # type: ignore
|
|
@@ -209,14 +209,12 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
|
|
| 209 |
if txt:
|
| 210 |
return True, txt, 200
|
| 211 |
return True, str(resp), 200
|
| 212 |
-
# older compatibility
|
| 213 |
if hasattr(GENAI_CLIENT, "generate_content"):
|
| 214 |
resp = GENAI_CLIENT.generate_content(prompt, max_output_tokens=max_output_tokens, temperature=temperature)
|
| 215 |
if hasattr(resp, "text") and resp.text:
|
| 216 |
return True, resp.text, 200
|
| 217 |
return True, str(resp), 200
|
| 218 |
except requests.exceptions.HTTPError as he:
|
| 219 |
-
# SDK might raise requests HTTPError
|
| 220 |
try:
|
| 221 |
code = he.response.status_code
|
| 222 |
except Exception:
|
|
@@ -224,6 +222,7 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
|
|
| 224 |
return False, str(he), code
|
| 225 |
except Exception as e:
|
| 226 |
return False, str(e), 0
|
|
|
|
| 227 |
# REST fallback
|
| 228 |
key = CFG.get("GEMINI_API_KEY") or ""
|
| 229 |
if not key:
|
|
@@ -234,11 +233,11 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
|
|
| 234 |
"prompt": {
|
| 235 |
"messages": [
|
| 236 |
{"author": "system", "content": {"text": "You are a helpful assistant."}},
|
| 237 |
-
{"author": "user", "content": {"text": prompt}}
|
| 238 |
]
|
| 239 |
},
|
| 240 |
"maxOutputTokens": max_output_tokens,
|
| 241 |
-
"temperature": temperature
|
| 242 |
}
|
| 243 |
try:
|
| 244 |
r = requests.post(url, params={"key": key}, json=payload, headers=headers, timeout=30)
|
|
@@ -248,7 +247,7 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
|
|
| 248 |
if status >= 400:
|
| 249 |
return False, f"HTTP {status}: {r.text}", status
|
| 250 |
j = r.json()
|
| 251 |
-
# parse
|
| 252 |
cand = j.get("candidates")
|
| 253 |
if cand and isinstance(cand, list):
|
| 254 |
c0 = cand[0]
|
|
@@ -274,8 +273,7 @@ def gemini_call_with_model(model: str, prompt: str, max_output_tokens: int = 102
|
|
| 274 |
|
| 275 |
def call_gemini_with_fallbacks(prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Dict[str, Any]:
|
| 276 |
"""
|
| 277 |
-
Try sequence
|
| 278 |
-
Return dict {"ok":bool, "text":str, "model":str, "error":...}
|
| 279 |
"""
|
| 280 |
if not CFG.get("GEMINI_API_KEY"):
|
| 281 |
return {"ok": False, "error": "Gemini API key not configured (set GEMINI_API_KEY in New secret)"}
|
|
@@ -283,20 +281,16 @@ def call_gemini_with_fallbacks(prompt: str, max_output_tokens: int = 1024, tempe
|
|
| 283 |
for model in MODEL_FALLBACK:
|
| 284 |
if not model:
|
| 285 |
continue
|
| 286 |
-
ok,
|
| 287 |
if ok:
|
| 288 |
-
return {"ok": True, "text":
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
if status not in (404, 0):
|
| 293 |
-
# for some HTTP errors we may stop trying (e.g., 403 unauthorized)
|
| 294 |
-
# but still try next model for robustness
|
| 295 |
-
pass
|
| 296 |
return {"ok": False, "error": f"All models failed. Last: {last_error}", "last": last_error}
|
| 297 |
|
| 298 |
# -------------------------
|
| 299 |
-
# TTS
|
| 300 |
# -------------------------
|
| 301 |
def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
|
| 302 |
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
|
@@ -308,7 +302,7 @@ def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
|
|
| 308 |
|
| 309 |
def tts_gtts_bytes(text: str, lang: str = "vi") -> bytes:
|
| 310 |
if not GTTS_AVAILABLE:
|
| 311 |
-
raise RuntimeError("gTTS not
|
| 312 |
t = gTTS(text=text, lang=lang)
|
| 313 |
bio = io.BytesIO()
|
| 314 |
t.write_to_fp(bio)
|
|
@@ -336,7 +330,7 @@ def synthesize_and_save(answer: str, lang_hint: str = "vi") -> Tuple[bool, str]:
|
|
| 336 |
return False, f"TTS error: {e}"
|
| 337 |
|
| 338 |
# -------------------------
|
| 339 |
-
# STT: server-side speech-to-text
|
| 340 |
# -------------------------
|
| 341 |
def speech_to_text(wav_bytes: bytes) -> Tuple[bool, str]:
|
| 342 |
if not STT_AVAILABLE:
|
|
@@ -352,7 +346,7 @@ def speech_to_text(wav_bytes: bytes) -> Tuple[bool, str]:
|
|
| 352 |
return False, str(e)
|
| 353 |
|
| 354 |
# -------------------------
|
| 355 |
-
# Telegram helper
|
| 356 |
# -------------------------
|
| 357 |
def send_telegram_message(text: str) -> bool:
|
| 358 |
token = CFG.get("TELEGRAM_TOKEN") or ""
|
|
@@ -384,12 +378,11 @@ textarea{width:100%;padding:10px;border-radius:8px;background:#061427;color:#fff
|
|
| 384 |
button{padding:10px 14px;border-radius:8px;background:#0ea5ff;color:#012;border:none;cursor:pointer}
|
| 385 |
#resp{white-space:pre-wrap;margin-top:12px;background:#021220;padding:12px;border-radius:6px}
|
| 386 |
.small{font-size:0.9rem;color:#9fb3c8}
|
| 387 |
-
</style></head>
|
| 388 |
-
<body>
|
| 389 |
<div class="container">
|
| 390 |
<h1>🤖 KCrobot AI — Vmax (Voice-first)</h1>
|
| 391 |
<p class="small">Model fallback: {{models}} — Gemini key: {{gemini}} — Telegram: {{tg}}</p>
|
| 392 |
-
<p>
|
| 393 |
<textarea id="q" rows="4" placeholder="Nhập tiếng Việt / English..."></textarea>
|
| 394 |
<p><label><input id="voice" type="checkbox" checked> Voice ON</label>
|
| 395 |
<button onclick="send()">Gửi & Nghe</button></p>
|
|
@@ -455,7 +448,6 @@ def chat_text():
|
|
| 455 |
if ok:
|
| 456 |
play_url = "/play_latest"
|
| 457 |
try:
|
| 458 |
-
# Telegram notify background
|
| 459 |
threading.Thread(target=send_telegram_message, args=(f"Q: {q}\nA: {answer}",)).start()
|
| 460 |
except Exception:
|
| 461 |
pass
|
|
@@ -466,19 +458,10 @@ def chat_text():
|
|
| 466 |
|
| 467 |
@app.route("/esp/send_text", methods=["POST"])
|
| 468 |
def esp_send_text():
|
| 469 |
-
# wrapper for chat_text for esp32 convenience
|
| 470 |
return chat_text()
|
| 471 |
|
| 472 |
@app.route("/chat_audio", methods=["POST"])
|
| 473 |
def chat_audio():
|
| 474 |
-
"""
|
| 475 |
-
Primary voice endpoint:
|
| 476 |
-
- Accept audio file field 'file' (wav) or raw bytes body
|
| 477 |
-
- Do STT (server-side) if available, else return 501
|
| 478 |
-
- Use Gemini to reply (model fallback)
|
| 479 |
-
- Synthesize reply to latest_reply.mp3 and return play_url
|
| 480 |
-
"""
|
| 481 |
-
# read bytes
|
| 482 |
wav_bytes = None
|
| 483 |
if 'file' in request.files:
|
| 484 |
f = request.files['file']
|
|
@@ -487,7 +470,6 @@ def chat_audio():
|
|
| 487 |
wav_bytes = request.get_data()
|
| 488 |
if not wav_bytes:
|
| 489 |
return jsonify({"error": "no audio provided"}), 400
|
| 490 |
-
# save for debugging
|
| 491 |
try:
|
| 492 |
ts = int(time.time())
|
| 493 |
(DATA_DIR / f"uploaded_{ts}.wav").write_bytes(wav_bytes)
|
|
@@ -549,10 +531,9 @@ def health():
|
|
| 549 |
})
|
| 550 |
|
| 551 |
# -------------------------
|
| 552 |
-
# Start
|
| 553 |
# -------------------------
|
| 554 |
if __name__ == "__main__":
|
| 555 |
-
# ensure files exist
|
| 556 |
load_json_safe(HISTORY_FILE, [])
|
| 557 |
load_json_safe(USAGE_FILE, {})
|
| 558 |
logger.info("KCrobot Vmax starting. Gemini key present: %s, SDK present: %s, STT available: %s",
|
|
|
|
| 2 |
# -*- coding: utf-8 -*-
|
| 3 |
"""
|
| 4 |
KCrobot AI — Vmax Final (voice-first)
|
| 5 |
+
- Default Gemini model: gemini-1.5-pro (fallbacks implemented)
|
| 6 |
+
- Read secrets from env (HF "New secret"):
|
| 7 |
GEMINI_API_KEY, GEMINI_MODEL, TELEGRAM_TOKEN, TELEGRAM_CHAT_ID,
|
| 8 |
ELEVEN_API_KEY, ELEVEN_VOICE_ID, GOOGLE_SPEECH_LANG
|
| 9 |
- Voice-first: ESP32 uploads audio -> /chat_audio -> STT -> Gemini -> TTS -> MP3
|
| 10 |
+
- If STT libs missing, /chat_audio returns 501 (server-side STT optional)
|
| 11 |
- Endpoints:
|
| 12 |
+
GET / -> simple web UI (chat secondary)
|
| 13 |
+
POST /chat_text -> {"q":"...","voice":true}
|
| 14 |
+
POST /chat_audio -> upload wav (multipart 'file' or raw bytes)
|
| 15 |
+
POST /esp/send_text-> wrapper for /chat_text
|
| 16 |
+
GET /play_latest -> latest_reply.mp3
|
| 17 |
+
GET /_history -> recent history
|
| 18 |
+
POST /notify -> forward to Telegram
|
| 19 |
+
GET /health -> health check
|
| 20 |
"""
|
| 21 |
from __future__ import annotations
|
| 22 |
|
|
|
|
| 32 |
from datetime import datetime
|
| 33 |
from typing import Tuple, Dict, Any, Optional
|
| 34 |
|
|
|
|
| 35 |
from flask import Flask, request, jsonify, send_file, render_template_string
|
| 36 |
+
import requests
|
| 37 |
|
| 38 |
+
# --- Attempt to import Google GenAI SDK (new) or older lib
|
| 39 |
USE_GENAI_SDK = False
|
| 40 |
GENAI_CLIENT = None
|
| 41 |
try:
|
| 42 |
+
# new official package pattern: `from google import genai`
|
| 43 |
from google import genai # type: ignore
|
| 44 |
USE_GENAI_SDK = True
|
| 45 |
except Exception:
|
| 46 |
try:
|
|
|
|
| 47 |
import google.generativeai as genai # type: ignore
|
| 48 |
USE_GENAI_SDK = True
|
| 49 |
except Exception:
|
| 50 |
genai = None
|
| 51 |
USE_GENAI_SDK = False
|
| 52 |
|
| 53 |
+
# --- TTS/STT libs (optional)
|
| 54 |
try:
|
| 55 |
from gtts import gTTS # type: ignore
|
| 56 |
GTTS_AVAILABLE = True
|
| 57 |
except Exception:
|
| 58 |
GTTS_AVAILABLE = False
|
| 59 |
|
|
|
|
| 60 |
try:
|
| 61 |
import speech_recognition as sr # type: ignore
|
| 62 |
from pydub import AudioSegment # type: ignore
|
|
|
|
| 67 |
STT_AVAILABLE = False
|
| 68 |
|
| 69 |
# -------------------------
|
| 70 |
+
# Config from env (HF New secret)
|
| 71 |
# -------------------------
|
| 72 |
CFG = {
|
| 73 |
"GEMINI_API_KEY": os.getenv("GEMINI_API_KEY", "").strip(),
|
|
|
|
| 79 |
"GOOGLE_SPEECH_LANG": os.getenv("GOOGLE_SPEECH_LANG", "vi-VN").strip(),
|
| 80 |
}
|
| 81 |
|
| 82 |
+
# Model fallback list: prefer configured model, then alternatives
|
| 83 |
MODEL_FALLBACK_LIST = [
|
| 84 |
CFG.get("GEMINI_MODEL") or "gemini-1.5-pro",
|
| 85 |
"gemini-1.5-flash",
|
|
|
|
| 87 |
"gemini-2.5-pro",
|
| 88 |
]
|
| 89 |
|
| 90 |
+
# dedupe keep order
|
| 91 |
seen = set()
|
| 92 |
MODEL_FALLBACK = []
|
| 93 |
for m in MODEL_FALLBACK_LIST:
|
|
|
|
| 99 |
# Build the Gemini client once at import time. GENAI_CLIENT keeps its earlier
# value when no SDK was imported or no key is configured, and ends up None
# when both SDK styles fail.
GEMINI_KEY = CFG.get("GEMINI_API_KEY") or ""
if GEMINI_KEY and USE_GENAI_SDK:
    try:
        # Client-object style: genai.Client(api_key=...)
        GENAI_CLIENT = genai.Client(api_key=GEMINI_KEY)  # type: ignore
    except Exception:
        # Module-level style: configure() the package and use the module
        # itself as the "client".
        try:
            genai.configure(api_key=GEMINI_KEY)  # type: ignore
        except Exception:
            GENAI_CLIENT = None
        else:
            GENAI_CLIENT = genai  # type: ignore
|
| 111 |
|
| 112 |
# -------------------------
|
| 113 |
+
# Storage & logging
|
| 114 |
# -------------------------
|
| 115 |
BASE = pathlib.Path.cwd()
|
| 116 |
DATA_DIR = BASE / "data"
|
|
|
|
| 123 |
logging.basicConfig(level=logging.INFO)
|
| 124 |
logger = logging.getLogger("kcrobot_vmax")
|
| 125 |
|
| 126 |
+
# Save a non-secret snapshot: for every config key record only whether a
# value is set (True/False), never the value itself.
try:
    presence = {name: bool(value) for name, value in CFG.items()}
    CFG_SNAPSHOT.write_text(json.dumps(presence, indent=2), encoding="utf-8")
except Exception as e:
    # Best-effort: a failed snapshot write must not stop startup, but report
    # it instead of swallowing it silently.
    logger.warning("could not write config snapshot %s: %s", CFG_SNAPSHOT, e)
|
| 131 |
|
| 132 |
# -------------------------
|
| 133 |
+
# Helpers: json safe, usage, history
|
| 134 |
# -------------------------
|
| 135 |
def load_json_safe(path: pathlib.Path, default):
    """Return the JSON document stored at *path*, or *default* if unavailable.

    Best-effort by design: a missing, unreadable, or malformed file never
    raises. A missing file is silent; any other failure is logged at WARNING
    so that a corrupt file is visible under the INFO log level this module
    configures, rather than silently resetting stored state.

    Args:
        path: File expected to contain UTF-8 encoded JSON.
        default: Value returned unchanged when nothing can be loaded.

    Returns:
        The decoded JSON value, or *default*.
    """
    try:
        # EAFP: read directly instead of exists()-then-read, which can race
        # with another writer removing or replacing the file.
        return json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # Nothing has been saved yet.
        return default
    except Exception as e:
        logger.warning("load_json_safe error %s -> %s", path, e)
        return default
|
| 142 |
|
| 143 |
def save_json_safe(path: pathlib.Path, data) -> bool:
    """Serialize *data* as pretty-printed UTF-8 JSON and store it at *path*.

    The document is written to a temporary sibling file and then renamed over
    *path*, so an interrupted write cannot leave a truncated file behind.
    The temp name includes the process and thread id so concurrent savers do
    not share a temp file.

    Args:
        path: Destination file; its parent directory must already exist.
        data: Any JSON-serializable value.

    Returns:
        True on success. False if serialization or writing failed; the
        failure is logged and never raised.
    """
    import os
    import threading

    tmp = None
    try:
        # Serialize first: an unserializable value fails before any file is touched.
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        tmp = path.with_name(
            f".{path.name}.{os.getpid()}.{threading.get_ident()}.tmp"
        )
        tmp.write_text(payload, encoding="utf-8")
        tmp.replace(path)
        return True
    except Exception as e:
        logger.exception("save_json_safe failed for %s: %s", path, e)
        if tmp is not None:
            try:
                tmp.unlink()
            except OSError:
                # Temp file was never created or is already gone.
                pass
        return False
|
| 150 |
|
| 151 |
def today_str():
|
|
|
|
| 173 |
save_json_safe(HISTORY_FILE, h)
|
| 174 |
|
| 175 |
# -------------------------
|
| 176 |
+
# Language detection
|
| 177 |
# -------------------------
|
| 178 |
+
# Matches any single Vietnamese accented letter (or "đ"). The class lists the
# lowercase forms; re.IGNORECASE makes it cover the uppercase forms too.
_VIET_LETTERS = "àáạảãâầấậẩẫăằắặẳẵđèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹ"
VIET_CHAR_RE = re.compile(f"[{_VIET_LETTERS}]", flags=re.IGNORECASE)
|
| 182 |
+
|
| 183 |
def detect_lang(text: str) -> str:
|
| 184 |
if not text or not isinstance(text, str):
|
| 185 |
return "en"
|
|
|
|
| 193 |
return "en"
|
| 194 |
|
| 195 |
# -------------------------
|
| 196 |
+
# Gemini single model call (SDK preferred, REST fallback)
|
| 197 |
# -------------------------
|
| 198 |
+
def gemini_call_single(model: str, prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Tuple[bool, str, int]:
|
| 199 |
"""
|
| 200 |
+
Try calling a single model. Return (ok, text_or_error, http_status_or_0)
|
|
|
|
| 201 |
"""
|
| 202 |
+
# SDK path
|
| 203 |
if GENAI_CLIENT:
|
| 204 |
try:
|
|
|
|
| 205 |
if hasattr(GENAI_CLIENT, "models") and hasattr(GENAI_CLIENT.models, "generate_content"):
|
| 206 |
resp = GENAI_CLIENT.models.generate_content(model=model, contents=prompt,
|
| 207 |
max_output_tokens=max_output_tokens, temperature=temperature) # type: ignore
|
|
|
|
| 209 |
if txt:
|
| 210 |
return True, txt, 200
|
| 211 |
return True, str(resp), 200
|
|
|
|
| 212 |
if hasattr(GENAI_CLIENT, "generate_content"):
|
| 213 |
resp = GENAI_CLIENT.generate_content(prompt, max_output_tokens=max_output_tokens, temperature=temperature)
|
| 214 |
if hasattr(resp, "text") and resp.text:
|
| 215 |
return True, resp.text, 200
|
| 216 |
return True, str(resp), 200
|
| 217 |
except requests.exceptions.HTTPError as he:
|
|
|
|
| 218 |
try:
|
| 219 |
code = he.response.status_code
|
| 220 |
except Exception:
|
|
|
|
| 222 |
return False, str(he), code
|
| 223 |
except Exception as e:
|
| 224 |
return False, str(e), 0
|
| 225 |
+
|
| 226 |
# REST fallback
|
| 227 |
key = CFG.get("GEMINI_API_KEY") or ""
|
| 228 |
if not key:
|
|
|
|
| 233 |
"prompt": {
|
| 234 |
"messages": [
|
| 235 |
{"author": "system", "content": {"text": "You are a helpful assistant."}},
|
| 236 |
+
{"author": "user", "content": {"text": prompt}},
|
| 237 |
]
|
| 238 |
},
|
| 239 |
"maxOutputTokens": max_output_tokens,
|
| 240 |
+
"temperature": temperature,
|
| 241 |
}
|
| 242 |
try:
|
| 243 |
r = requests.post(url, params={"key": key}, json=payload, headers=headers, timeout=30)
|
|
|
|
| 247 |
if status >= 400:
|
| 248 |
return False, f"HTTP {status}: {r.text}", status
|
| 249 |
j = r.json()
|
| 250 |
+
# try parse candidates
|
| 251 |
cand = j.get("candidates")
|
| 252 |
if cand and isinstance(cand, list):
|
| 253 |
c0 = cand[0]
|
|
|
|
| 273 |
|
| 274 |
def call_gemini_with_fallbacks(prompt: str, max_output_tokens: int = 1024, temperature: float = 0.2) -> Dict[str, Any]:
|
| 275 |
"""
|
| 276 |
+
Try models in MODEL_FALLBACK sequence; return dict with ok/text/model or error.
|
|
|
|
| 277 |
"""
|
| 278 |
if not CFG.get("GEMINI_API_KEY"):
|
| 279 |
return {"ok": False, "error": "Gemini API key not configured (set GEMINI_API_KEY in New secret)"}
|
|
|
|
| 281 |
for model in MODEL_FALLBACK:
|
| 282 |
if not model:
|
| 283 |
continue
|
| 284 |
+
ok, text_or_err, status = gemini_call_single(model, prompt, max_output_tokens, temperature)
|
| 285 |
if ok:
|
| 286 |
+
return {"ok": True, "text": text_or_err, "model": model}
|
| 287 |
+
last_error = {"model": model, "status": status, "error": text_or_err}
|
| 288 |
+
logger.warning("Model %s failed: %s (status=%s)", model, text_or_err, status)
|
| 289 |
+
# continue to next for robustness; some errors (403) may persist but try anyway
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
return {"ok": False, "error": f"All models failed. Last: {last_error}", "last": last_error}
|
| 291 |
|
| 292 |
# -------------------------
|
| 293 |
+
# TTS: ElevenLabs optional -> gTTS fallback
|
| 294 |
# -------------------------
|
| 295 |
def tts_elevenlabs_bytes(text: str, voice_id: str, api_key: str) -> bytes:
|
| 296 |
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
|
|
|
| 302 |
|
| 303 |
def tts_gtts_bytes(text: str, lang: str = "vi") -> bytes:
|
| 304 |
if not GTTS_AVAILABLE:
|
| 305 |
+
raise RuntimeError("gTTS not available in environment")
|
| 306 |
t = gTTS(text=text, lang=lang)
|
| 307 |
bio = io.BytesIO()
|
| 308 |
t.write_to_fp(bio)
|
|
|
|
| 330 |
return False, f"TTS error: {e}"
|
| 331 |
|
| 332 |
# -------------------------
|
| 333 |
+
# STT: server-side speech-to-text (optional)
|
| 334 |
# -------------------------
|
| 335 |
def speech_to_text(wav_bytes: bytes) -> Tuple[bool, str]:
|
| 336 |
if not STT_AVAILABLE:
|
|
|
|
| 346 |
return False, str(e)
|
| 347 |
|
| 348 |
# -------------------------
|
| 349 |
+
# Telegram helper (optional)
|
| 350 |
# -------------------------
|
| 351 |
def send_telegram_message(text: str) -> bool:
|
| 352 |
token = CFG.get("TELEGRAM_TOKEN") or ""
|
|
|
|
| 378 |
button{padding:10px 14px;border-radius:8px;background:#0ea5ff;color:#012;border:none;cursor:pointer}
|
| 379 |
#resp{white-space:pre-wrap;margin-top:12px;background:#021220;padding:12px;border-radius:6px}
|
| 380 |
.small{font-size:0.9rem;color:#9fb3c8}
|
| 381 |
+
</style></head><body>
|
|
|
|
| 382 |
<div class="container">
|
| 383 |
<h1>🤖 KCrobot AI — Vmax (Voice-first)</h1>
|
| 384 |
<p class="small">Model fallback: {{models}} — Gemini key: {{gemini}} — Telegram: {{tg}}</p>
|
| 385 |
+
<p>Giao diện chat là phụ — ưu tiên voice (ESP32 gửi audio). Bạn có thể thử gõ "Xin chào" để nghe trả lời.</p>
|
| 386 |
<textarea id="q" rows="4" placeholder="Nhập tiếng Việt / English..."></textarea>
|
| 387 |
<p><label><input id="voice" type="checkbox" checked> Voice ON</label>
|
| 388 |
<button onclick="send()">Gửi & Nghe</button></p>
|
|
|
|
| 448 |
if ok:
|
| 449 |
play_url = "/play_latest"
|
| 450 |
try:
|
|
|
|
| 451 |
threading.Thread(target=send_telegram_message, args=(f"Q: {q}\nA: {answer}",)).start()
|
| 452 |
except Exception:
|
| 453 |
pass
|
|
|
|
| 458 |
|
| 459 |
@app.route("/esp/send_text", methods=["POST"])
def esp_send_text():
    """ESP32-facing alias of the text chat endpoint.

    Delegates to chat_text() without touching the request, so the accepted
    body and the returned response are exactly those of chat_text().
    """
    reply = chat_text()
    return reply
|
| 462 |
|
| 463 |
@app.route("/chat_audio", methods=["POST"])
|
| 464 |
def chat_audio():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
wav_bytes = None
|
| 466 |
if 'file' in request.files:
|
| 467 |
f = request.files['file']
|
|
|
|
| 470 |
wav_bytes = request.get_data()
|
| 471 |
if not wav_bytes:
|
| 472 |
return jsonify({"error": "no audio provided"}), 400
|
|
|
|
| 473 |
try:
|
| 474 |
ts = int(time.time())
|
| 475 |
(DATA_DIR / f"uploaded_{ts}.wav").write_bytes(wav_bytes)
|
|
|
|
| 531 |
})
|
| 532 |
|
| 533 |
# -------------------------
|
| 534 |
+
# Start server
|
| 535 |
# -------------------------
|
| 536 |
if __name__ == "__main__":
|
|
|
|
| 537 |
load_json_safe(HISTORY_FILE, [])
|
| 538 |
load_json_safe(USAGE_FILE, {})
|
| 539 |
logger.info("KCrobot Vmax starting. Gemini key present: %s, SDK present: %s, STT available: %s",
|