kcrobot25 commited on
Commit
12bf2c1
·
verified ·
1 Parent(s): 96266ee
Files changed (1) hide show
  1. app.py +344 -0
app.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
# KC Robot AI — V4 FINAL (Gradio + REST API /api/* + Telegram)
# Upload this file to a Hugging Face Space (SDK = Gradio).
# Put your secrets in Space Settings:
#   HF_API_TOKEN    (required)
#   TELEGRAM_TOKEN  (optional)
#   TELEGRAM_CHATID (optional)
# Optional overrides:
#   HF_MODEL, HF_STT_MODEL, HF_TTS_MODEL
# Standard library
import io
import logging
import os
import tempfile
import threading
import time
from typing import Optional, Any, List, Tuple

# Third-party
import requests
import gradio as gr
from langdetect import detect, DetectorFactory
from gtts import gTTS
# Ensure deterministic detection — langdetect is randomized unless seeded.
DetectorFactory.seed = 0

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("kcrobot.v4")

# ====== Config from Secrets / env ======
HF_API_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
HF_MODEL = os.getenv("HF_MODEL", "google/flan-t5-large")
HF_STT_MODEL = os.getenv("HF_STT_MODEL", "openai/whisper-small")
HF_TTS_MODEL = os.getenv("HF_TTS_MODEL", "")  # optional, if empty use gTTS
TELEGRAM_TOKEN = os.getenv("TELEGRAM_TOKEN", "").strip()
TELEGRAM_CHATID = os.getenv("TELEGRAM_CHATID", "").strip()

if not HF_API_TOKEN:
    logger.warning("HF_API_TOKEN not set — put it into Space Secrets for HF inference calls to work.")

# Auth header shared by all HF Inference API calls (empty dict when no token).
HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"} if HF_API_TOKEN else {}

# ====== In-memory state ======
# CONVERSATION: full (user_text, bot_text) exchange log, appended by the
# chat UI and the /api/* endpoints. DISPLAY_BUFFER: rolling window of short
# one-line summaries served to the robot's display via /api/display.
# NOTE(review): both are process-local and unbounded (CONVERSATION) — fine
# for a single Space replica, not shared across workers.
CONVERSATION: List[Tuple[str, str]] = []
DISPLAY_BUFFER: List[str] = []
DISPLAY_LIMIT = 6
def push_display(line: str):
    """Append *line* to the rolling display buffer, keeping at most DISPLAY_LIMIT entries."""
    DISPLAY_BUFFER.append(line)
    while len(DISPLAY_BUFFER) > DISPLAY_LIMIT:
        DISPLAY_BUFFER.pop(0)
54
+ # ====== Hugging Face helpers ======
55
+ def _parse_hf_text_response(data: Any) -> str:
56
+ try:
57
+ if isinstance(data, list) and data and isinstance(data[0], dict):
58
+ return data[0].get("generated_text", "") or str(data[0])
59
+ if isinstance(data, dict) and "generated_text" in data:
60
+ return data["generated_text"]
61
+ if isinstance(data, dict) and "text" in data:
62
+ return data["text"]
63
+ if isinstance(data, dict) and "choices" in data and isinstance(data["choices"], list):
64
+ c0 = data["choices"][0]
65
+ return c0.get("text") or c0.get("message", {}).get("content", "") or str(c0)
66
+ return str(data)
67
+ except Exception:
68
+ return str(data)
69
+
def hf_text_generate(prompt: str, model: Optional[str] = None, max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    """Run *prompt* through the HF Inference API and return the parsed text.

    Raises RuntimeError when the token is missing or the API answers non-200.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not configured in environment")
    chosen = model or HF_MODEL
    endpoint = f"https://api-inference.huggingface.co/models/{chosen}"
    body = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": int(max_new_tokens),
            "temperature": float(temperature),
        },
        # Block server-side until the model is loaded instead of getting a 503.
        "options": {"wait_for_model": True},
    }
    logger.info("HF text gen -> model=%s prompt_len=%d", chosen, len(prompt))
    resp = requests.post(endpoint, headers=HF_HEADERS, json=body, timeout=120)
    if resp.status_code != 200:
        logger.error("HF text gen error %s: %s", resp.status_code, resp.text[:300])
        raise RuntimeError(f"HF text gen failed: {resp.status_code}: {resp.text}")
    return _parse_hf_text_response(resp.json())
def hf_stt_from_bytes(audio_bytes: bytes, model: Optional[str] = None) -> str:
    """Transcribe raw audio bytes via the HF Inference API (Whisper by default).

    Raises RuntimeError when the token is missing or the API answers non-200.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not configured")
    chosen = model or HF_STT_MODEL
    endpoint = f"https://api-inference.huggingface.co/models/{chosen}"
    # Raw bytes in the request body, not JSON.
    headers = {**HF_HEADERS, "Content-Type": "application/octet-stream"}
    logger.info("HF STT -> model=%s bytes=%d", chosen, len(audio_bytes) if audio_bytes else 0)
    resp = requests.post(endpoint, headers=headers, data=audio_bytes, timeout=180)
    if resp.status_code != 200:
        logger.error("HF STT error %s: %s", resp.status_code, resp.text[:300])
        raise RuntimeError(f"HF STT failed: {resp.status_code}: {resp.text}")
    payload = resp.json()
    # Whisper-style responses carry the transcription under "text".
    if isinstance(payload, dict) and "text" in payload:
        return payload["text"]
    return _parse_hf_text_response(payload)
# ====== TTS: prefer gTTS (free). If HF_TTS_MODEL provided you can implement HF TTS similarly. ======
def tts_gtts_bytes(text: str) -> bytes:
    """Synthesize *text* to MP3 bytes with gTTS.

    Picks a Vietnamese or English voice from language detection; defaults to
    Vietnamese when detection fails. Raises RuntimeError on empty input.
    """
    if not text:
        raise RuntimeError("Empty text for TTS")
    try:
        detected = detect(text)
    except Exception:
        detected = "vi"  # detection can fail on very short input — assume Vietnamese
    voice = "vi" if detected.startswith("vi") else "en"
    logger.info("gTTS generating audio lang=%s len=%d", voice, len(text))
    buf = io.BytesIO()
    gTTS(text=text, lang=voice).write_to_fp(buf)
    return buf.getvalue()
# ====== Telegram helpers (optional) ======
def send_telegram(text: str):
    """Best-effort push of *text* to the configured Telegram chat.

    Silently does nothing when token/chat id are unset; network failures are
    logged, never raised.
    """
    if not TELEGRAM_TOKEN or not TELEGRAM_CHATID:
        logger.debug("Telegram not configured or missing chat id")
        return
    url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
    payload = {"chat_id": TELEGRAM_CHATID, "text": text}
    try:
        requests.post(url, json=payload, timeout=10)
    except Exception:
        logger.exception("send_telegram failed")
def telegram_poller():
    """Long-poll the Telegram Bot API forever and answer /ask, /say, /status.

    Intended to run in a daemon thread. Any error is logged and the loop
    retries after a short back-off.
    """
    if not TELEGRAM_TOKEN:
        logger.info("Telegram poller disabled")
        return
    base = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}"
    offset = None  # next update_id to request, so processed updates are not re-delivered
    logger.info("Telegram poller started")
    while True:
        try:
            params = {"timeout": 30}  # server-side long-poll window (seconds)
            if offset: params["offset"] = offset
            # HTTP timeout slightly longer than the long-poll window.
            r = requests.get(base + "/getUpdates", params=params, timeout=35)
            if r.status_code != 200:
                time.sleep(2); continue
            data = r.json()
            for upd in data.get("result", []):
                # Advance the offset even for updates we skip, acknowledging them.
                offset = upd.get("update_id", 0) + 1
                msg = upd.get("message") or {}
                chat = msg.get("chat", {})
                chat_id = chat.get("id")
                text = (msg.get("text") or "").strip()
                if not text:
                    continue
                logger.info("TG msg: %s", text)
                if text.lower().startswith("/ask "):
                    # /ask <question> -> HF text generation, reply as a message.
                    q = text[5:].strip()
                    try:
                        ans = hf_text_generate(q)
                    except Exception as e:
                        ans = f"[HF error] {e}"
                    try:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": ans}, timeout=10)
                    except Exception:
                        logger.exception("tg reply failed")
                elif text.lower().startswith("/say "):
                    # /say <text> -> synthesize with gTTS and send back as audio.
                    phrase = text[5:].strip()
                    try:
                        audio = tts_gtts_bytes(phrase)
                        files = {"audio": ("reply.mp3", audio, "audio/mpeg")}
                        requests.post(base + "/sendAudio", files=files, data={"chat_id": chat_id}, timeout=30)
                    except Exception:
                        logger.exception("tg say failed")
                elif text.lower().startswith("/status"):
                    # /status -> simple liveness reply; delivery failures ignored.
                    try:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "KC Robot brain running"}, timeout=10)
                    except Exception:
                        pass
                else:
                    # Unknown input -> usage help; delivery failures ignored.
                    try:
                        requests.post(base + "/sendMessage", json={"chat_id": chat_id, "text": "Commands: /ask <q> | /say <text> | /status"}, timeout=10)
                    except Exception:
                        pass
        except Exception:
            logger.exception("telegram poller exception")
            time.sleep(3)  # back off before the next poll attempt
# Start the Telegram long-poll loop in the background when a bot token is
# configured. Daemon thread so it never blocks process shutdown.
if TELEGRAM_TOKEN:
    t = threading.Thread(target=telegram_poller, daemon=True)
    t.start()
# ====== Gradio UI (chat + TTS + STT) ======
# NOTE(review): uses Gradio 3.x API (.style(), gr.Audio(source=...)) — both were
# removed in Gradio 4.x; confirm the Space pins gradio<4.
with gr.Blocks(title="KC Robot AI - Cloud Brain V4") as demo:
    gr.Markdown("## 🤖 KC Robot AI — Cloud Brain (Hugging Face Inference)")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot([], elem_id="chatbot").style(height=480)
            txt = gr.Textbox(lines=2, placeholder="Nhập câu hỏi (VN/EN) hoặc tiếng Anh...", label="Your message")
            send = gr.Button("Gửi")
            with gr.Row():
                temp = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
                tokens = gr.Slider(16, 1024, value=256, step=16, label="Max tokens")
            model_override = gr.Textbox(label="Override HF model (optional)")
        with gr.Column(scale=1):
            gr.Markdown("### TTS / STT")
            tts_in = gr.Textbox(lines=2, label="Text → TTS")
            tts_btn = gr.Button("Create TTS")
            tts_audio = gr.Audio(label="TTS audio", interactive=False)
            gr.Markdown("Upload audio for STT")
            up = gr.Audio(source="upload", type="filepath", label="Upload audio")
            stt_btn = gr.Button("Transcribe")
            stt_out = gr.Textbox(label="Transcription")

    def chat_fn(message, history, temperature, max_tokens, model_override_val):
        """Generate a reply for *message* and extend the chat history.

        Returns (updated history, "") — the empty string clears the textbox.
        """
        if not message or not message.strip():
            return history or [], ""
        system = "You are KC Robot AI, bilingual (Vietnamese & English). Answer in the same language as the user. Be clear and helpful."
        prompt = f"{system}\n\nUser: {message}\nAssistant:"
        model = model_override_val.strip() if model_override_val else HF_MODEL
        try:
            ans = hf_text_generate(prompt, model=model, max_new_tokens=int(max_tokens), temperature=float(temperature))
        except Exception as e:
            ans = f"[HF error] {e}"
        history = history or []
        # FIX: gr.Chatbot history is a list of (user_message, bot_message) pairs;
        # the previous ("You", message) / ("Bot", ans) tuples rendered the literal
        # labels "You"/"Bot" as chat bubbles instead of the actual texts.
        history.append((message, ans))
        push_display(f"YOU: {message[:40]}")
        push_display(f"BOT: {ans[:40]}")
        return history, ""

    def tts_fn(text, model_override_val):
        """Synthesize *text* with gTTS and return a playable file path for gr.Audio."""
        if not text or not text.strip():
            return None
        # prefer gTTS (free)
        try:
            audio = tts_gtts_bytes(text)
        except Exception as e:
            raise gr.Error(f"TTS failed: {e}")
        # FIX: gr.Audio cannot play a (bytes, mimetype) tuple — it expects a file
        # path (or (sample_rate, ndarray)). Persist the MP3 and return its path.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fh:
            fh.write(audio)
            return fh.name

    def stt_fn(local_path, model_override_val):
        """Read the uploaded audio file and transcribe it through the HF STT model."""
        if not local_path:
            return ""
        with open(local_path, "rb") as f:
            b = f.read()
        try:
            text = hf_stt_from_bytes(b)
        except Exception as e:
            raise gr.Error(f"STT failed: {e}")
        push_display(f"Voice: {text[:40]}")
        return text

    send.click(chat_fn, inputs=[txt, chatbot, temp, tokens, model_override], outputs=[chatbot, txt])
    tts_btn.click(tts_fn, inputs=[tts_in, model_override], outputs=[tts_audio])
    stt_btn.click(stt_fn, inputs=[up, model_override], outputs=[stt_out])
# ====== Expose REST endpoints under same server (Gradio uses FastAPI) ======
# Gradio mounts its UI on an internal FastAPI application; grab it so the
# /api/* routes below share the Space's single host/port.
# NOTE(review): in some Gradio versions `demo.app` is only populated after
# launch(); if this raises AttributeError, switch to gr.mount_gradio_app —
# verify against the pinned Gradio version.
app = demo.app  # FastAPI app

from fastapi import Request, UploadFile, File
from starlette.responses import JSONResponse, Response
@app.post("/api/ask")
async def api_ask(request: Request):
    """POST {"text": ..., "lang": "vi"|"en"|"auto"} -> {"answer": ...}.

    Builds a language-appropriate prompt, runs HF text generation, and logs
    the exchange into CONVERSATION / the display buffer.
    """
    try:
        j = await request.json()
    except Exception:
        return JSONResponse({"error":"invalid json"}, status_code=400)
    # FIX: a syntactically valid JSON body that is not an object (e.g. a bare
    # list or string) used to crash on j.get() and surface as a 500.
    if not isinstance(j, dict):
        return JSONResponse({"error":"invalid json"}, status_code=400)
    text = (j.get("text","") or "").strip()
    lang = (j.get("lang","auto") or "auto").strip().lower()
    if not text:
        return JSONResponse({"error":"no text"}, status_code=400)
    if lang == "vi":
        prompt = "Bạn là trợ lý thông minh. Trả lời bằng tiếng Việt, rõ ràng:\n\n" + text
    elif lang == "en":
        prompt = "You are a helpful assistant. Answer in English:\n\n" + text
    else:
        prompt = "You are bilingual assistant (Vietnamese/English). Answer in the language of the question.\n\n" + text
    try:
        ans = hf_text_generate(prompt)
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
    CONVERSATION.append((text, ans))
    push_display(f"YOU: {text[:40]}")
    push_display(f"BOT: {ans[:40]}")
    return {"answer": ans}
@app.post("/api/tts")
async def api_tts(request: Request):
    """POST {"text": ...} -> raw MP3 bytes synthesized with gTTS."""
    try:
        j = await request.json()
    except Exception:
        return JSONResponse({"error":"invalid json"}, status_code=400)
    # FIX: reject valid JSON that is not an object before calling .get(),
    # mirroring the guard in /api/ask.
    if not isinstance(j, dict):
        return JSONResponse({"error":"invalid json"}, status_code=400)
    text = (j.get("text","") or "").strip()
    if not text:
        return JSONResponse({"error":"no text"}, status_code=400)
    # use gTTS (free)
    try:
        mp3 = tts_gtts_bytes(text)
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
    return Response(content=mp3, media_type="audio/mpeg")
@app.post("/api/stt")
async def api_stt(file: UploadFile = File(...)):
    """Accept a multipart audio upload and return {"text": transcription}.

    The transcription is also pushed to the display buffer and logged into
    CONVERSATION with a "[voice]" prefix (and an empty bot side).
    """
    try:
        content = await file.read()
    except Exception:
        return JSONResponse({"error":"file read error"}, status_code=400)
    if not content:
        return JSONResponse({"error":"no audio content"}, status_code=400)
    try:
        text = hf_stt_from_bytes(content)
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
    push_display(f"Voice: {text[:40]}")
    CONVERSATION.append((f"[voice] {text}", ""))
    return {"text": text}
@app.post("/api/presence")
async def api_presence(request: Request):
    """Presence-sensor hook: POST {"note": ...} -> {"greeting": ...}.

    Logs the event to the display buffer / CONVERSATION and, when Telegram is
    configured, sends a best-effort notification.
    """
    try:
        j = await request.json()
    except Exception:
        return JSONResponse({"error":"invalid json"}, status_code=400)
    # FIX: reject valid JSON that is not an object before calling .get(),
    # mirroring the guard in /api/ask.
    if not isinstance(j, dict):
        return JSONResponse({"error":"invalid json"}, status_code=400)
    note = (j.get("note","Có người phía trước") or "").strip()
    greeting = f"Xin chào! {note}"
    push_display(f"RADAR: {note[:40]}")
    CONVERSATION.append(("__presence__", greeting))
    if TELEGRAM_TOKEN and TELEGRAM_CHATID:
        try:
            send_telegram(f"⚠️ Robot: Phát hiện người - {note}")
        except Exception:
            logger.exception("telegram notify failed")
    return {"greeting": greeting}
@app.get("/api/display")
async def api_display():
    """Return the rolling display lines and conversation length for the robot's screen."""
    snapshot = list(DISPLAY_BUFFER)  # shallow copy so the response is stable
    return {"lines": snapshot, "conv_len": len(CONVERSATION)}
# ====== Launch app ======
if __name__ == "__main__":
    # Bind to all interfaces; Spaces inject PORT, default to Gradio's 7860 locally.
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))