OrbitMC committed on
Commit
ac22457
Β·
verified Β·
1 Parent(s): 5b51ec0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +301 -258
app.py CHANGED
@@ -1,277 +1,320 @@
1
- import sys
2
- import os
 
 
3
 
4
- # =========================================================
5
- # 1. SETUP PHASE (Runs during Docker build to bake models)
6
- # =========================================================
7
- if "--setup" in sys.argv:
8
- print("Pre-downloading models into Docker image...")
9
- from huggingface_hub import hf_hub_download
10
- hf_hub_download(repo_id="unsloth/Qwen3.5-0.8B-GGUF", filename="Qwen3.5-0.8B-UD-Q2_K_XL.gguf")
11
-
12
- from langchain_huggingface import HuggingFaceEmbeddings
13
- HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
14
-
15
- try:
16
- from kittentts import KittenTTS
17
- KittenTTS("KittenML/kitten-tts-nano-0.8-fp32")
18
- except Exception as e:
19
- print(f"TTS Download check: {e}")
20
-
21
- print("Setup complete. Exiting installer.")
22
- sys.exit(0)
23
-
24
- # =========================================================
25
- # 2. RUNTIME PHASE (Runs when Space is active)
26
- # =========================================================
27
- import time
28
  import json
29
- import uuid
 
 
30
  from pathlib import Path
31
- from flask import Flask, request, jsonify, send_file, render_template_string
32
- from huggingface_hub import hf_hub_download
33
- from llama_cpp import Llama
34
- from kittentts import KittenTTS
35
- from langchain_huggingface import HuggingFaceEmbeddings
36
- from langchain_community.vectorstores import FAISS
37
- from langchain_text_splitters import RecursiveCharacterTextSplitter
38
- from langchain_core.documents import Document
39
-
40
- # Initialize App & Directories
41
- app = Flask(__name__)
42
- DB_DIR = Path("database")
43
- AUDIO_DIR = Path("static/audio")
44
- for d in [DB_DIR, AUDIO_DIR]:
 
 
 
 
 
 
 
45
  d.mkdir(parents=True, exist_ok=True)
46
 
47
- print("Loading Models into RAM...")
48
-
49
- # Initialize Embeddings & Vector Store
50
- embeddings = HuggingFaceEmbeddings(
51
- model_name="sentence-transformers/all-MiniLM-L6-v2",
52
- model_kwargs={"device": "cpu"}
 
 
 
 
 
53
  )
54
 
55
- faiss_path = DB_DIR / "index.faiss"
56
- if faiss_path.exists():
57
- vector_store = FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
58
- else:
59
- vector_store = FAISS.from_documents([Document(page_content="Ana initialized.")], embeddings)
60
- vector_store.save_local(str(DB_DIR))
61
-
62
- # Initialize LLM
63
- model_path = hf_hub_download(
64
- repo_id="unsloth/Qwen3.5-0.8B-GGUF",
65
- filename="Qwen3.5-0.8B-UD-Q2_K_XL.gguf"
66
- )
67
- llm = Llama(model_path=model_path, n_ctx=2048, n_threads=os.cpu_count() or 4, verbose=False)
68
 
69
- # Initialize TTS
70
- tts = KittenTTS("KittenML/kitten-tts-nano-0.8-fp32")
 
 
 
 
71
 
72
- # --- Helper Functions ---
73
- def get_context(query):
74
- try:
75
- retriever = vector_store.as_retriever(search_kwargs={"k": 3})
76
- return "\n".join(d.page_content for d in retriever.invoke(query))
77
- except:
78
- return ""
79
-
80
- def generate_audio(text):
81
- if not text.strip(): return None
82
- audio_id = str(uuid.uuid4())
83
- audio_path = AUDIO_DIR / f"{audio_id}.wav"
84
- try:
85
- import soundfile as sf
86
- audio_data = tts.generate(text, voice="Kiki")
87
- sf.write(str(audio_path), audio_data, 24000)
88
- return f"/audio/{audio_id}.wav"
89
- except Exception as e:
90
- print(f"TTS Error: {e}")
91
- return None
92
-
93
- # --- HTML Interface ---
94
- HTML_TEMPLATE = """
95
- <!DOCTYPE html>
96
- <html lang="en">
97
- <head>
98
- <meta charset="UTF-8">
99
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
100
- <title>Ana Web Terminal</title>
101
- <style>
102
- body { font-family: 'Courier New', Courier, monospace; background-color: #0d1117; color: #ff79c6; margin: 0; padding: 0; display: flex; height: 100vh; }
103
- .sidebar { width: 300px; background-color: #161b22; padding: 20px; border-right: 1px solid #30363d; display: flex; flex-direction: column; gap: 15px; }
104
- .main { flex: 1; display: flex; flex-direction: column; padding: 20px; }
105
- .chat-box { flex: 1; overflow-y: auto; background: #000; padding: 15px; border: 1px solid #30363d; border-radius: 5px; margin-bottom: 15px; color: #f8f8f2; }
106
- .message { margin-bottom: 10px; }
107
- .user-msg { color: #8be9fd; }
108
- .ai-msg { color: #ff79c6; }
109
- .sys-msg { color: #50fa7b; }
110
- .input-group { display: flex; gap: 10px; }
111
- input[type="text"], textarea { width: 100%; background: #0d1117; color: #fff; border: 1px solid #30363d; padding: 10px; border-radius: 3px; font-family: inherit; }
112
- button { background: #bd93f9; color: #fff; border: none; padding: 10px 15px; cursor: pointer; border-radius: 3px; font-family: inherit; font-weight: bold; }
113
- button:hover { background: #ff79c6; }
114
- .btn-danger { background: #ff5555; }
115
- .btn-danger:hover { background: #ff0000; }
116
- label { font-size: 0.9em; font-weight: bold; color: #f8f8f2; }
117
- h2 { margin-top: 0; color: #ff79c6; font-size: 1.2em; text-align: center; border-bottom: 1px solid #30363d; padding-bottom: 10px; }
118
- </style>
119
- </head>
120
- <body>
121
-
122
- <div class="sidebar">
123
- <h2>βš™οΈ ANA SETTINGS</h2>
124
- <label>System Prompt:</label>
125
- <textarea id="sys_prompt" rows="5">You are Ana, a concise and intelligent AI assistant. Reply in 1-2 short sentences. Never use emojis or markdown. Be slightly casual but highly efficient.</textarea>
126
-
127
- <label>Temperature (<span id="temp_val">0.65</span>):</label>
128
- <input type="range" id="temperature" min="0.1" max="1.0" step="0.05" value="0.65" oninput="document.getElementById('temp_val').innerText=this.value">
129
-
130
- <label>
131
- <input type="checkbox" id="use_tts" checked> Enable Voice (TTS)
132
- </label>
133
-
134
- <button class="btn-danger" onclick="clearMemory()" style="margin-top: auto;">Wipe Vector Memory</button>
135
- </div>
136
-
137
- <div class="main">
138
- <div class="chat-box" id="chat_box">
139
- <div class="message sys-msg">[SYSTEM] Ana Online. Awaiting input...</div>
140
- </div>
141
-
142
- <div class="input-group">
143
- <input type="text" id="user_input" placeholder="Talk to Ana..." onkeypress="if(event.key==='Enter') sendMessage()" autofocus>
144
- <button onclick="sendMessage()" id="send_btn">Send</button>
145
- </div>
146
- </div>
147
-
148
- <audio id="audio_player" autoplay></audio>
149
-
150
- <script>
151
- let history =[];
152
-
153
- async function sendMessage() {
154
- const inputField = document.getElementById("user_input");
155
- const text = inputField.value.trim();
156
- if (!text) return;
157
-
158
- appendMessage("User", text, "user-msg");
159
- inputField.value = "";
160
- document.getElementById("send_btn").disabled = true;
161
-
162
- const sysPrompt = document.getElementById("sys_prompt").value;
163
- const temp = document.getElementById("temperature").value;
164
- const useTTS = document.getElementById("use_tts").checked;
165
-
166
- try {
167
- const response = await fetch("/chat", {
168
- method: "POST",
169
- headers: { "Content-Type": "application/json" },
170
- body: JSON.stringify({
171
- message: text,
172
- history: history,
173
- system_prompt: sysPrompt,
174
- temperature: temp,
175
- use_tts: useTTS
176
- })
177
- });
178
- const data = await response.json();
179
-
180
- appendMessage("Ana", data.reply, "ai-msg");
181
-
182
- history.push({ role: "user", content: text });
183
- history.push({ role: "assistant", content: data.reply });
184
- if (history.length > 8) history = history.slice(history.length - 8);
185
-
186
- if (data.audio_url) {
187
- const player = document.getElementById("audio_player");
188
- player.src = data.audio_url;
189
- player.play();
190
- }
191
- } catch (err) {
192
- appendMessage("System", "Error connecting to backend.", "sys-msg");
193
- }
194
- document.getElementById("send_btn").disabled = false;
195
- document.getElementById("user_input").focus();
196
- }
197
-
198
- function appendMessage(sender, text, className) {
199
- const box = document.getElementById("chat_box");
200
- const msgDiv = document.createElement("div");
201
- msgDiv.className = `message ${className}`;
202
- msgDiv.innerText = `[${sender}] ${text}`;
203
- box.appendChild(msgDiv);
204
- box.scrollTop = box.scrollHeight;
205
- }
206
-
207
- async function clearMemory() {
208
- if(confirm("Wipe long-term Vector Memory?")) {
209
- await fetch("/clear_memory", { method: "POST" });
210
- history =[];
211
- appendMessage("System", "Memory wiped successfully. Ana has forgotten past interactions.", "sys-msg");
212
- }
213
- }
214
- </script>
215
- </body>
216
- </html>
217
- """
218
 
219
- # --- Flask Routes ---
220
- @app.route("/")
221
- def index():
222
- return render_template_string(HTML_TEMPLATE)
223
-
224
- @app.route("/chat", methods=["POST"])
225
- def chat():
226
- data = request.json
227
- user_input = data.get("message", "")
228
- history = data.get("history",[])
229
- sys_prompt = data.get("system_prompt", "You are Ana, a concise AI assistant. Reply in 1-2 short sentences.")
230
- temp = float(data.get("temperature", 0.65))
231
- use_tts = data.get("use_tts", True)
232
-
233
- context = get_context(user_input)
234
-
235
- messages =[{"role": "system", "content": f"{sys_prompt}\n\nRelevant Memory:\n{context}"}]
236
- for msg in history[-4:]:
237
- messages.append(msg)
238
- messages.append({"role": "user", "content": user_input})
239
-
240
- response = llm.create_chat_completion(
241
- messages=messages,
242
- max_tokens=150,
243
- temperature=temp,
244
- top_p=0.9
245
  )
246
-
247
- reply_text = response["choices"][0]["message"]["content"].strip()
248
-
249
- audio_url = None
250
- if use_tts:
251
- audio_url = generate_audio(reply_text)
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  try:
254
- new_doc = Document(page_content=f"User: {user_input}\nAna: {reply_text}")
255
- vector_store.add_documents([new_doc])
256
- vector_store.save_local(str(DB_DIR))
 
257
  except Exception as e:
258
- print("Memory save error:", e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
- return jsonify({
261
- "reply": reply_text,
262
- "audio_url": audio_url
263
- })
264
 
265
- @app.route("/audio/<filename>")
266
- def serve_audio(filename):
267
- return send_file(AUDIO_DIR / filename, mimetype="audio/wav")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
 
269
- @app.route("/clear_memory", methods=["POST"])
270
- def clear_memory():
271
- global vector_store
272
- vector_store = FAISS.from_documents([Document(page_content="Memory wiped.")], embeddings)
273
- vector_store.save_local(str(DB_DIR))
274
- return jsonify({"status": "success"})
275
 
276
  if __name__ == "__main__":
277
- app.run(host="0.0.0.0", port=7860)
 
 
 
 
 
 
1
+ """
2
+ J.A.R.V.I.S β€” FastAPI backend
3
+ Model is loaded ONCE at startup and kept in RAM for instant responses.
4
+ """
5
 
6
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import json
8
+ import time
9
+ import warnings
10
+ import asyncio
11
  from pathlib import Path
12
+ from contextlib import asynccontextmanager
13
+
14
+ import uvicorn
15
+ from fastapi import FastAPI, HTTPException
16
+ from fastapi.staticfiles import StaticFiles
17
+ from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse
18
+ from pydantic import BaseModel
19
+
20
+ warnings.filterwarnings("ignore")
21
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
22
+ os.environ["HF_HOME"] = "/app/cache"
23
+ os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
24
+ os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/app/cache"
25
+
26
+ # ── Paths ──
27
+ VECTOR_DIR = Path("/app/database/vector_store")
28
+ LEARN_DIR = Path("/app/database/learning_data")
29
+ CHATS_DIR = Path("/app/database/chats_data")
30
+ CACHE_DIR = Path("/app/cache")
31
+
32
+ for d in [VECTOR_DIR, LEARN_DIR, CHATS_DIR]:
33
  d.mkdir(parents=True, exist_ok=True)
34
 
35
+ # ── Global model holders (loaded once, never reloaded) ──
36
+ LLM = None
37
+ RETRIEVER = None
38
+ TTS = None
39
+ TTS_OK = False
40
+
41
+ SYSTEM_PROMPT = (
42
+ "You are J.A.R.V.I.S, a concise and intelligent AI assistant. "
43
+ "Always reply in 1–2 short, direct sentences. "
44
+ "Never use emojis, markdown, asterisks, or filler phrases. "
45
+ "Be helpful, precise, and slightly formal."
46
  )
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
# ══════════════════════════════════════════
# STARTUP — load everything into RAM once
# ══════════════════════════════════════════
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the vector store, LLM and TTS exactly once at process startup.

    Heavy third-party imports are deliberately local to this function so
    that importing app.py stays cheap. All loaded objects are published
    through the module-level globals LLM / RETRIEVER / TTS / TTS_OK.
    """
    global LLM, RETRIEVER, TTS, TTS_OK

    print("=" * 55)
    print(" J.A.R.V.I.S — starting up")
    print("=" * 55)

    # 1. Vector store / embeddings
    print("[1/3] Loading embeddings & vector store...", flush=True)
    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain_core.documents import Document

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        cache_folder=str(CACHE_DIR),
    )
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

    def _load_docs():
        """Collect documents from learning .txt files and saved chat JSONs."""
        docs = []
        for f in LEARN_DIR.glob("*.txt"):
            try:
                docs.append(Document(page_content=f.read_text(errors="ignore"),
                                     metadata={"source": f.name}))
            except Exception:
                pass  # unreadable file — skip, best-effort ingestion
        for f in CHATS_DIR.glob("*.json"):
            try:
                data = json.loads(f.read_text(errors="ignore"))
                content = "\n".join(
                    f"{m['role']}: {m['content']}"
                    for m in data.get("messages", [])
                    if isinstance(m, dict) and "role" in m and "content" in m
                )
                if content.strip():
                    docs.append(Document(page_content=content,
                                         metadata={"source": f.name}))
            except Exception:
                pass  # malformed JSON — skip
        return docs

    index_file = VECTOR_DIR / "index.faiss"
    # FIX: `vs` used to be bound only inside the exists() branch; the rebuild
    # condition relied on short-circuit evaluation to avoid a NameError.
    vs = None
    if index_file.exists():
        try:
            vs = FAISS.load_local(str(VECTOR_DIR), embeddings,
                                  allow_dangerous_deserialization=True)
            print("  Vector store loaded from disk.")
        except Exception as e:
            # FIX: log the failure instead of silently swallowing it, then
            # fall through to a full rebuild.
            print(f"  ⚠ Failed to load vector store ({e}); rebuilding.")
            vs = None

    if vs is None:
        docs = _load_docs() or [Document(page_content="No data yet.")]
        chunks = splitter.split_documents(docs)
        vs = FAISS.from_documents(chunks, embeddings)
        vs.save_local(str(VECTOR_DIR))
        print("  Vector store built and saved.")

    RETRIEVER = vs.as_retriever(search_kwargs={"k": 3})
    print("  ✓ Vector store ready")

    # 2. LLM — loaded into RAM, stays there for the process lifetime
    print("[2/3] Loading LLM into RAM (model pre-cached in image)...", flush=True)
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    model_path = hf_hub_download(
        repo_id="unsloth/Qwen3.5-0.8B-GGUF",
        filename="Qwen3.5-0.8B-UD-Q2_K_XL.gguf",
        cache_dir=str(CACHE_DIR),
        local_files_only=True,  # never re-download; use baked image cache
    )

    LLM = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=os.cpu_count() or 4,
        n_batch=512,     # larger batch = faster prompt processing
        use_mmap=True,   # memory-map the file — fastest cold load on CPU
        use_mlock=True,  # lock pages in RAM — prevents swap thrashing
        verbose=False,
    )
    print("  ✓ LLM ready")

    # 3. TTS (optional — chat must still work without a voice)
    print("[3/3] Loading TTS...", flush=True)
    try:
        from kittentts import KittenTTS
        TTS = KittenTTS("KittenML/kitten-tts-nano-0.8-fp32")
        TTS_OK = True
        print("  ✓ TTS ready (Kiki)")
    except Exception as e:
        print(f"  ⚠ TTS unavailable: {e}")

    print("\n  ✓ ALL SYSTEMS ONLINE — serving on :7860\n")
    yield

    # Shutdown
    print("J.A.R.V.I.S: shutting down.")
154
+
155
+
156
# ══════════════════════════════════════════
# APP
# ══════════════════════════════════════════
# `lifespan` wires the one-time model loading above into FastAPI startup/shutdown.
app = FastAPI(title="J.A.R.V.I.S", lifespan=lifespan)
# Front-end assets (index.html references them under /static).
app.mount("/static", StaticFiles(directory="static"), name="static")
161
+
162
 
163
# ── Request / response schemas ──
class ChatRequest(BaseModel):
    # The new user message to answer.
    message: str
    # Prior turns as [user, assistant] pairs; only the last few are used.
    history: list[list[str]] = []  # [[user, assistant], ...]
167
 
168
class ChatResponse(BaseModel):
    # The assistant's reply text.
    reply: str
170
+
171
+
172
# ── Routes ──
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the single-page front-end."""
    return FileResponse("static/index.html")
176
+
177
+
178
+ @app.get("/health")
179
+ async def health():
180
+ return {"status": "ok", "llm": LLM is not None, "tts": TTS_OK}
181
+
182
+
183
+ @app.post("/chat", response_model=ChatResponse)
184
+ async def chat(req: ChatRequest):
185
+ if LLM is None:
186
+ raise HTTPException(503, "Model not ready yet")
187
+
188
+ # Retrieve context
189
+ context = ""
190
+ try:
191
+ docs = RETRIEVER.invoke(req.message)
192
+ context = "\n".join(d.page_content for d in docs)
193
+ except Exception:
194
+ pass
195
+
196
+ # Build messages
197
+ system = SYSTEM_PROMPT
198
+ if context.strip():
199
+ system += f"\n\nBackground context (use only if relevant):\n{context}"
200
+
201
+ messages = [{"role": "system", "content": system}]
202
+ for turn in req.history[-4:]:
203
+ if len(turn) == 2:
204
+ messages.append({"role": "user", "content": turn[0]})
205
+ messages.append({"role": "assistant", "content": turn[1]})
206
+ messages.append({"role": "user", "content": req.message})
207
+
208
+ # Generate (run in thread so we don't block the event loop)
209
+ loop = asyncio.get_event_loop()
210
+
211
+ def _generate():
212
+ result = LLM.create_chat_completion(
213
+ messages=messages,
214
+ max_tokens=150,
215
+ temperature=0.65,
216
+ top_p=0.9,
217
+ repeat_penalty=1.1,
218
+ stream=False,
219
+ )
220
+ return result["choices"][0]["message"]["content"].strip()
221
+
222
+ reply = await loop.run_in_executor(None, _generate)
223
+ return ChatResponse(reply=reply)
224
+
225
+
226
+ @app.post("/chat/stream")
227
+ async def chat_stream(req: ChatRequest):
228
+ """Server-Sent Events streaming endpoint."""
229
+ if LLM is None:
230
+ raise HTTPException(503, "Model not ready yet")
231
+
232
+ context = ""
233
+ try:
234
+ docs = RETRIEVER.invoke(req.message)
235
+ context = "\n".join(d.page_content for d in docs)
236
+ except Exception:
237
+ pass
238
+
239
+ system = SYSTEM_PROMPT
240
+ if context.strip():
241
+ system += f"\n\nBackground context:\n{context}"
242
+
243
+ messages = [{"role": "system", "content": system}]
244
+ for turn in req.history[-4:]:
245
+ if len(turn) == 2:
246
+ messages.append({"role": "user", "content": turn[0]})
247
+ messages.append({"role": "assistant", "content": turn[1]})
248
+ messages.append({"role": "user", "content": req.message})
249
+
250
+ async def event_gen():
251
+ loop = asyncio.get_event_loop()
252
+ queue = asyncio.Queue()
253
+
254
+ def _stream():
255
+ for chunk in LLM.create_chat_completion(
256
+ messages=messages,
257
+ max_tokens=150,
258
+ temperature=0.65,
259
+ top_p=0.9,
260
+ repeat_penalty=1.1,
261
+ stream=True,
262
+ ):
263
+ piece = chunk["choices"][0].get("delta", {}).get("content", "")
264
+ if piece:
265
+ asyncio.run_coroutine_threadsafe(queue.put(piece), loop)
266
+ asyncio.run_coroutine_threadsafe(queue.put(None), loop) # sentinel
267
+
268
+ loop.run_in_executor(None, _stream)
269
+
270
+ while True:
271
+ piece = await queue.get()
272
+ if piece is None:
273
+ yield "data: [DONE]\n\n"
274
+ break
275
+ yield f"data: {json.dumps(piece)}\n\n"
276
+
277
+ return StreamingResponse(event_gen(), media_type="text/event-stream")
278
+
279
+
280
+ @app.post("/tts")
281
+ async def tts_endpoint(body: dict):
282
+ """Return raw PCM audio bytes for the given text."""
283
+ if not TTS_OK:
284
+ raise HTTPException(503, "TTS not available")
285
+ text = body.get("text", "").strip()
286
+ if not text:
287
+ raise HTTPException(400, "No text provided")
288
+
289
+ loop = asyncio.get_event_loop()
290
+
291
+ def _speak():
292
+ return TTS.generate(text, voice="Kiki")
293
+
294
+ audio_bytes = await loop.run_in_executor(None, _speak)
295
+ return StreamingResponse(iter([bytes(audio_bytes)]),
296
+ media_type="audio/wav")
297
+
298
+
299
+ @app.post("/save")
300
+ async def save_chat(body: dict):
301
+ history = body.get("history", [])
302
+ if not history:
303
+ return {"saved": False}
304
+ path = CHATS_DIR / f"session_{int(time.time())}.json"
305
+ messages = []
306
+ for turn in history:
307
+ if len(turn) == 2:
308
+ messages.append({"role": "user", "content": turn[0]})
309
+ messages.append({"role": "assistant", "content": turn[1]})
310
+ path.write_text(json.dumps({"messages": messages}, ensure_ascii=False, indent=2))
311
+ return {"saved": True, "path": str(path)}
312
 
 
 
 
 
 
 
313
 
314
  if __name__ == "__main__":
315
+ uvicorn.run(
316
+ "app:app",
317
+ host="0.0.0.0",
318
+ port=int(os.environ.get("PORT", 7860)),
319
+ log_level="warning",
320
+ )