Update app.py
Browse files
app.py
CHANGED
|
@@ -137,5 +137,47 @@ async def api_chat(
|
|
| 137 |
processing_time_ms=int((time.time() - start) * 1000)
|
| 138 |
)
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
# ── Launch ───────────────────────────────────────────────
|
| 141 |
-
demo.launch()
|
|
|
|
| 137 |
processing_time_ms=int((time.time() - start) * 1000)
|
| 138 |
)
|
| 139 |
|
| 140 |
+
# Replace the bottom part of app.py — from "Tambah API route" through the end
|
| 141 |
+
|
| 142 |
+
# ── Build the Gradio UI first ─────────────────────────────
|
| 143 |
+
def gradio_chat(message, history):
    """Produce the reply for one turn of the Gradio chat UI.

    The user message is suffixed with the `` <cot>`` marker, passed to
    ``generate_text`` with fixed sampling settings, and the part of the
    output after the prompt is split by ``_extract_thinking``. Only the
    answer half of that split is returned.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation; not used here.

    Returns:
        The extracted answer, or a fixed fallback sentence when the
        extracted answer is empty.
    """
    cot_prompt = f"{message} <cot>"
    generated = generate_text(
        model=model,
        tokenizer=tokenizer,
        prompt=cot_prompt,
        max_new_tokens=200,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        device=device,
    )
    # Drops the first len(prompt) characters; presumably generate_text
    # echoes the prompt at the start of its output — TODO confirm.
    completion = generated[len(cot_prompt):].strip()
    _thinking, reply = _extract_thinking(completion)
    if reply:
        return reply
    return "Maaf, saya tidak mengerti."
|
| 153 |
+
|
| 154 |
+
# Chat front end: every submitted message is handled by gradio_chat.
demo = gr.ChatInterface(
    description="Chat dengan model bahasa Indonesia",
    title="Indonesian LLM",
    fn=gradio_chat,
)
|
| 159 |
+
|
| 160 |
+
# ── Add custom routes BEFORE launch ───────────────────────
|
| 161 |
+
# NOTE(review): this registers the route on ``demo.app`` before
# ``demo.launch()`` is called; confirm the installed Gradio version
# already exposes ``app`` at this point.
@demo.app.get("/api/health")
def health():
    """Return a static OK status plus the device name for ``GET /api/health``."""
    payload = {"status": "ok", "device": str(device)}
    return payload
|
| 164 |
+
|
| 165 |
+
# NOTE(review): this registers the route on ``demo.app`` before
# ``demo.launch()`` is called; confirm the installed Gradio version
# already exposes ``app`` at this point.
@demo.app.post("/api/chat")
async def api_chat(req: ChatRequest, request: Request, _key: str = Depends(verify_api_key)):
    """Handle ``POST /api/chat``: generate a reply for ``req.message``.

    The message is suffixed with the `` <cot>`` marker and passed to
    ``generate_text``. The text after the prompt is split into thinking
    and answer parts by ``_extract_thinking``.

    Args:
        req: Request body; ``message``, ``max_tokens``, ``temperature``
            and ``show_thinking`` are read from it.
        request: The incoming HTTP request (not used in the body).
        _key: Result of the ``verify_api_key`` dependency; present only
            so the dependency runs for this route.

    Returns:
        A ``ChatResponse`` with the answer (or a fixed fallback sentence
        when the answer is empty), the thinking text when
        ``req.show_thinking`` is truthy (otherwise ``None``), and the
        elapsed time in milliseconds.
    """
    # Local imports keep this block self-contained.
    import asyncio
    import functools

    # perf_counter is monotonic, so the measured duration cannot be
    # skewed by wall-clock adjustments.
    started = time.perf_counter()
    prompt = f"{req.message} <cot>"

    # generate_text is a blocking call. Running it directly inside this
    # coroutine would stall the event loop (and every other route) for
    # the whole generation, so it is offloaded to the default executor.
    run_generation = functools.partial(
        generate_text,
        model=model,
        tokenizer=tokenizer,
        prompt=prompt,
        max_new_tokens=req.max_tokens,
        temperature=req.temperature,
        top_k=50,
        top_p=0.9,
        device=device,
    )
    full = await asyncio.get_running_loop().run_in_executor(None, run_generation)

    # Drops the first len(prompt) characters; presumably generate_text
    # echoes the prompt at the start of its output — TODO confirm.
    raw = full[len(prompt):].strip()
    thinking, answer = _extract_thinking(raw)
    return ChatResponse(
        answer=answer if answer else "Maaf, saya tidak mengerti.",
        thinking=thinking if req.show_thinking else None,
        processing_time_ms=int((time.perf_counter() - started) * 1000),
    )
|
| 181 |
+
|
| 182 |
# ── Launch ───────────────────────────────────────────────
|
| 183 |
+
# Start the server on address 0.0.0.0, port 7860.
demo.launch(
    server_port=7860,
    server_name="0.0.0.0",
)
|