FaiziRBLX committed on
Commit
594848c
·
verified ·
1 Parent(s): 6d3ad04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -1
app.py CHANGED
@@ -137,5 +137,47 @@ async def api_chat(
137
  processing_time_ms=int((time.time() - start) * 1000)
138
  )
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  # ── Launch ───────────────────────────────────────────────
141
- demo.launch()
 
137
  processing_time_ms=int((time.time() - start) * 1000)
138
  )
139
 
140
+ # Ganti bagian bawah app.py — dari "Tambah API route" sampai akhir
141
+
142
+ # ── Build Gradio dulu ─────────────────────────────────────
143
+ def gradio_chat(message, history):
144
+ prompt = f"{message} <cot>"
145
+ full = generate_text(
146
+ model=model, tokenizer=tokenizer, prompt=prompt,
147
+ max_new_tokens=200, temperature=0.7,
148
+ top_k=50, top_p=0.9, device=device
149
+ )
150
+ raw = full[len(prompt):].strip()
151
+ _, answer = _extract_thinking(raw)
152
+ return answer if answer else "Maaf, saya tidak mengerti."
153
+
154
+ demo = gr.ChatInterface(
155
+ fn=gradio_chat,
156
+ title="Indonesian LLM",
157
+ description="Chat dengan model bahasa Indonesia"
158
+ )
159
+
160
+ # ── Tambah custom routes SEBELUM launch ──────────────────
161
+ @demo.app.get("/api/health")
162
+ def health():
163
+ return {"status": "ok", "device": str(device)}
164
+
165
+ @demo.app.post("/api/chat")
166
+ async def api_chat(req: ChatRequest, request: Request, _key: str = Depends(verify_api_key)):
167
+ start = time.time()
168
+ prompt = f"{req.message} <cot>"
169
+ full = generate_text(
170
+ model=model, tokenizer=tokenizer, prompt=prompt,
171
+ max_new_tokens=req.max_tokens, temperature=req.temperature,
172
+ top_k=50, top_p=0.9, device=device
173
+ )
174
+ raw = full[len(prompt):].strip()
175
+ thinking, answer = _extract_thinking(raw)
176
+ return ChatResponse(
177
+ answer=answer if answer else "Maaf, saya tidak mengerti.",
178
+ thinking=thinking if req.show_thinking else None,
179
+ processing_time_ms=int((time.time() - start) * 1000)
180
+ )
181
+
182
  # ── Launch ───────────────────────────────────────────────
183
+ demo.launch(server_name="0.0.0.0", server_port=7860)