Spaces:

FaiziRBLX
/

NousAPI

Running

App Files Files Community

FaiziRBLX committed on Apr 11

Commit

6d3ad04

verified ·

1 Parent(s): d50a66d

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -33

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import os
 import logging
 from collections import defaultdict
 from transformers import AutoTokenizer
-from fastapi import FastAPI, Request, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from slowapi import Limiter, _rate_limit_exceeded_handler
 from slowapi.util import get_remote_address
@@ -21,6 +21,7 @@ logger = logging.getLogger(__name__)
 device = torch.device('cpu')
 logger.info(f"model.pt ada: {os.path.exists('model.pt')}")
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
@@ -39,22 +40,44 @@ gc.collect()
 model.eval()
 logger.info("Model siap!")
-# ── FastAPI (untuk /api/chat endpoint) ───────────────────
 limiter = Limiter(key_func=get_remote_address)
 ip_request_count: dict = defaultdict(list)
 ip_banned_until:  dict = {}
-fastapi_app = FastAPI()
-fastapi_app.state.limiter = limiter
-fastapi_app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
-fastapi_app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_methods=["POST", "GET"],
     allow_headers=["*"],
 )
-@fastapi_app.middleware("http")
 async def ddos_protection(request: Request, call_next):
     ip  = get_remote_address(request)
     now = time.time()
@@ -81,19 +104,17 @@ class ChatResponse(BaseModel):
     thinking:           str | None = None
     processing_time_ms: int
-API_KEYS = {"kunci-rahasia-kamu-123"}  # ← ganti!
 def verify_api_key(request: Request):
     """Check the ``X-API-Key`` request header against ``API_KEYS``.

     Returns the supplied key when it is non-empty and a member of
     ``API_KEYS``.

     Raises:
         HTTPException: status 401 when the header is missing, empty,
             or not contained in ``API_KEYS``.
     """
     supplied = request.headers.get("X-API-Key")
     # Happy path first: a non-empty key that is registered is returned as-is.
     if supplied and supplied in API_KEYS:
         return supplied
     raise HTTPException(401, "API key tidak valid.")
-@fastapi_app.get("/api/health")
 def health():
     """Return a fixed "ok" status together with the module-level ``device`` rendered as a string."""
     payload = {"status": "ok", "device": str(device)}
     return payload
-@fastapi_app.post("/api/chat", response_model=ChatResponse)
 @limiter.limit("10/minute")
 @limiter.limit("50/hour")
 async def api_chat(
@@ -116,25 +137,5 @@ async def api_chat(
         processing_time_ms=int((time.time() - start) * 1000)
     )
-# ── Gradio UI ────────────────────────────────────────────
-def gradio_chat(message, history):
-    prompt           = f"{message} <cot>"
-    full             = generate_text(
-        model=model, tokenizer=tokenizer, prompt=prompt,
-        max_new_tokens=200, temperature=0.7,
-        top_k=50, top_p=0.9, device=device
-    )
-    raw              = full[len(prompt):].strip()
-    _, answer        = _extract_thinking(raw)
-    return answer if answer else "Maaf, saya tidak mengerti."
-gradio_ui = gr.ChatInterface(
-    fn=gradio_chat,
-    title="Indonesian LLM",
-    description="Chat dengan model bahasa Indonesia"
-)
-# ── Mount FastAPI ke Gradio ───────────────────────────────
-# Ini kuncinya: Gradio expose FastAPI, kita tinggal mount route kita
-demo = gr.mount_gradio_app(fastapi_app, gradio_ui, path="/")

 import logging
 from collections import defaultdict
 from transformers import AutoTokenizer
+from fastapi import Request, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from slowapi import Limiter, _rate_limit_exceeded_handler
 from slowapi.util import get_remote_address
 device = torch.device('cpu')
 logger.info(f"model.pt ada: {os.path.exists('model.pt')}")
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
 model.eval()
 logger.info("Model siap!")
+# ── Rate limiter ─────────────────────────────────────────
 limiter = Limiter(key_func=get_remote_address)
 ip_request_count: dict = defaultdict(list)
 ip_banned_until:  dict = {}
+API_KEYS = {"kunci-rahasia-kamu-123"}  # ← ganti!
+# ── Gradio UI ────────────────────────────────────────────
+def gradio_chat(message, history):
+    prompt    = f"{message} <cot>"
+    full      = generate_text(
+        model=model, tokenizer=tokenizer, prompt=prompt,
+        max_new_tokens=200, temperature=0.7,
+        top_k=50, top_p=0.9, device=device
+    )
+    raw       = full[len(prompt):].strip()
+    _, answer = _extract_thinking(raw)
+    return answer if answer else "Maaf, saya tidak mengerti."
+demo = gr.ChatInterface(
+    fn=gradio_chat,
+    title="Indonesian LLM",
+    description="Chat dengan model bahasa Indonesia"
+)
+# ── Tambah API route ke Gradio's FastAPI ─────────────────
+app = demo.app  # Gradio expose FastAPI internal di sini
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_methods=["POST", "GET"],
     allow_headers=["*"],
 )
+@app.middleware("http")
 async def ddos_protection(request: Request, call_next):
     ip  = get_remote_address(request)
     now = time.time()
     thinking:           str | None = None
     processing_time_ms: int
 def verify_api_key(request: Request):
     """Check the ``X-API-Key`` request header against ``API_KEYS``.

     Returns the supplied key when it is non-empty and a member of
     ``API_KEYS``.

     Raises:
         HTTPException: status 401 when the header is missing, empty,
             or not contained in ``API_KEYS``.
     """
     supplied = request.headers.get("X-API-Key")
     # Happy path first: a non-empty key that is registered is returned as-is.
     if supplied and supplied in API_KEYS:
         return supplied
     raise HTTPException(401, "API key tidak valid.")
+@app.get("/api/health")
 def health():
     """Return a fixed "ok" status together with the module-level ``device`` rendered as a string."""
     payload = {"status": "ok", "device": str(device)}
     return payload
+@app.post("/api/chat", response_model=ChatResponse)
 @limiter.limit("10/minute")
 @limiter.limit("50/hour")
 async def api_chat(
         processing_time_ms=int((time.time() - start) * 1000)
     )
+# ── Launch ───────────────────────────────────────────────
+demo.launch()