Spaces:

FaiziRBLX
/

NousAPI

Sleeping

App Files Files Community

FaiziRBLX committed on Apr 13

Commit

3d4765d

verified ·

1 Parent(s): 7041c0e

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -130

app.py CHANGED Viewed

@@ -5,32 +5,43 @@ import os
 import logging
 from collections import defaultdict
 from transformers import AutoTokenizer
-from fastapi import Request, HTTPException, Depends
-from fastapi.middleware.cors import CORSMiddleware
-from slowapi import Limiter, _rate_limit_exceeded_handler
-from slowapi.util import get_remote_address
-from slowapi.errors import RateLimitExceeded
-from pydantic import BaseModel, Field
-from fastapi import FastAPI
 from fastapi.responses import JSONResponse
 import gradio as gr
 from best import ModelConfig, IndonesianLLM, generate_text, _extract_thinking
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# ── Load Model ───────────────────────────────────────────
-device = torch.device('cpu')
 logger.info(f"model.pt ada: {os.path.exists('model.pt')}")
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
 checkpoint = torch.load("model.pt", map_location='cpu', weights_only=False)
-config     = checkpoint['config']
-model      = IndonesianLLM(config)
 state_dict = checkpoint['model_state_dict']
 for k in list(state_dict.keys()):
     if state_dict[k].dtype == torch.float16:
@@ -39,172 +50,204 @@ for k in list(state_dict.keys()):
 model.load_state_dict(state_dict)
 del checkpoint, state_dict
 gc.collect()
 model.eval()
 logger.info("Model siap!")
-# ── Rate limiter ─────────────────────────────────────────
-limiter = Limiter(key_func=get_remote_address)
-ip_request_count: dict = defaultdict(list)
-ip_banned_until:  dict = {}
-API_KEYS = {"kunci-rahasia-kamu-123"}  # ← ganti!
-# ── Gradio UI ────────────────────────────────────────────
-def gradio_chat(message, history):
-    prompt    = f"{message} <cot>"
-    full      = generate_text(
-        model=model, tokenizer=tokenizer, prompt=prompt,
-        max_new_tokens=200, temperature=0.7,
-        top_k=50, top_p=0.9, device=device
-    )
-    raw       = full[len(prompt):].strip()
-    _, answer = _extract_thinking(raw)
-    return answer if answer else "Maaf, saya tidak mengerti."
-demo = gr.ChatInterface(
-    fn=gradio_chat,
-    title="Indonesian LLM",
-    description="Chat dengan model bahasa Indonesia"
 )
-# ── Tambah API route ke Gradio's FastAPI ─────────────────
-app = demo.app  # Gradio expose FastAPI internal di sini
-app.state.limiter = limiter
-app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
-    allow_methods=["POST", "GET"],
     allow_headers=["*"],
 )
 @app.middleware("http")
 async def ddos_protection(request: Request, call_next):
-    ip  = get_remote_address(request)
     now = time.time()
     if ip in ip_banned_until:
         if now < ip_banned_until[ip]:
-            raise HTTPException(429, f"Banned. Coba lagi dalam {int(ip_banned_until[ip]-now)}s.")
-        del ip_banned_until[ip]
-        ip_request_count[ip] = []
     ip_request_count[ip].append(now)
-    ip_request_count[ip] = [t for t in ip_request_count[ip] if now - t < 60]
-    if len(ip_request_count[ip]) > 100:
-        ip_banned_until[ip] = now + 3600
-        raise HTTPException(429, "Terlalu banyak request. Banned 1 jam.")
-    return await call_next(request)
-class ChatRequest(BaseModel):
-    message:       str   = Field(..., min_length=1, max_length=500)
-    max_tokens:    int   = Field(default=200, ge=10, le=500)
-    temperature:   float = Field(default=0.7, ge=0.1, le=1.5)
-    show_thinking: bool  = False
-class ChatResponse(BaseModel):
-    answer:             str
-    thinking:           str | None = None
-    processing_time_ms: int
-def verify_api_key(request: Request):
     key = request.headers.get("X-API-Key")
     if not key or key not in API_KEYS:
-        raise HTTPException(401, "API key tidak valid.")
-    return key
 @app.get("/api/health")
 def health():
-    return {"status": "ok", "device": str(device)}
-@app.post("/api/chat", response_model=ChatResponse)
-@limiter.limit("10/minute")
-@limiter.limit("50/hour")
-async def api_chat(
-    req: ChatRequest,
-    request: Request,
-    _key: str = Depends(verify_api_key)
-):
-    start  = time.time()
-    prompt = f"{req.message} <cot>"
-    full   = generate_text(
-        model=model, tokenizer=tokenizer, prompt=prompt,
-        max_new_tokens=req.max_tokens, temperature=req.temperature,
-        top_k=50, top_p=0.9, device=device
-    )
-    raw              = full[len(prompt):].strip()
-    thinking, answer = _extract_thinking(raw)
-    return ChatResponse(
-        answer=answer if answer else "Maaf, saya tidak mengerti.",
-        thinking=thinking if req.show_thinking else None,
-        processing_time_ms=int((time.time() - start) * 1000)
-    )
-# Ganti bagian bawah app.py — dari "Tambah API route" sampai akhir
-# ── Build Gradio dulu ─────────────────────────────────────
-def gradio_chat(message, history):
-    prompt    = f"{message} <cot>"
-    full      = generate_text(
-        model=model, tokenizer=tokenizer, prompt=prompt,
-        max_new_tokens=200, temperature=0.7,
-        top_k=50, top_p=0.9, device=device
-    )
-    raw       = full[len(prompt):].strip()
-    _, answer = _extract_thinking(raw)
-    return answer if answer else "Maaf, saya tidak mengerti."
-demo = gr.ChatInterface(
-    fn=gradio_chat,
-    title="Indonesian LLM",
-    description="Chat dengan model bahasa Indonesia"
-)
-# Tambah route langsung ke demo.app
-@demo.app.get("/api/health")
-def health():
-    return {"status": "ok", "device": str(device)}
-@demo.app.post("/api/chat")
 async def api_chat(request: Request):
     # Cek API key
-    key = request.headers.get("X-API-Key")
-    if not key or key not in API_KEYS:
-        return JSONResponse(status_code=401, content={"error": "API key tidak valid."})
-    # Parse body manual (hindari Pydantic issue)
     try:
         body        = await request.json()
-        message     = body.get("message", "").strip()
         max_tokens  = int(body.get("max_tokens", 200))
         temperature = float(body.get("temperature", 0.7))
         show_think  = bool(body.get("show_thinking", False))
     except Exception:
-        return JSONResponse(status_code=400, content={"error": "Request tidak valid."})
-    if not message or len(message) > 500:
-        return JSONResponse(status_code=400, content={"error": "Pesan kosong atau terlalu panjang."})
     # Generate
     try:
-        start  = time.time()
-        prompt = f"{message} <cot>"
-        full   = generate_text(
-            model=model, tokenizer=tokenizer, prompt=prompt,
-            max_new_tokens=max_tokens, temperature=temperature,
-            top_k=50, top_p=0.9, device=device
         )
         raw              = full[len(prompt):].strip()
         thinking, answer = _extract_thinking(raw)
-        elapsed          = int((time.time() - start) * 1000)
         return JSONResponse(content={
             "answer":             answer if answer else "Maaf, saya tidak mengerti.",
             "thinking":           thinking if show_think else None,
-            "processing_time_ms": elapsed
         })
     except Exception as e:
         logger.error(f"Generate error: {e}")
-        return JSONResponse(status_code=500, content={"error": str(e)})
-# ── Launch ───────────────────────────────────────────────
-demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)

 import logging
 from collections import defaultdict
 from transformers import AutoTokenizer
+from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 import gradio as gr
 from best import ModelConfig, IndonesianLLM, generate_text, _extract_thinking
+# ── Logging ───────────────────────────────────────────────
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# ── Device ────────────────────────────────────────────────
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+logger.info(f"Device: {device}")
+# ── Cek model file ────────────────────────────────────────
 logger.info(f"model.pt ada: {os.path.exists('model.pt')}")
+if not os.path.exists('model.pt'):
+    raise FileNotFoundError("model.pt tidak ditemukan! Upload dulu ke Space.")
+logger.info(f"model.pt size: {os.path.getsize('model.pt') / 1e6:.1f} MB")
+# ── Load tokenizer ────────────────────────────────────────
+logger.info("Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
+logger.info("Tokenizer OK")
+# ── Load model ────────────────────────────────────────────
+logger.info("Loading checkpoint...")
 checkpoint = torch.load("model.pt", map_location='cpu', weights_only=False)
+logger.info(f"Checkpoint keys: {list(checkpoint.keys())}")
+logger.info("Building model...")
+config = checkpoint['config']
+model  = IndonesianLLM(config)
+logger.info(f"Model params: {model.count_parameters():,}")
+logger.info("Loading weights...")
 state_dict = checkpoint['model_state_dict']
 for k in list(state_dict.keys()):
     if state_dict[k].dtype == torch.float16:
 model.load_state_dict(state_dict)
 del checkpoint, state_dict
 gc.collect()
 model.eval()
+model.to(device)
 logger.info("Model siap!")
+# ── Config ────────────────────────────────────────────────
+API_KEYS              = {"kunci-rahasia-kamu-123"}  # ← GANTI!
+ip_request_count      = defaultdict(list)
+ip_banned_until       = {}
+BLACKLIST_THRESHOLD   = 100
+BLACKLIST_WINDOW      = 60
+BLACKLIST_DURATION    = 3600
+# ═══════════════════════════════════════════════════════════
+#  1. FastAPI (induk)
+# ═══════════════════════════════════════════════════════════
+app = FastAPI(
+    title="Indonesian LLM API",
+    description="API untuk model bahasa Indonesia dengan Chain-of-Thought",
+    version="1.0.0"
 )
+# ── CORS ──────────────────────────────────────────────────
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
+    allow_methods=["*"],
     allow_headers=["*"],
 )
+# ── DDoS protection ───────────────────────────────────────
 @app.middleware("http")
 async def ddos_protection(request: Request, call_next):
+    ip  = request.client.host if request.client else "unknown"
     now = time.time()
     if ip in ip_banned_until:
         if now < ip_banned_until[ip]:
+            remaining = int(ip_banned_until[ip] - now)
+            return JSONResponse(
+                status_code=429,
+                content={"error": f"IP dibanned. Coba lagi dalam {remaining} detik."}
+            )
+        else:
+            del ip_banned_until[ip]
+            ip_request_count[ip] = []
     ip_request_count[ip].append(now)
+    ip_request_count[ip] = [t for t in ip_request_count[ip] if now - t < BLACKLIST_WINDOW]
+    if len(ip_request_count[ip]) > BLACKLIST_THRESHOLD:
+        ip_banned_until[ip] = now + BLACKLIST_DURATION
+        ip_request_count[ip] = []
+        return JSONResponse(
+            status_code=429,
+            content={"error": f"Terlalu banyak request. IP dibanned selama {BLACKLIST_DURATION // 60} menit."}
+        )
+    return await call_next(request)
+# ═══════════════════════════════════════════════════════════
+#  2. API Routes
+# ═══════════════════════════════════════════════════════════
+def check_api_key(request: Request):
     key = request.headers.get("X-API-Key")
     if not key or key not in API_KEYS:
+        return False
+    return True
 @app.get("/api/health")
 def health():
+    return {
+        "status": "ok",
+        "device": str(device),
+        "model_params": model.count_parameters()
+    }
+@app.post("/api/chat")
 async def api_chat(request: Request):
     # Cek API key
+    if not check_api_key(request):
+        return JSONResponse(
+            status_code=401,
+            content={"error": "API key tidak valid. Tambahkan header X-API-Key."}
+        )
+    # Rate limit per endpoint (10 req/menit per IP)
+    ip  = request.client.host if request.client else "unknown"
+    now = time.time()
+    endpoint_key = f"{ip}_chat"
+    if endpoint_key not in ip_request_count:
+        ip_request_count[endpoint_key] = []
+    ip_request_count[endpoint_key] = [
+        t for t in ip_request_count[endpoint_key] if now - t < 60
+    ]
+    if len(ip_request_count[endpoint_key]) >= 10:
+        return JSONResponse(
+            status_code=429,
+            content={"error": "Rate limit: maksimal 10 request per menit."}
+        )
+    ip_request_count[endpoint_key].append(now)
+    # Parse request body
     try:
         body        = await request.json()
+        message     = str(body.get("message", "")).strip()
         max_tokens  = int(body.get("max_tokens", 200))
         temperature = float(body.get("temperature", 0.7))
         show_think  = bool(body.get("show_thinking", False))
     except Exception:
+        return JSONResponse(
+            status_code=400,
+            content={"error": "Request body tidak valid. Gunakan JSON."}
+        )
+    # Validasi input
+    if not message:
+        return JSONResponse(status_code=400, content={"error": "Pesan tidak boleh kosong."})
+    if len(message) > 500:
+        return JSONResponse(status_code=400, content={"error": "Pesan terlalu panjang. Maksimal 500 karakter."})
+    if not (10 <= max_tokens <= 500):
+        return JSONResponse(status_code=400, content={"error": "max_tokens harus antara 10 dan 500."})
+    if not (0.1 <= temperature <= 1.5):
+        return JSONResponse(status_code=400, content={"error": "temperature harus antara 0.1 dan 1.5."})
     # Generate
     try:
+        start            = time.time()
+        prompt           = f"{message} <cot>"
+        full             = generate_text(
+            model=model,
+            tokenizer=tokenizer,
+            prompt=prompt,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_k=50,
+            top_p=0.9,
+            device=device
         )
         raw              = full[len(prompt):].strip()
         thinking, answer = _extract_thinking(raw)
+        elapsed_ms       = int((time.time() - start) * 1000)
+        logger.info(f"[{ip}] '{message[:40]}' → {elapsed_ms}ms")
         return JSONResponse(content={
             "answer":             answer if answer else "Maaf, saya tidak mengerti.",
             "thinking":           thinking if show_think else None,
+            "processing_time_ms": elapsed_ms
         })
     except Exception as e:
         logger.error(f"Generate error: {e}")
+        return JSONResponse(
+            status_code=500,
+            content={"error": f"Gagal generate: {str(e)}"}
+        )
+# ═══════════════════════════════════════════════════════════
+#  3. Gradio UI
+# ═══════════════════════════════════════════════════════════
+def gradio_chat(message, history):
+    if not message.strip():
+        return "Silakan ketik pesan."
+    try:
+        prompt    = f"{message} <cot>"
+        full      = generate_text(
+            model=model,
+            tokenizer=tokenizer,
+            prompt=prompt,
+            max_new_tokens=200,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.9,
+            device=device
+        )
+        raw       = full[len(prompt):].strip()
+        _, answer = _extract_thinking(raw)
+        return answer if answer else "Maaf, saya tidak mengerti."
+    except Exception as e:
+        logger.error(f"Gradio error: {e}")
+        return f"Error: {str(e)}"
+gradio_ui = gr.ChatInterface(
+    fn=gradio_chat,
+    title="Indonesian LLM",
+    description="Model bahasa Indonesia dengan kemampuan Chain-of-Thought reasoning. Juga tersedia sebagai API di `/api/chat`.",
+    examples=[
+        ["Halo, apa kabar?"],
+        ["Jelaskan cara kerja internet"],
+        ["Berapa hasil dari 25 dikali 4?"],
+        ["Apa ibu kota Indonesia?"],
+    ],
+    theme=gr.themes.Soft()
+)
+# ═══════════════════════════════════════════════════════════
+#  4. Mount Gradio ke FastAPI — FastAPI sebagai induk
+# ═══════════════════════════════════════════════════════════
+demo = gr.mount_gradio_app(app, gradio_ui, path="/")