Spaces:

FaiziRBLX
/

NousAPI

Sleeping

App Files Files Community

FaiziRBLX committed on Apr 11

Commit

166c4d3

verified ·

1 Parent(s): 4bc037c

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -28

app.py CHANGED Viewed

@@ -1,22 +1,27 @@
 import torch
-import gradio as gr
 from transformers import AutoTokenizer
-from best import ModelConfig, IndonesianLLM
-# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
-# Load checkpoint (strukturnya: {"model_state_dict": ..., "config": ..., dst})
-checkpoint = torch.load("model.pt", map_location=torch.device('cpu'), weights_only=False)
-# Ambil config dari checkpoint (bukan ModelConfig default!)
 config = checkpoint['config']
-# Bangun kerangka model sesuai config yang tersimpan
 model = IndonesianLLM(config)
-# Ambil bobot, konversi fp16 → fp32 jika perlu
 state_dict = checkpoint['model_state_dict']
 if checkpoint.get('dtype') == 'fp16':
     state_dict = {k: v.float() if v.dtype == torch.float16 else v
@@ -24,29 +29,128 @@ if checkpoint.get('dtype') == 'fp16':
 model.load_state_dict(state_dict)
 model.eval()
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model.to(device)
-# Fungsi inference
-def predict(teks_input):
-    from best import generate_text, _extract_thinking
-    prompt = f"{teks_input} <cot>"
     full = generate_text(
         model=model, tokenizer=tokenizer, prompt=prompt,
-        max_new_tokens=200, temperature=0.7,
         top_k=50, top_p=0.9, device=device
     )
     raw = full[len(prompt):].strip()
-    _, answer = _extract_thinking(raw)
-    return answer if answer else "Maaf, saya tidak mengerti."
-# Gradio UI
-iface = gr.Interface(
-    fn=predict,
-    inputs=gr.Textbox(lines=2, placeholder="Ketik pesan di sini..."),
-    outputs="text",
-    title="Indonesian LLM API"
-)
-iface.launch()

 import torch
+import time
+import hashlib
+from collections import defaultdict
 from transformers import AutoTokenizer
+from fastapi import FastAPI, Request, HTTPException, Depends
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.trustedhost import TrustedHostMiddleware
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.util import get_remote_address
+from slowapi.errors import RateLimitExceeded
+from pydantic import BaseModel, Field
+from best import ModelConfig, IndonesianLLM, generate_text, _extract_thinking
+# ── Load model ──────────────────────────────────────────────────────────────
+# Use the GPU when one is available; the checkpoint and model are placed on `device`.
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 tokenizer = AutoTokenizer.from_pretrained("indolem/indobert-base-uncased")
+# Register the <cot> / </cot> markers as additional special tokens.
 tokenizer.add_special_tokens({"additional_special_tokens": ["<cot>", "</cot>"]})
+# NOTE(review): weights_only=False allows torch.load to unpickle arbitrary
+# objects — only load a model.pt that comes from a trusted source.
+checkpoint = torch.load("model.pt", map_location=device, weights_only=False)
+# The checkpoint is read as a mapping with 'config', 'model_state_dict' and an
+# optional 'dtype' entry.
 config = checkpoint['config']
 model = IndonesianLLM(config)
 state_dict = checkpoint['model_state_dict']
+# When the checkpoint is tagged 'fp16', float16 tensors are converted with .float().
 if checkpoint.get('dtype') == 'fp16':
      state_dict = {k: v.float() if v.dtype == torch.float16 else v
+# NOTE(review): the comprehension above is cut off in this diff view (its `for`
+# clause presumably sits on a hidden context line) — confirm against app.py.
 model.load_state_dict(state_dict)
 model.eval()
 model.to(device)
+# ── Rate Limiter (slowapi) ───────────────────────────────────────────────────
+# Rate limits are keyed by the value get_remote_address returns for a request.
+limiter = Limiter(key_func=get_remote_address)
+# ── IP Blacklist (in-memory, reset on restart) ──────────────────────────────
+# NOTE(review): ip_blacklist is not referenced by any code visible in this file.
+ip_blacklist: set = set()
+ip_request_count: dict = defaultdict(list)  # ip -> [timestamp, ...]
+BLACKLIST_THRESHOLD = 100   # more requests than this within the window → ban
+BLACKLIST_WINDOW    = 60    # seconds
+BLACKLIST_DURATION  = 3600  # banned for 1 hour (tracked separately in ip_banned_until)
+ip_banned_until: dict = {}  # ip -> timestamp until which the ban lasts
+# ── FastAPI setup ───────────────────────────────────────────────────────────
+app = FastAPI(title="Indonesian LLM API")
+# Attach the limiter to the app and register slowapi's handler for RateLimitExceeded.
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+# CORS — replace the origins with your own domain
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["https://nousai.netlify.app"],  # change this! do not use "*" in production
+    allow_methods=["POST", "GET"],
+    allow_headers=["*"],
+)
+# Trusted hosts — reject requests that carry an unexpected Host header
+# NOTE(review): "yourdomain.com" looks like a placeholder — confirm that the
+# hostname the app is actually served from is listed here.
+app.add_middleware(
+    TrustedHostMiddleware,
+    allowed_hosts=["yourdomain.com", "localhost", "127.0.0.1"]
+)
+# ── Middleware: DDoS / Flood Detection ──────────────────────────────────────
+@app.middleware("http")
+async def ddos_protection(request: Request, call_next):
+    ip = get_remote_address(request)
+    now = time.time()
+    # Cek apakah IP sedang dibanned
+    if ip in ip_banned_until:
+        if now < ip_banned_until[ip]:
+            remaining = int(ip_banned_until[ip] - now)
+            return HTTPException(
+                status_code=429,
+                detail=f"IP banned. Coba lagi dalam {remaining} detik."
+            )
+        else:
+            # Ban sudah habis
+            del ip_banned_until[ip]
+            ip_request_count[ip] = []
+    # Catat timestamp request ini
+    ip_request_count[ip].append(now)
+    # Bersihkan request yang sudah di luar window
+    ip_request_count[ip] = [
+        t for t in ip_request_count[ip]
+        if now - t < BLACKLIST_WINDOW
+    ]
+    # Jika terlalu banyak request → ban
+    if len(ip_request_count[ip]) > BLACKLIST_THRESHOLD:
+        ip_banned_until[ip] = now + BLACKLIST_DURATION
+        ip_request_count[ip] = []
+        raise HTTPException(
+            status_code=429,
+            detail=f"Terlalu banyak request. IP dibanned selama {BLACKLIST_DURATION//60} menit."
+        )
+    response = await call_next(request)
+    return response
+# ── Request/Response Schema ─────────────────────────────────────────────────
+# Request body accepted by POST /chat.
+class ChatRequest(BaseModel):
+    message: str = Field(..., min_length=1, max_length=500)  # cap the input length
+    max_tokens: int = Field(default=200, ge=10, le=500)      # min 10, max 500
+    # Sampling temperature, restricted to the range 0.1–1.5.
+    temperature: float = Field(default=0.7, ge=0.1, le=1.5)
+    # When true, the response also carries the extracted thinking text.
+    show_thinking: bool = False
+# Response body returned by POST /chat.
+class ChatResponse(BaseModel):
+    answer: str
+    # Filled in only when the request set show_thinking; otherwise None.
+    thinking: str | None = None
+    # Time spent handling the request, in milliseconds.
+    processing_time_ms: int
+# ── API Key sederhana (opsional tapi direkomendasikan) ──────────────────────
+API_KEYS = {"kunci-rahasia-kamu-123"}  # ganti dengan key yang kuat
+def verify_api_key(request: Request):
+    key = request.headers.get("X-API-Key")
+    if not key or key not in API_KEYS:
+        raise HTTPException(status_code=401, detail="API key tidak valid.")
+    return key
+# ── Endpoints ───────────────────────────────────────────────────────────────
+@app.get("/")
+def health():
+    return {"status": "ok", "device": str(device)}
+@app.post("/chat", response_model=ChatResponse)
+@limiter.limit("20/minute")          # max 10 request per menit per IP
+@limiter.limit("100/hour")            # max 50 request per jam per IP
+async def chat(
+    req: ChatRequest,
+    request: Request,
+    _key: str = Depends(verify_api_key)   # hapus baris ini jika tidak pakai API key
+):
+    start = time.time()
+    prompt = f"{req.message} <cot>"
     full = generate_text(
         model=model, tokenizer=tokenizer, prompt=prompt,
+        max_new_tokens=req.max_tokens, temperature=req.temperature,
         top_k=50, top_p=0.9, device=device
     )
     raw = full[len(prompt):].strip()
+    thinking, answer = _extract_thinking(raw)
+    elapsed_ms = int((time.time() - start) * 1000)
+    return ChatResponse(
+        answer=answer if answer else "Maaf, saya tidak mengerti.",
+        thinking=thinking if req.show_thinking else None,
+        processing_time_ms=elapsed_ms
+    )