Prompt-Builder / src /ner /ner_server.py
ArielJoe's picture
feat: cross-detector language policy + tidy structure & file naming
5ddfd1f
Raw
History Blame Contribute Delete
5.99 kB
"""
Server HTTP minimal untuk NER test — berjalan di http://127.0.0.1:8001
Endpoint:
GET /api/status — health-check, kembalikan status model
POST /api/ner — deteksi entitas dari {"text": "..."}
Jalankan:
python src/ner/ner_server.py
python src/ner/ner_server.py --port 8001 --model cahya/xlm-roberta-base-indonesian-NER
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
# Pastikan src/ ada di sys.path agar ner_detector bisa diimpor langsung
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.language import detect_language
from ner_detector import IndonesianNER
# Configure logging once at import time: INFO level and above, each record
# prefixed with an HH:MM:SS timestamp and a left-aligned, padded level name.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-7s %(message)s",
datefmt="%H:%M:%S",
)
# Module-level logger used by the model loader, the request handler and main().
logger = logging.getLogger(__name__)
# Global state, written by the model-loader function and read by the request
# handler.
# The detector instance; None until the loader has constructed it.
_ner: IndonesianNER | None = None
# NOTE: despite its name, this event being *set* means loading has FINISHED
# (successfully or not); the handler reports "loading" as `not is_set()`.
_model_loading = threading.Event() # set once loading has finished (success or failure)
# Result of the detector's load() call; stays False until a load succeeds.
_load_success = False
def _load_model_background(model_name: str | None) -> None:
    """Load the NER model; intended to run on a separate thread.

    Loading off the main thread lets the HTTP server answer ``/api/status``
    immediately while the model is still being loaded.

    The function updates the module-level ``_ner`` and ``_load_success`` and
    ALWAYS sets ``_model_loading`` before returning, including when building
    or loading the detector raises. Without that guarantee a crashed load
    would leave the API reporting ``"loading": true`` forever.

    Args:
        model_name: Model identifier forwarded unchanged to
            ``IndonesianNER(model_name=...)``; may be ``None``.
    """
    global _ner, _load_success
    try:
        _ner = IndonesianNER(model_name=model_name)
        _load_success = _ner.load()
        if not _load_success:
            logger.error("Gagal memuat model: %s", _ner.load_error)
    except Exception:
        # Outermost boundary of the loader thread: record the failure with its
        # traceback instead of letting the thread die before the event is set.
        _load_success = False
        logger.exception("Gagal memuat model")
    finally:
        # Signal "loading finished" on every path (success, failure, crash).
        _model_loading.set()
# Request handler
class NERHandler(BaseHTTPRequestHandler):
    """Request handler for the NER test API.

    Routes:
        GET  /api/status -- report whether the model is loaded / still loading.
        POST /api/ner    -- run entity detection on ``{"text": "..."}``.
        OPTIONS *        -- CORS preflight.

    Every response body is JSON and every response carries permissive CORS
    headers.
    """

    # CORS: allow every origin so the API can be called from a file:// page.
    def _send_cors_headers(self) -> None:
        """Add the CORS headers to the response currently being built."""
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")

    def do_OPTIONS(self) -> None:  # preflight
        """Answer a CORS preflight request with 204 and no body."""
        self.send_response(204)
        self._send_cors_headers()
        self.end_headers()

    # Helpers
    def _json_response(self, status: int, body: object) -> None:
        """Send *body* serialized as UTF-8 JSON with HTTP status *status*.

        Args:
            status: HTTP status code to send.
            body: Any JSON-serializable object.
        """
        # ensure_ascii=False keeps non-ASCII text readable in the payload.
        payload = json.dumps(body, ensure_ascii=False).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self._send_cors_headers()
        self.end_headers()
        self.wfile.write(payload)

    def _read_json_body(self) -> dict | None:
        """Read and parse the request body as a JSON object.

        Returns:
            The parsed object as a ``dict``; an empty ``dict`` when the
            request has no body (``Content-Length`` missing or ``0``); or
            ``None`` when the request is malformed: a non-numeric or negative
            ``Content-Length``, a body that is not valid JSON, or valid JSON
            whose top-level value is not an object.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            # A garbage header is a client error, not a server crash.
            return None
        if length < 0:
            # rfile.read() with a negative size would block until EOF.
            return None
        if length == 0:
            return {}
        try:
            parsed = json.loads(self.rfile.read(length))
        except ValueError:
            # Covers json.JSONDecodeError and undecodable bytes.
            return None
        # Callers use .get(), so anything but a JSON object is invalid input.
        return parsed if isinstance(parsed, dict) else None

    def log_message(self, fmt: str, *args) -> None:
        """Route the base class's access log through the module logger.

        *fmt* is ignored; only the first two positional arguments are logged
        (for ordinary request logging these are presumably the request line
        and the status code -- see ``BaseHTTPRequestHandler.log_request``).
        """
        logger.info("%-6s %s", args[0] if args else "", args[1] if len(args) > 1 else "")

    # GET
    def do_GET(self) -> None:
        """Serve ``GET /api/status``; any other path gets a JSON 404."""
        if self.path == "/api/status":
            loaded = _load_success and _ner is not None and _ner.is_loaded
            self._json_response(200, {
                "model_loaded": loaded,
                "model": _ner.loaded_model if (loaded and _ner) else None,
                # The event is set once loading has finished, so "still
                # loading" is the negation of is_set().
                "loading": not _model_loading.is_set(),
            })
        else:
            self._json_response(404, {"error": "Not found"})

    # POST
    def do_POST(self) -> None:
        """Serve ``POST /api/ner``: detect entities in the submitted text.

        Request body: a JSON object with ``"text"`` (required) and an
        optional ``"language"``; when the language is missing or empty it is
        detected from the text.

        Responses:
            200 -- ``{"text", "entities": [...], "model"}``.
            400 -- body is not a JSON object, or ``"text"`` is empty/missing.
            404 -- any path other than ``/api/ner``.
            500 -- language detection or prediction raised.
            503 -- the model is not loaded (yet).
        """
        if self.path != "/api/ner":
            self._json_response(404, {"error": "Not found"})
            return
        body = self._read_json_body()
        if body is None:
            self._json_response(400, {"error": "Body bukan JSON yang valid."})
            return
        # A JSON null must count as "missing", not become the string "None".
        raw_text = body.get("text")
        text = "" if raw_text is None else str(raw_text).strip()
        if not text:
            self._json_response(400, {"error": "Field 'text' kosong atau tidak ada."})
            return
        if not (_ner and _ner.is_loaded):
            self._json_response(503, {
                "error": "Model belum selesai dimuat. Coba lagi dalam beberapa detik.",
                "loading": not _model_loading.is_set(),
            })
            return
        try:
            language = str(body.get("language") or detect_language(text).language)
            entities = _ner.predict(text, language=language)
            found = [
                {
                    "word": e.word,
                    "label": e.label,
                    "score": round(e.score, 4),
                    "start": e.start,
                    "end": e.end,
                    "source": e.source,
                }
                for e in entities
            ]
        except Exception:
            # Request boundary: without this the connection is dropped with no
            # response at all; log the traceback and tell the client instead.
            logger.exception("Gagal memproses permintaan NER")
            self._json_response(500, {"error": "Gagal memproses teks."})
            return
        self._json_response(200, {
            "text": text,
            "entities": found,
            "model": _ner.loaded_model,
        })
# Entrypoint
def main() -> None:
    """Parse the command line, start loading the model, and serve requests.

    Command-line options:
        --port   TCP port to listen on (default 8001).
        --host   Interface to bind (default 127.0.0.1).
        --model  Model identifier handed to the loader (default ``None``).

    The model is loaded on a daemon thread so the HTTP server becomes
    reachable immediately. The server runs until interrupted with Ctrl+C;
    its listening socket is closed on every exit path.
    """
    parser = argparse.ArgumentParser(description="Server NER Bahasa Indonesia")
    parser.add_argument("--port", type=int, default=8001)
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument(
        "--model",
        default=None,
        help="ID model HuggingFace atau path lokal. "
             "Default: cascade otomatis (xlm-roberta-large → base → bert).",
    )
    args = parser.parse_args()

    # Load the model in the background so the server is reachable right away.
    logger.info("Memuat model NER di background…")
    loader = threading.Thread(
        target=_load_model_background,
        args=(args.model,),
        daemon=True,  # must not keep the process alive after the server stops
    )
    loader.start()

    # Using the server as a context manager guarantees server_close() runs
    # however serve_forever() exits, not only after a KeyboardInterrupt.
    with HTTPServer((args.host, args.port), NERHandler) as server:
        logger.info("Server berjalan di http://%s:%d", args.host, args.port)
        logger.info(" GET http://%s:%d/api/status", args.host, args.port)
        logger.info(" POST http://%s:%d/api/ner", args.host, args.port)
        logger.info("Buka web/ner-test.html di browser untuk memulai pengujian.")
        logger.info("Tekan Ctrl+C untuk menghentikan server.")
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            logger.info("Server dihentikan.")


if __name__ == "__main__":
    main()