"""
Minimal HTTP server for NER testing — runs at http://127.0.0.1:8001

Endpoints:
    GET  /api/status — health check; returns the model status
    POST /api/ner    — detect entities from {"text": "..."}

Run:
    python src/ner_server.py
    python src/ner_server.py --port 8001 --model cahya/xlm-roberta-base-indonesian-NER
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
# Make sure src/ is on sys.path so that ner_detector can be imported directly
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.language import detect_language
from ner_detector import IndonesianNER
# Configure root logging once for the whole process: INFO level, with a
# compact "HH:MM:SS LEVEL message" line format.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-7s %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)

# Global state shared between the background loader thread and the handlers.
# The detector instance; None until the loader thread has created it.
_ner: IndonesianNER | None = None
# NOTE: despite its name, this event is set once loading has FINISHED
# (successfully or not); readers report "loading" as `not is_set()`.
_model_loading = threading.Event()
# Result of the load attempt as stored by the loader thread.
_load_success = False
def _load_model_background(model_name: str | None) -> None:
    """Load the NER model and publish the outcome through module globals.

    Intended to run in a separate thread so the HTTP server can answer
    ``/api/status`` while the model is still loading.

    Args:
        model_name: Value forwarded to ``IndonesianNER(model_name=...)``;
            ``None`` is passed through unchanged.

    Side effects:
        Assigns ``_ner`` and ``_load_success`` and always sets the
        ``_model_loading`` event when the attempt is over, whether the load
        succeeded, reported failure, or raised.
    """
    global _ner, _load_success
    try:
        _ner = IndonesianNER(model_name=model_name)
        _load_success = _ner.load()
        if not _load_success:
            logger.error("Gagal memuat model: %s", _ner.load_error)
    except Exception:
        # An unexpected exception must not kill the thread silently: record
        # the failure and log the traceback so the cause is visible.
        _load_success = False
        logger.exception("Gagal memuat model")
    finally:
        # Always signal completion; otherwise clients would be told the
        # model is still "loading" forever after a crash in the loader.
        _model_loading.set()
# Request handler
class NERHandler(BaseHTTPRequestHandler):
    """Request handler for the NER test server.

    Routes:
        GET  /api/status -- report whether the model is loaded or still loading.
        POST /api/ner    -- run entity detection on the ``text`` field of a
                            JSON request body.
        OPTIONS (any)    -- answer CORS preflight requests.

    Every JSON response carries permissive CORS headers. The handler reads
    the module-level ``_ner``, ``_load_success`` and ``_model_loading`` state.
    """

    # CORS: allow every origin so the API can be called from a file:// page.
    def _send_cors_headers(self) -> None:
        """Add the CORS headers to the response currently being built."""
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")

    def do_OPTIONS(self) -> None:  # preflight
        """Answer a CORS preflight request with 204 and the CORS headers."""
        self.send_response(204)
        self._send_cors_headers()
        self.end_headers()

    # Helpers
    def _json_response(self, status: int, body: object) -> None:
        """Serialize ``body`` as UTF-8 JSON and send it with ``status``."""
        payload = json.dumps(body, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self._send_cors_headers()
        self.end_headers()
        self.wfile.write(payload)

    def _read_json_body(self) -> dict | None:
        """Read and decode the request body as a JSON object.

        Returns:
            ``{}`` when the request has no body (missing or zero
            ``Content-Length``), the decoded dict for a valid JSON object,
            and ``None`` when the ``Content-Length`` header is malformed or
            negative, the body is not valid JSON, or the JSON value is not
            an object.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            return None
        if length < 0:
            # read(-1) would block until the client closes the connection.
            return None
        if length == 0:
            return {}
        try:
            parsed = json.loads(self.rfile.read(length))
        except ValueError:
            # Covers json.JSONDecodeError and UnicodeDecodeError alike.
            return None
        # Callers use .get(), so arrays/strings/numbers count as invalid.
        return parsed if isinstance(parsed, dict) else None

    def log_message(self, fmt: str, *args) -> None:
        """Route http.server log lines through the module logger.

        Only the first two format arguments are logged; ``fmt`` itself is
        ignored. For the base class's request log these are the request
        line and the status code.
        """
        first = args[0] if args else ""
        second = args[1] if len(args) > 1 else ""
        logger.info("%-6s %s", first, second)

    # GET
    def do_GET(self) -> None:
        """Serve ``/api/status``; any other path gets a 404 JSON error."""
        if self.path != "/api/status":
            self._json_response(404, {"error": "Not found"})
            return
        ner = _ner  # read the global once so all checks see the same object
        # bool() keeps the JSON field a strict true/false value.
        loaded = bool(_load_success and ner is not None and ner.is_loaded)
        self._json_response(200, {
            "model_loaded": loaded,
            "model": ner.loaded_model if loaded else None,
            "loading": not _model_loading.is_set(),
        })

    # POST
    def do_POST(self) -> None:
        """Serve ``/api/ner``: detect entities in the posted ``text``.

        Responses:
            404 -- path is not ``/api/ner``.
            400 -- body is not a JSON object, or ``text`` is missing/empty.
            503 -- the model is not loaded (yet).
            500 -- language detection or prediction raised an exception.
            200 -- ``{"text", "entities", "model"}``.
        """
        if self.path != "/api/ner":
            self._json_response(404, {"error": "Not found"})
            return

        body = self._read_json_body()
        if body is None:
            self._json_response(400, {"error": "Body bukan JSON yang valid."})
            return

        raw_text = body.get("text")
        # A JSON null must count as "missing", not become the string "None".
        text = "" if raw_text is None else str(raw_text).strip()
        if not text:
            self._json_response(400, {"error": "Field 'text' kosong atau tidak ada."})
            return

        ner = _ner  # read the global once so the check and the call agree
        if ner is None or not ner.is_loaded:
            self._json_response(503, {
                "error": "Model belum selesai dimuat. Coba lagi dalam beberapa detik.",
                "loading": not _model_loading.is_set(),
            })
            return

        try:
            # An explicit "language" in the request wins; otherwise detect it.
            language = str(body.get("language") or detect_language(text).language)
            entities = [
                {
                    "word": e.word,
                    "label": e.label,
                    "score": round(e.score, 4),
                    "start": e.start,
                    "end": e.end,
                    "source": e.source,
                }
                for e in ner.predict(text, language=language)
            ]
        except Exception:
            # Without this the client would get a dropped connection and no
            # HTTP response at all; log the traceback and answer with a 500.
            logger.exception("Gagal memproses permintaan NER")
            self._json_response(500, {
                "error": "Terjadi kesalahan internal saat memproses teks.",
            })
            return

        self._json_response(200, {
            "text": text,
            "entities": entities,
            "model": ner.loaded_model,
        })
# Entrypoint
def main() -> None:
    """Parse CLI options, start the background model load and serve HTTP.

    Options:
        --port   TCP port to listen on (default 8001).
        --host   Interface to bind to (default 127.0.0.1).
        --model  Model identifier or path forwarded to the loader; ``None``
                 leaves the choice to the detector.

    Blocks in ``serve_forever()`` until interrupted with Ctrl+C. The
    listening socket is closed on every exit path.
    """
    parser = argparse.ArgumentParser(description="Server NER Bahasa Indonesia")
    parser.add_argument("--port", type=int, default=8001)
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--model", default=None,
                        help="ID model HuggingFace atau path lokal. "
                             "Default: cascade otomatis (xlm-roberta-large → base → bert).")
    args = parser.parse_args()

    # Bind first: if the address is unavailable we fail before starting an
    # expensive model load that would only be thrown away.
    server = HTTPServer((args.host, args.port), NERHandler)

    # Load the model in the background so the server is reachable right away.
    logger.info("Memuat model NER di background…")
    loader = threading.Thread(
        target=_load_model_background, args=(args.model,), daemon=True,
    )
    loader.start()

    logger.info("Server berjalan di http://%s:%d", args.host, args.port)
    logger.info(" GET http://%s:%d/api/status", args.host, args.port)
    logger.info(" POST http://%s:%d/api/ner", args.host, args.port)
    logger.info("Buka web/ner-test.html di browser untuk memulai pengujian.")
    logger.info("Tekan Ctrl+C untuk menghentikan server.")
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        logger.info("Server dihentikan.")
    finally:
        # Release the listening socket no matter how serve_forever() ended.
        server.server_close()
# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|