File size: 5,993 Bytes
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ddfd1f
ea3fcbe
713c68e
 
5ddfd1f
ea3fcbe
 
 
 
 
 
 
 
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85020b5
ea3fcbe
 
 
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
713c68e
 
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85020b5
ea3fcbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
Server HTTP minimal untuk NER test — berjalan di http://127.0.0.1:8001

Endpoint:
    GET  /api/status   — health-check, kembalikan status model
    POST /api/ner      — deteksi entitas dari {"text": "..."}

Jalankan:
    python src/ner_server.py
    python src/ner_server.py --port 8001 --model cahya/xlm-roberta-base-indonesian-NER
"""

from __future__ import annotations

import argparse
import json
import logging
import sys
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path

# Put this file's directory and its parent on sys.path so that ner_detector and
# core.language can be imported when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.language import detect_language
from ner_detector import IndonesianNER

# Process-wide logging: INFO level, "HH:MM:SS  LEVEL    message" lines.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s  %(levelname)-7s  %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)

# Global state shared between the model-loading thread and the request handler

# The NER detector instance; stays None until _load_model_background creates it.
_ner: IndonesianNER | None = None
# NOTE: despite the name, this event being *set* means the load attempt has
# finished (successfully or not); handlers report "loading" while it is unset.
_model_loading = threading.Event()
# Return value of _ner.load(); False until a load has succeeded.
_load_success  = False


def _load_model_background(model_name: str | None) -> None:
    """Load the NER model; meant to run in a separate thread.

    Running the load off the main thread lets the HTTP server answer
    ``/api/status`` immediately while the model is still being loaded.

    Args:
        model_name: Model identifier forwarded to ``IndonesianNER``; ``None``
            lets the detector choose its own default.

    Side effects:
        Assigns the module globals ``_ner`` and ``_load_success`` and always
        sets the ``_model_loading`` event when the attempt is over, whether
        the load succeeded, reported failure, or raised.
    """
    global _ner, _load_success
    try:
        _ner = IndonesianNER(model_name=model_name)
        _load_success = _ner.load()
        if not _load_success:
            logger.error("Gagal memuat model: %s", _ner.load_error)
    except Exception:
        # Thread boundary: without this, an exception would kill the thread
        # before the event is set and clients would see "loading" forever.
        _load_success = False
        logger.exception("Gagal memuat model (exception tak terduga)")
    finally:
        _model_loading.set()


# Request handler

class NERHandler(BaseHTTPRequestHandler):

    # CORS: izinkan semua origin agar bisa dipanggil dari file://

    def _send_cors_headers(self) -> None:
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")

    def do_OPTIONS(self) -> None:  # preflight
        self.send_response(204)
        self._send_cors_headers()
        self.end_headers()

    # Helpers

    def _json_response(self, status: int, body: object) -> None:
        payload = json.dumps(body, ensure_ascii=False).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Content-Length", str(len(payload)))
        self._send_cors_headers()
        self.end_headers()
        self.wfile.write(payload)

    def _read_json_body(self) -> dict | None:
        length = int(self.headers.get("Content-Length", 0))
        if length == 0:
            return {}
        try:
            return json.loads(self.rfile.read(length))
        except (json.JSONDecodeError, ValueError):
            return None

    def log_message(self, fmt: str, *args) -> None:
        logger.info("%-6s %s", args[0] if args else "", args[1] if len(args) > 1 else "")

    # GET

    def do_GET(self) -> None:
        if self.path == "/api/status":
            loaded = _load_success and _ner is not None and _ner.is_loaded
            self._json_response(200, {
                "model_loaded": loaded,
                "model": _ner.loaded_model if (loaded and _ner) else None,
                "loading": not _model_loading.is_set(),
            })
        else:
            self._json_response(404, {"error": "Not found"})

    # POST

    def do_POST(self) -> None:
        if self.path != "/api/ner":
            self._json_response(404, {"error": "Not found"})
            return

        body = self._read_json_body()
        if body is None:
            self._json_response(400, {"error": "Body bukan JSON yang valid."})
            return

        text = str(body.get("text", "")).strip()
        if not text:
            self._json_response(400, {"error": "Field 'text' kosong atau tidak ada."})
            return

        if not (_ner and _ner.is_loaded):
            self._json_response(503, {
                "error": "Model belum selesai dimuat. Coba lagi dalam beberapa detik.",
                "loading": not _model_loading.is_set(),
            })
            return

        language = str(body.get("language") or detect_language(text).language)
        entities = _ner.predict(text, language=language)
        self._json_response(200, {
            "text": text,
            "entities": [
                {
                    "word":   e.word,
                    "label":  e.label,
                    "score":  round(e.score, 4),
                    "start":  e.start,
                    "end":    e.end,
                    "source": e.source,
                }
                for e in entities
            ],
            "model": _ner.loaded_model,
        })


# Entrypoint

def main() -> None:
    """Parse command-line options, start the model load, and serve requests.

    Options:
        --port:  TCP port to listen on (default 8001).
        --host:  Interface to bind to (default 127.0.0.1).
        --model: Model identifier passed to the background loader; ``None``
            leaves the choice to the detector.

    The listening socket is bound before the model load is started so that an
    unavailable port fails immediately, and it is closed on every exit path.
    Ctrl+C stops the server.
    """
    parser = argparse.ArgumentParser(description="Server NER Bahasa Indonesia")
    parser.add_argument("--port",  type=int, default=8001)
    parser.add_argument("--host",  default="127.0.0.1")
    parser.add_argument("--model", default=None,
                        help="ID model HuggingFace atau path lokal. "
                             "Default: cascade otomatis (xlm-roberta-large → base → bert).")
    args = parser.parse_args()

    # Bind first: if the address is unavailable this raises before any model
    # loading work has been kicked off.
    server = HTTPServer((args.host, args.port), NERHandler)
    try:
        # Load the model in the background so the server is reachable at once.
        logger.info("Memuat model NER di background…")
        loader = threading.Thread(
            target=_load_model_background, args=(args.model,), daemon=True
        )
        loader.start()

        logger.info("Server berjalan di http://%s:%d", args.host, args.port)
        logger.info("  GET  http://%s:%d/api/status", args.host, args.port)
        logger.info("  POST http://%s:%d/api/ner", args.host, args.port)
        logger.info("Buka web/ner-test.html di browser untuk memulai pengujian.")
        logger.info("Tekan Ctrl+C untuk menghentikan server.")

        server.serve_forever()
    except KeyboardInterrupt:
        logger.info("Server dihentikan.")
    finally:
        # Release the listening socket however the serve loop ended.
        server.server_close()


if __name__ == "__main__":
    main()