Nomio4640 committed on
Commit
7726529
·
1 Parent(s): e51c368

added test for NER

Browse files
adapters/api/main.py CHANGED
@@ -1,10 +1,6 @@
1
- """
2
- FastAPI adapter — REST API entry point.
3
- This is the outer adapter that wraps the NLP core domain layer.
4
- """
5
-
6
  import logging
7
  import traceback
 
8
 
9
  from fastapi import FastAPI, Request
10
  from fastapi.middleware.cors import CORSMiddleware
@@ -56,6 +52,7 @@ async def root():
56
  "name": "NLP Intelligence API",
57
  "version": "1.0.0",
58
  "endpoints": {
 
59
  "upload": "POST /api/upload",
60
  "analyze": "POST /api/analyze",
61
  "network": "POST /api/network",
@@ -65,3 +62,24 @@ async def root():
65
  "admin_stopwords": "GET/POST /api/admin/stopwords",
66
  },
67
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import logging
2
  import traceback
3
+ import torch
4
 
5
  from fastapi import FastAPI, Request
6
  from fastapi.middleware.cors import CORSMiddleware
 
52
  "name": "NLP Intelligence API",
53
  "version": "1.0.0",
54
  "endpoints": {
55
+ "health": "GET /api/health",
56
  "upload": "POST /api/upload",
57
  "analyze": "POST /api/analyze",
58
  "network": "POST /api/network",
 
62
  "admin_stopwords": "GET/POST /api/admin/stopwords",
63
  },
64
  }
65
+
66
+
67
@app.get("/api/health")
async def health():
    """Quick health check used by the frontend on page load.

    Reports GPU availability and whether each lazily-loaded NLP model
    has been initialized yet.
    """
    # Imported lazily so the endpoint does not force the services module
    # (and its heavy model state) to load at import time.
    from adapters.api import services

    has_gpu = torch.cuda.is_available()
    device_name = torch.cuda.get_device_name(0) if has_gpu else None

    # NOTE(review): peeks at private attributes to detect lazy-loaded models;
    # assumes None means "not loaded yet" — confirm against the services module.
    loaded = {
        "ner": services.ner._pipeline is not None,
        "sentiment": services.sentiment._pipeline is not None,
        "topic": services.topic._model is not None,
    }

    return {
        "status": "ok",
        "gpu": has_gpu,
        "gpu_name": device_name,
        "models": loaded,
    }
adapters/api/routers/analysis.py CHANGED
@@ -16,6 +16,7 @@ import csv
16
  import io
17
  import json
18
  import logging
 
19
  import uuid
20
  from typing import List
21
 
@@ -321,6 +322,7 @@ def _run_analysis(
321
  run_sentiment: bool,
322
  run_topics: bool,
323
  ) -> AnalysisResponse:
 
324
  preprocessor = services.preprocessor
325
  kb = services.kb
326
 
@@ -328,6 +330,8 @@ def _run_analysis(
328
  ids = [row.get("ID", str(i)) for i, row in enumerate(rows)]
329
  sources = [row.get("Source", "") for row in rows]
330
 
 
 
331
  # Dual preprocessing — one pass, two outputs
332
  nlp_texts: List[str] = []
333
  tm_texts: List[str] = []
@@ -335,11 +339,15 @@ def _run_analysis(
335
  nlp, tm = preprocessor.preprocess_dual(raw)
336
  nlp_texts.append(nlp)
337
  tm_texts.append(tm)
 
338
 
339
  # NER
340
  ner_results = []
341
  if run_ner:
 
342
  ner_results = services.ner.recognize_batch(nlp_texts)
 
 
343
 
344
  # Entity relabeling from admin custom labels
345
  custom_labels = kb.get_labels(label_type="entity") if run_ner else {}
@@ -347,18 +355,30 @@ def _run_analysis(
347
  # Sentiment
348
  sentiment_results = []
349
  if run_sentiment:
 
350
  sentiment_results = services.sentiment.analyze_batch(nlp_texts)
 
 
 
 
351
 
352
  # Topic modeling — now works from 3 documents via KMeans fallback
353
  topic_results = []
354
  topic_summary = []
355
  if run_topics:
 
 
356
  if len(tm_texts) >= MIN_TOPICS_DOCS:
357
  try:
 
358
  topic_results, topic_summary = services.topic.fit_transform(tm_texts)
 
 
359
  except Exception as exc:
 
360
  topic_summary = [{"error": f"Topic modeling failed: {exc}"}]
361
  else:
 
362
  topic_summary = [{
363
  "info": (
364
  f"Topic modeling needs at least {MIN_TOPICS_DOCS} documents. "
 
16
  import io
17
  import json
18
  import logging
19
+ import time
20
  import uuid
21
  from typing import List
22
 
 
322
  run_sentiment: bool,
323
  run_topics: bool,
324
  ) -> AnalysisResponse:
325
+ t0 = time.time()
326
  preprocessor = services.preprocessor
327
  kb = services.kb
328
 
 
330
  ids = [row.get("ID", str(i)) for i, row in enumerate(rows)]
331
  sources = [row.get("Source", "") for row in rows]
332
 
333
+ logger.info(f"[Pipeline] Starting analysis: {len(raw_texts)} rows, NER={run_ner}, Sentiment={run_sentiment}, Topics={run_topics}")
334
+
335
  # Dual preprocessing — one pass, two outputs
336
  nlp_texts: List[str] = []
337
  tm_texts: List[str] = []
 
339
  nlp, tm = preprocessor.preprocess_dual(raw)
340
  nlp_texts.append(nlp)
341
  tm_texts.append(tm)
342
+ logger.info(f"[Pipeline] Preprocessing done in {(time.time()-t0)*1000:.0f}ms")
343
 
344
  # NER
345
  ner_results = []
346
  if run_ner:
347
+ t1 = time.time()
348
  ner_results = services.ner.recognize_batch(nlp_texts)
349
+ total_ents = sum(len(r) for r in ner_results)
350
+ logger.info(f"[Pipeline] NER done in {(time.time()-t1)*1000:.0f}ms — found {total_ents} entities total")
351
 
352
  # Entity relabeling from admin custom labels
353
  custom_labels = kb.get_labels(label_type="entity") if run_ner else {}
 
355
  # Sentiment
356
  sentiment_results = []
357
  if run_sentiment:
358
+ t1 = time.time()
359
  sentiment_results = services.sentiment.analyze_batch(nlp_texts)
360
+ pos = sum(1 for s in sentiment_results if s.label == "positive")
361
+ neg = sum(1 for s in sentiment_results if s.label == "negative")
362
+ neu = sum(1 for s in sentiment_results if s.label == "neutral")
363
+ logger.info(f"[Pipeline] Sentiment done in {(time.time()-t1)*1000:.0f}ms — pos={pos} neu={neu} neg={neg}")
364
 
365
  # Topic modeling — now works from 3 documents via KMeans fallback
366
  topic_results = []
367
  topic_summary = []
368
  if run_topics:
369
+ non_empty_tm = [t for t in tm_texts if t.strip()]
370
+ logger.info(f"[Pipeline] Topic modeling: {len(non_empty_tm)} non-empty TM texts (need >={MIN_TOPICS_DOCS})")
371
  if len(tm_texts) >= MIN_TOPICS_DOCS:
372
  try:
373
+ t1 = time.time()
374
  topic_results, topic_summary = services.topic.fit_transform(tm_texts)
375
+ real_topics = [t for t in topic_summary if isinstance(t, dict) and t.get("topic_id", -1) >= 0]
376
+ logger.info(f"[Pipeline] Topics done in {(time.time()-t1)*1000:.0f}ms — {len(real_topics)} real topics, summary={topic_summary}")
377
  except Exception as exc:
378
+ logger.error(f"[Pipeline] Topic modeling FAILED: {exc}", exc_info=True)
379
  topic_summary = [{"error": f"Topic modeling failed: {exc}"}]
380
  else:
381
+ logger.info(f"[Pipeline] Skipping topics — only {len(tm_texts)} docs (need {MIN_TOPICS_DOCS}+)")
382
  topic_summary = [{
383
  "info": (
384
  f"Topic modeling needs at least {MIN_TOPICS_DOCS} documents. "
data/test.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/train.txt ADDED
The diff for this file is too large to render. See raw diff
 
data/valid.txt ADDED
The diff for this file is too large to render. See raw diff
 
eval/evaluate.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+
5
+ # Add the project root to the python path so we can import nlp_core
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+
8
+ from nlp_core.ner_engine import NEREngine
9
+ from nlp_core.preprocessing import Preprocessor
10
+
11
def extract_entities_from_conll(lines):
    """Parse one CoNLL-formatted sentence into text plus gold entities.

    Each input line is expected to hold at least four whitespace-separated
    columns, with the token in the first column and the IOB tag in the last.
    Shorter lines are skipped.

    Args:
        lines: The CoNLL lines of a single sentence.

    Returns:
        A tuple ``(text, entities)`` where ``text`` is the space-joined
        token sequence and ``entities`` is a list of
        ``(entity_type, entity_string)`` tuples.
    """
    tokens = []
    spans = []
    open_type = None
    open_tokens = []

    def _flush():
        # Close the currently open entity span, if any.
        nonlocal open_type, open_tokens
        if open_type:
            spans.append((open_type, " ".join(open_tokens)))
        open_type, open_tokens = None, []

    for raw in lines:
        cols = raw.strip().split()
        if len(cols) < 4:
            # Not a token line (blank / malformed) — ignore it entirely.
            continue
        token, tag = cols[0], cols[-1]
        tokens.append(token)

        if tag.startswith("B-"):
            _flush()
            open_type, open_tokens = tag[2:], [token]
        elif tag.startswith("I-"):
            if open_type == tag[2:]:
                open_tokens.append(token)
            else:
                # Dangling I- tag with a different type: treat it as the
                # start of a fresh entity, same as the original parser.
                _flush()
                open_type, open_tokens = tag[2:], [token]
        else:
            _flush()

    _flush()  # entity running to the end of the sentence
    return " ".join(tokens), spans
54
+
55
def evaluate_ner(test_file_path, limit=None):
    """Evaluate the NEREngine against a CoNLL-formatted test file.

    Computes entity-level exact-match precision/recall/F1: a prediction
    counts as a true positive only when both its type and its normalized
    (whitespace-stripped, lowercased) surface string match a gold entity.

    Args:
        test_file_path: Path to a CoNLL file; sentences are separated by
            blank lines.
        limit: Optional cap on the number of sentences to evaluate.
            ``None`` evaluates the whole file; ``0`` evaluates nothing.

    Returns:
        dict with keys ``sentences``, ``true_positives``,
        ``false_positives``, ``false_negatives``, ``precision``,
        ``recall`` and ``f1``. (The metrics are also printed.)
    """
    print(f"Loading test data from {test_file_path}...")

    with open(test_file_path, "r", encoding="utf-8") as f:
        blocks = f.read().split("\n\n")

    sentences = []
    for block in blocks:
        if not block.strip():
            continue
        text, true_ents = extract_entities_from_conll(block.split("\n"))
        if text:
            sentences.append((text, true_ents))

    # Fixed: use `is not None` so that limit=0 means "no sentences" instead
    # of being treated as falsy and silently evaluating the entire file.
    if limit is not None:
        sentences = sentences[:limit]

    print(f"Loaded {len(sentences)} test sentences.")

    preprocessor = Preprocessor()
    ner = NEREngine()

    true_positives = 0
    false_positives = 0
    false_negatives = 0

    print("Running NER evaluation (this may take a while)...")
    for i, (text, true_ents) in enumerate(sentences):
        if i > 0 and i % 50 == 0:
            print(f"Processed {i}/{len(sentences)} sentences...")

        # Clean text specifically for NER
        clean_text = preprocessor.preprocess_nlp(text)

        predicted_results = ner.recognize(clean_text)

        # Normalize both sides to (type, spaceless-lowercase string) so that
        # subword-tokenizer spacing differences don't cause spurious mismatches.
        pred_ents = [(res.entity_group, res.word.replace(" ", "").lower()) for res in predicted_results]
        true_ents_formatted = [(t, w.replace(" ", "").lower()) for t, w in true_ents]

        # Greedy matching: each gold entity consumes at most one prediction.
        for true_e in true_ents_formatted:
            if true_e in pred_ents:
                true_positives += 1
                pred_ents.remove(true_e)
            else:
                false_negatives += 1

        # Whatever is left in pred_ents are false positives
        false_positives += len(pred_ents)

    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    print("\n" + "="*40)
    print("NER EVALUATION RESULTS (Entity-Level Exact Match)")
    print("="*40)
    print(f"Sentences Evaluated: {len(sentences)}")
    print(f"True Positives: {true_positives}")
    print(f"False Positives: {false_positives}")
    print(f"False Negatives: {false_negatives}")
    print("-" * 40)
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("="*40)

    # New: return the metrics so callers (tests, notebooks, CI) can consume
    # them programmatically; previously the function returned None.
    return {
        "sentences": len(sentences),
        "true_positives": true_positives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }
126
+ if __name__ == "__main__":
127
+ test_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data", "test.txt")
128
+ if not os.path.exists(test_path):
129
+ print(f"Error: Could not find CoNLL test file at {test_path}")
130
+ else:
131
+ # Run on the first 500 sentences to get a quick estimate.
132
+ # Change limit=None to run on the entire test set.
133
+ evaluate_ner(test_path, limit=500)
frontend/next.config.ts CHANGED
@@ -1,12 +1,17 @@
1
  import type { NextConfig } from "next";
2
 
 
 
 
 
 
 
3
  const nextConfig: NextConfig = {
4
  async rewrites() {
5
  return [
6
  {
7
  source: "/api/:path*",
8
- // When using Colab + Ngrok, paste your Ngrok URL here!
9
- destination: "https://joye-tetracid-trevor.ngrok-free.dev/api/:path*",
10
  },
11
  ];
12
  },
 
1
  import type { NextConfig } from "next";
2
 
3
+ // Set NEXT_PUBLIC_API_URL in .env.local to point to your backend.
4
+ // Example for Colab: NEXT_PUBLIC_API_URL=https://your-url.ngrok-free.dev
5
+ // Example for local: NEXT_PUBLIC_API_URL=http://localhost:8000
6
+ // If not set, defaults to localhost:8000
7
+ const API_URL = process.env.NEXT_PUBLIC_API_URL || "http://localhost:8000";
8
+
9
  const nextConfig: NextConfig = {
10
  async rewrites() {
11
  return [
12
  {
13
  source: "/api/:path*",
14
+ destination: `${API_URL}/api/:path*`,
 
15
  },
16
  ];
17
  },
frontend/src/app/page.tsx CHANGED
@@ -94,6 +94,9 @@ function NetworkGraph({ network }: { network: { nodes: any[]; edges: any[] } })
94
  );
95
  }
96
 
 
 
 
97
  export default function Dashboard() {
98
  const [data, setData] = useState<AnalysisResult | null>(null);
99
  const [insights, setInsights] = useState<InsightItem[]>([]);
@@ -117,14 +120,36 @@ export default function Dashboard() {
117
  // Annotation editor
118
  const [editingDoc, setEditingDoc] = useState<DocForEditor | null>(null);
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  const loadHistory = useCallback(async () => {
121
  setHistoryLoading(true);
 
122
  try {
123
- const res = await fetch(`${API_BASE}/api/history?limit=50`, { headers: { "ngrok-skip-browser-warning": "true" } });
 
124
  if (res.ok) setHistory(await res.json());
125
- } finally {
126
- setHistoryLoading(false);
127
- }
128
  }, []);
129
 
130
  useEffect(() => {
@@ -194,25 +219,38 @@ export default function Dashboard() {
194
  const uploadCSV = useCallback(async (file: File) => {
195
  setLoading(true);
196
  setError("");
 
197
  try {
198
  const formData = new FormData();
199
  formData.append("file", file);
 
 
 
200
  const res = await fetch(`${API_BASE}/api/upload?run_ner=true&run_sentiment=true&run_topics=true`, {
201
  method: "POST",
 
202
  body: formData,
203
  });
 
204
  if (!res.ok) {
205
- const err = await res.json();
206
  throw new Error(err.detail || "Upload failed");
207
  }
208
  const result: AnalysisResult = await res.json();
 
209
  setData(result);
210
- const insightsRes = await fetch(`${API_BASE}/api/insights`, { headers: { "ngrok-skip-browser-warning": "true" }, method: "POST" });
 
 
 
 
211
  if (insightsRes.ok) setInsights(await insightsRes.json());
212
  } catch (e: any) {
 
213
  setError(e.message || "Error uploading file");
214
  } finally {
215
  setLoading(false);
 
216
  }
217
  }, []);
218
 
@@ -220,19 +258,25 @@ export default function Dashboard() {
220
  if (!textInput.trim()) return;
221
  setLoading(true);
222
  setError("");
 
223
  try {
224
  const res = await fetch(`${API_BASE}/api/analyze`, {
225
  method: "POST",
226
- headers: { "ngrok-skip-browser-warning": "true", "Content-Type": "application/json" },
227
  body: JSON.stringify({ text: textInput }),
228
  });
 
229
  if (!res.ok) throw new Error("Analysis failed");
230
  const result: AnalysisResult = await res.json();
 
 
231
  setData(result);
232
  } catch (e: any) {
 
233
  setError(e.message);
234
  } finally {
235
  setLoading(false);
 
236
  }
237
  }, [textInput]);
238
 
@@ -276,6 +320,30 @@ export default function Dashboard() {
276
 
277
  return (
278
  <div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  {/* Annotation editor modal */}
280
  {editingDoc && (
281
  <AnnotationEditor
@@ -287,6 +355,7 @@ export default function Dashboard() {
287
 
288
  {/* Upload Section */}
289
  {!data && !loading && (
 
290
  <section style={{ marginBottom: "2rem" }}>
291
  <div
292
  className={`upload-area ${dragging ? "dragging" : ""}`}
@@ -299,9 +368,10 @@ export default function Dashboard() {
299
  <p className="upload-text">
300
  <strong>CSV файл чирж оруулах</strong> эсвэл дарж сонгох
301
  </p>
302
- <p style={{ fontSize: "0.75rem", color: "var(--text-muted)", marginTop: "0.5rem" }}>
303
- &apos;text&apos; эсвэл &apos;Text&apos; баганатай CSV файл шаардлагатай
304
- </p>
 
305
  <input
306
  ref={fileInputRef}
307
  type="file"
 
94
  );
95
  }
96
 
97
+ // Standard headers needed for all API calls when going through Ngrok
98
+ const NGROK_HEADERS = { "ngrok-skip-browser-warning": "true" };
99
+
100
  export default function Dashboard() {
101
  const [data, setData] = useState<AnalysisResult | null>(null);
102
  const [insights, setInsights] = useState<InsightItem[]>([]);
 
120
  // Annotation editor
121
  const [editingDoc, setEditingDoc] = useState<DocForEditor | null>(null);
122
 
123
+ // Backend health check
124
+ const [backendOk, setBackendOk] = useState<boolean | null>(null); // null = checking
125
+
126
+ // Health check on mount — tells you immediately if backend is reachable
127
+ useEffect(() => {
128
+ const check = async () => {
129
+ console.group("[NLP] Backend health check");
130
+ try {
131
+ const res = await fetch(`${API_BASE}/api/health`, { headers: NGROK_HEADERS });
132
+ const ok = res.ok;
133
+ setBackendOk(ok);
134
+ console.log(ok ? "✅ Backend reachable" : `❌ Backend returned ${res.status}`);
135
+ } catch (e) {
136
+ setBackendOk(false);
137
+ console.error("❌ Backend unreachable:", e);
138
+ }
139
+ console.groupEnd();
140
+ };
141
+ check();
142
+ }, []);
143
+
144
  const loadHistory = useCallback(async () => {
145
  setHistoryLoading(true);
146
+ console.group("[NLP] Load history");
147
  try {
148
+ const res = await fetch(`${API_BASE}/api/history?limit=50`, { headers: NGROK_HEADERS });
149
+ console.log(`→ GET /api/history status=${res.status}`);
150
  if (res.ok) setHistory(await res.json());
151
+ } catch (e) { console.error(e); }
152
+ finally { setHistoryLoading(false); console.groupEnd(); }
 
153
  }, []);
154
 
155
  useEffect(() => {
 
219
  const uploadCSV = useCallback(async (file: File) => {
220
  setLoading(true);
221
  setError("");
222
+ console.group(`[NLP] CSV Upload — ${file.name} (${(file.size/1024).toFixed(1)} KB)`);
223
  try {
224
  const formData = new FormData();
225
  formData.append("file", file);
226
+ // ⚠️ IMPORTANT: ngrok-skip-browser-warning header MUST be included here.
227
+ // Without it, Ngrok returns an HTML warning page instead of forwarding
228
+ // the request to FastAPI → FastAPI tries to parse HTML as CSV → 500 error.
229
  const res = await fetch(`${API_BASE}/api/upload?run_ner=true&run_sentiment=true&run_topics=true`, {
230
  method: "POST",
231
+ headers: NGROK_HEADERS, // ← THE FIX
232
  body: formData,
233
  });
234
+ console.log(`→ POST /api/upload status=${res.status}`);
235
  if (!res.ok) {
236
+ const err = await res.json().catch(() => ({ detail: `HTTP ${res.status}` }));
237
  throw new Error(err.detail || "Upload failed");
238
  }
239
  const result: AnalysisResult = await res.json();
240
+ console.log(`← ${result.total_documents} documents, topics=${result.topic_summary?.length}`);
241
  setData(result);
242
+ setActiveTab("overview"); // Auto-switch to results
243
+
244
+ // Immediately fetch insights after upload
245
+ const insightsRes = await fetch(`${API_BASE}/api/insights`, { headers: NGROK_HEADERS, method: "POST" });
246
+ console.log(`→ POST /api/insights status=${insightsRes.status}`);
247
  if (insightsRes.ok) setInsights(await insightsRes.json());
248
  } catch (e: any) {
249
+ console.error("Upload error:", e);
250
  setError(e.message || "Error uploading file");
251
  } finally {
252
  setLoading(false);
253
+ console.groupEnd();
254
  }
255
  }, []);
256
 
 
258
  if (!textInput.trim()) return;
259
  setLoading(true);
260
  setError("");
261
+ console.group(`[NLP] Analyze text (${textInput.length} chars)`);
262
  try {
263
  const res = await fetch(`${API_BASE}/api/analyze`, {
264
  method: "POST",
265
+ headers: { ...NGROK_HEADERS, "Content-Type": "application/json" },
266
  body: JSON.stringify({ text: textInput }),
267
  });
268
+ console.log(`→ POST /api/analyze status=${res.status}`);
269
  if (!res.ok) throw new Error("Analysis failed");
270
  const result: AnalysisResult = await res.json();
271
+ console.log(`← entities:`, result.documents[0]?.entities?.length ?? 0,
272
+ `sentiment:`, result.documents[0]?.sentiment?.label);
273
  setData(result);
274
  } catch (e: any) {
275
+ console.error(e);
276
  setError(e.message);
277
  } finally {
278
  setLoading(false);
279
+ console.groupEnd();
280
  }
281
  }, [textInput]);
282
 
 
320
 
321
  return (
322
  <div>
323
+ {/* Backend status banner */}
324
+ {backendOk === false && (
325
+ <div style={{
326
+ background: "rgba(255,80,80,0.15)", border: "1px solid var(--negative)",
327
+ borderRadius: "0.5rem", padding: "0.6rem 1rem", marginBottom: "1rem",
328
+ display: "flex", alignItems: "center", gap: "0.5rem", fontSize: "0.85rem",
329
+ }}>
330
+ <span>🔴</span>
331
+ <span style={{ color: "var(--negative)", fontWeight: 600 }}>Backend холболт алдаатай.</span>
332
+ <span style={{ color: "var(--text-muted)" }}>
333
+ Colab дээрх сервер ажиллаж байгаа эсэхийг шалгаад, Ngrok URL зөв эсэхийг .env.local файлд шинэчилнэ үү.
334
+ </span>
335
+ </div>
336
+ )}
337
+ {backendOk === null && (
338
+ <div style={{
339
+ background: "rgba(100,100,200,0.1)", border: "1px solid rgba(100,100,255,0.3)",
340
+ borderRadius: "0.5rem", padding: "0.4rem 1rem", marginBottom: "0.75rem",
341
+ fontSize: "0.8rem", color: "var(--text-muted)",
342
+ }}>
343
+ ⏳ Backend холболт шалгаж байна...
344
+ </div>
345
+ )}
346
+
347
  {/* Annotation editor modal */}
348
  {editingDoc && (
349
  <AnnotationEditor
 
355
 
356
  {/* Upload Section */}
357
  {!data && !loading && (
358
+
359
  <section style={{ marginBottom: "2rem" }}>
360
  <div
361
  className={`upload-area ${dragging ? "dragging" : ""}`}
 
368
  <p className="upload-text">
369
  <strong>CSV файл чирж оруулах</strong> эсвэл дарж сонгох
370
  </p>
371
+ <div style={{ fontSize: "0.75rem", color: "var(--text-muted)", marginTop: "0.5rem" }}>
372
+ <p>⚠️ <strong>Санамж:</strong> Шинжлэх өгөгдөл тань заавал <code>text</code> эсвэл <code>Text</code> гэсэн нэртэй баганад байх ёстой.</p>
373
+ <p>Хэрэв таны багана <code>Текст</code>, <code>Мессеж</code> гэх мэт Монгол нэртэй бол файлаа оруулахаас өмнө нэрийг нь <code>text</code> болгож өөрчилнө үү.</p>
374
+ </div>
375
  <input
376
  ref={fileInputRef}
377
  type="file"
nlp_core/ner_engine.py CHANGED
@@ -17,12 +17,18 @@ class NEREngine:
17
  def _load_pipeline(self):
18
  """Lazy-load the NER pipeline (heavy model, load only when needed)."""
19
  if self._pipeline is None:
 
20
  from transformers import pipeline
 
21
  self._pipeline = pipeline(
22
  "ner",
23
  model=self.model_name,
24
  aggregation_strategy="simple",
 
 
 
25
  )
 
26
  return self._pipeline
27
 
28
  def _clean_entities(self, raw_entities: List[dict]) -> List[dict]:
 
17
  def _load_pipeline(self):
18
  """Lazy-load the NER pipeline (heavy model, load only when needed)."""
19
  if self._pipeline is None:
20
+ import torch
21
  from transformers import pipeline
22
+ device = 0 if torch.cuda.is_available() else -1
23
  self._pipeline = pipeline(
24
  "ner",
25
  model=self.model_name,
26
  aggregation_strategy="simple",
27
+ truncation=True,
28
+ max_length=512,
29
+ device=device,
30
  )
31
+ print(f"[NEREngine] Loaded on {'GPU' if device == 0 else 'CPU'}")
32
  return self._pipeline
33
 
34
  def _clean_entities(self, raw_entities: List[dict]) -> List[dict]: