hellosindh committed on
Commit
4e3fb85
·
verified ·
1 Parent(s): 1a83bac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +495 -139
app.py CHANGED
@@ -1,57 +1,45 @@
1
  import gradio as gr
2
  import torch
3
  import sentencepiece as spm
4
- import os
5
  from transformers import RobertaForTokenClassification
 
 
 
6
 
7
- # ─── Load model & tokenizer ───────────────────────
8
- MODEL_PATH = "hellosindh/sindhi-bert-ner"
9
- SP_MODEL = "sindhi_bpe_32k.model"
10
 
11
  print("Loading model...", flush=True)
12
- model = RobertaForTokenClassification.from_pretrained(
13
- MODEL_PATH
14
- )
15
  model.eval()
16
 
17
  print("Loading tokenizer...", flush=True)
 
18
  sp = spm.SentencePieceProcessor()
19
- sp.Load(SP_MODEL)
 
20
 
21
- # ─── Tag config ───────────────────────────────────
22
  ID2TAG = model.config.id2label
23
-
24
  BOS_ID = 2
25
  EOS_ID = 3
26
 
27
- # Entity colors for highlighting
28
- COLORS = {
29
- "PERSON": "#FF6B6B",
30
- "LOCATION": "#4ECDC4",
31
- "ORGANIZATION": "#45B7D1",
32
- "DATE_TIME": "#96CEB4",
33
- "EVENT": "#FFEAA7",
34
- "LITERARY_WORK":"#DDA0DD",
35
- "PROFESSION": "#98D8C8",
36
- "TITLE": "#F7DC6F",
37
- "LANGUAGE": "#BB8FCE",
38
- "FIELD": "#85C1E9",
39
- "LAW": "#F0B27A",
40
- "GROUP": "#82E0AA",
41
- "MISC": "#BDC3C7",
42
  }
43
 
44
- # ─── Prediction function ──────────────────────────
45
  def predict_ner(sentence):
46
  if not sentence.strip():
47
- return "", []
48
-
49
- words = sentence.split()
50
-
51
- # Tokenize
52
  input_ids = [BOS_ID]
53
- word_map = [-1] # maps token → word index
54
-
55
  for i, word in enumerate(words):
56
  subwords = sp.EncodeAsIds(word)
57
  if not subwords:
@@ -59,149 +47,517 @@ def predict_ner(sentence):
59
  for j, sw in enumerate(subwords):
60
  input_ids.append(sw)
61
  word_map.append(i if j == 0 else -1)
62
-
63
  input_ids.append(EOS_ID)
64
  word_map.append(-1)
65
-
66
- # Run model
67
  tensor = torch.tensor([input_ids])
68
  with torch.no_grad():
69
  logits = model(tensor).logits[0]
70
-
 
71
  preds = torch.argmax(logits, dim=-1).tolist()
72
-
73
- # Collect word-level predictions
74
  word_tags = {}
 
75
  for pos, (pred, wid) in enumerate(zip(preds, word_map)):
76
  if wid >= 0:
77
  word_tags[wid] = ID2TAG[pred]
78
-
79
- # ─── Build highlighted HTML ───────────────────
80
- html_parts = []
81
  entities = []
82
-
83
  i = 0
 
84
  while i < len(words):
85
  tag = word_tags.get(i, "O")
86
-
87
  if tag.startswith("B-"):
88
- entity_type = tag[2:]
89
  entity_words = [words[i]]
90
-
91
- # Collect I- continuation tokens
92
  j = i + 1
93
  while j < len(words):
94
- next_tag = word_tags.get(j, "O")
95
- if next_tag == f"I-{entity_type}":
96
  entity_words.append(words[j])
 
97
  j += 1
98
  else:
99
  break
100
-
101
  entity_text = " ".join(entity_words)
102
- color = COLORS.get(entity_type, "#BDC3C7")
103
-
104
- html_parts.append(
105
- f'<mark style="background:{color}; '
106
- f'padding:2px 6px; border-radius:4px; '
107
- f'margin:2px; font-weight:bold;" '
108
- f'title="{entity_type}">'
109
- f'{entity_text} '
110
- f'<span style="font-size:0.75em; '
111
- f'opacity:0.8;">[{entity_type}]</span>'
112
- f'</mark>'
 
 
 
 
 
 
 
 
 
 
 
 
113
  )
114
-
115
- entities.append((entity_text, entity_type))
 
 
 
 
 
116
  i = j
117
-
118
  else:
119
- html_parts.append(words[i])
 
 
120
  i += 1
121
-
122
- html = '<p dir="rtl" style="font-size:1.2em; ' \
123
- 'line-height:2.5em; text-align:right;">' + \
124
- " ".join(html_parts) + "</p>"
125
-
126
- # Build entity table
127
- table = []
128
- for text, etype in entities:
129
- table.append([text, etype])
130
-
131
- return html, table
132
-
133
- # ─── Example sentences ────────────────────────────
134
- examples = [
135
- ["شيخ اياز شڪارپور ۾ پيدا ٿيو"],
136
- ["سنڌ يونيورسٽي حيدرآباد ۾ آھي"],
137
- ["پاڪستان ڏکڻ ايشيا ۾ آھي"],
138
- ["ڊاڪٽر محمد علي 1990ع ۾ سنڌ آيو"],
139
- ]
140
-
141
- # ─── Gradio Interface ─────────────────────────────
142
- with gr.Blocks(
143
- theme=gr.themes.Soft(),
144
- title="Sindhi NER"
145
- ) as demo:
146
-
147
- gr.Markdown("""
148
- # 🏷️ Sindhi Named Entity Recognizer
149
- ### سنڌي نالن جي سڃاڻپ جو اوزار
150
-
151
- First Sindhi NER model — trained on 22,777 annotated sentences!
152
-
153
- **Recognizes:** Person · Location · Organization ·
154
- Date/Time · Event · Literary Work · and 15 more types
155
- """)
156
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  with gr.Row():
158
- with gr.Column():
159
- text_input = gr.Textbox(
160
- label="سنڌي جملو لکو (Enter Sindhi text)",
161
- placeholder="شيخ اياز شڪارپور ۾ پيدا ٿيو",
162
- lines=3,
163
  rtl=True
164
  )
165
- submit_btn = gr.Button(
166
- "🔍 Entities ڳوليو",
167
- variant="primary"
168
- )
169
-
170
- with gr.Row():
171
- highlighted = gr.HTML(
172
- label="Highlighted Entities"
173
- )
174
-
 
 
 
 
 
 
175
  with gr.Row():
176
- entity_table = gr.Dataframe(
177
- headers=["Entity", "Type"],
178
- label="Entities Found",
179
- wrap=True
180
  )
181
-
182
- # Color legend
183
- gr.Markdown("""
184
- ### Legend
185
- 🔴 Person &nbsp; 🟦 Location &nbsp;
186
- 🔵 Organization &nbsp; 🟢 Date/Time &nbsp;
187
- 🟡 Event &nbsp; 🟣 Literary Work
188
- """)
189
-
190
  gr.Examples(
191
- examples=examples,
192
- inputs=text_input
 
 
 
 
 
 
 
 
193
  )
194
-
195
- submit_btn.click(
 
 
 
 
 
 
 
 
 
 
196
  fn=predict_ner,
197
- inputs=text_input,
198
- outputs=[highlighted, entity_table]
199
  )
200
-
201
- text_input.submit(
202
  fn=predict_ner,
203
- inputs=text_input,
204
- outputs=[highlighted, entity_table]
205
  )
206
 
207
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  import sentencepiece as spm
 
4
  from transformers import RobertaForTokenClassification
5
+ from huggingface_hub import hf_hub_download
6
+ import csv
7
+ import io
8
 
9
+ MODEL_ID = "hellosindh/sindhi-bert-ner"
 
 
10
 
11
  print("Loading model...", flush=True)
12
+ model = RobertaForTokenClassification.from_pretrained(MODEL_ID)
 
 
13
  model.eval()
14
 
15
  print("Loading tokenizer...", flush=True)
16
+ sp_path = hf_hub_download(repo_id=MODEL_ID, filename="sindhi_bpe_32k.model")
17
  sp = spm.SentencePieceProcessor()
18
+ sp.Load(sp_path)
19
+ print("✅ Ready!", flush=True)
20
 
 
21
  ID2TAG = model.config.id2label
 
22
  BOS_ID = 2
23
  EOS_ID = 3
24
 
25
# Display settings for each entity type: accent colour, translucent
# background, and the Sindhi label rendered in badges and summary cards.
ENTITY_CONFIG = {
    "PERSON": {"color": "#c084fc", "bg": "rgba(192,132,252,0.15)", "sindhi": "ماڻهو"},
    "LOCATION": {"color": "#818cf8", "bg": "rgba(129,140,248,0.15)", "sindhi": "جڳهه"},
    "ORGANIZATION": {"color": "#38bdf8", "bg": "rgba(56,189,248,0.15)", "sindhi": "ادارو"},
    "DATE_TIME": {"color": "#34d399", "bg": "rgba(52,211,153,0.15)", "sindhi": "تاريخ"},
    "EVENT": {"color": "#fbbf24", "bg": "rgba(251,191,36,0.15)", "sindhi": "واقعو"},
    "TITLE": {"color": "#fb923c", "bg": "rgba(251,146,60,0.15)", "sindhi": "لقب"},
    # Fallback entry. Callers use ENTITY_CONFIG.get(etype, ENTITY_CONFIG["MISC"]);
    # the default expression is evaluated eagerly, so without this key every
    # lookup raised KeyError, even for entity types that are listed above.
    "MISC": {"color": "#94a3b8", "bg": "rgba(148,163,184,0.15)", "sindhi": "ٻيو"},
}
33
 
34
+
35
  def predict_ner(sentence):
36
  if not sentence.strip():
37
+ return _empty_html(), _empty_summary(), "", None
38
+
39
+ words = sentence.split()
 
 
40
  input_ids = [BOS_ID]
41
+ word_map = [-1]
42
+
43
  for i, word in enumerate(words):
44
  subwords = sp.EncodeAsIds(word)
45
  if not subwords:
 
47
  for j, sw in enumerate(subwords):
48
  input_ids.append(sw)
49
  word_map.append(i if j == 0 else -1)
50
+
51
  input_ids.append(EOS_ID)
52
  word_map.append(-1)
53
+
 
54
  tensor = torch.tensor([input_ids])
55
  with torch.no_grad():
56
  logits = model(tensor).logits[0]
57
+
58
+ probs = torch.softmax(logits, dim=-1)
59
  preds = torch.argmax(logits, dim=-1).tolist()
60
+ conf = probs.max(dim=-1).values.tolist()
61
+
62
  word_tags = {}
63
+ word_conf = {}
64
  for pos, (pred, wid) in enumerate(zip(preds, word_map)):
65
  if wid >= 0:
66
  word_tags[wid] = ID2TAG[pred]
67
+ word_conf[wid] = conf[pos]
68
+
 
69
  entities = []
70
+ html_words = []
71
  i = 0
72
+
73
  while i < len(words):
74
  tag = word_tags.get(i, "O")
75
+
76
  if tag.startswith("B-"):
77
+ etype = tag[2:]
78
  entity_words = [words[i]]
79
+ scores = [word_conf.get(i, 0)]
80
+
81
  j = i + 1
82
  while j < len(words):
83
+ if word_tags.get(j, "O") == f"I-{etype}":
 
84
  entity_words.append(words[j])
85
+ scores.append(word_conf.get(j, 0))
86
  j += 1
87
  else:
88
  break
89
+
90
  entity_text = " ".join(entity_words)
91
+ avg_score = sum(scores) / len(scores)
92
+ cfg = ENTITY_CONFIG.get(etype, ENTITY_CONFIG["MISC"])
93
+
94
+ html_words.append(
95
+ f'<span style="'
96
+ f'background:{cfg["bg"]};'
97
+ f'border:1px solid {cfg["color"]}50;'
98
+ f'color:#f1f5f9;'
99
+ f'padding:3px 10px 3px 6px;'
100
+ f'border-radius:8px;'
101
+ f'margin:2px;'
102
+ f'display:inline-block;'
103
+ f'font-weight:500;">'
104
+ f'<span style="'
105
+ f'background:{cfg["color"]};'
106
+ f'color:#0a0a1a;'
107
+ f'font-size:0.6em;font-weight:800;'
108
+ f'padding:1px 6px;border-radius:4px;'
109
+ f'margin-left:6px;vertical-align:middle;'
110
+ f'letter-spacing:0.3px;">'
111
+ f'{cfg["sindhi"]}</span>'
112
+ f'{entity_text}'
113
+ f'</span>'
114
  )
115
+ entities.append({
116
+ "text": entity_text,
117
+ "type": etype,
118
+ "sindhi": cfg["sindhi"],
119
+ "score": avg_score,
120
+ "color": cfg["color"],
121
+ })
122
  i = j
 
123
  else:
124
+ html_words.append(
125
+ f'<span style="color:#cbd5e1;padding:2px 3px;">{words[i]}</span>'
126
+ )
127
  i += 1
128
+
129
+ highlighted = f"""
130
+ <div style="
131
+ background:linear-gradient(135deg,#1a0533 0%,#0f0f2e 100%);
132
+ border:1px solid #7c3aed30;
133
+ border-radius:16px;
134
+ padding:24px 28px;
135
+ font-size:1.2em;
136
+ line-height:3.2em;
137
+ direction:rtl;
138
+ text-align:right;
139
+ font-family:'Noto Nastaliq Urdu',Georgia,serif;
140
+ min-height:90px;
141
+ box-shadow:inset 0 0 60px #7c3aed08;">
142
+ {" ".join(html_words)}
143
+ </div>
144
+ """
145
+
146
+ summary = _build_summary(entities)
147
+ conf_html = _build_confidence(entities)
148
+ csv_file = _build_csv(entities)
149
+
150
+ return highlighted, summary, conf_html, csv_file
151
+
152
+
153
+ def _empty_html():
154
+ return """
155
+ <div style="
156
+ background:linear-gradient(135deg,#1a0533,#0f0f2e);
157
+ border:1px solid #7c3aed20;
158
+ border-radius:16px;
159
+ padding:40px;
160
+ text-align:center;
161
+ min-height:90px;
162
+ display:flex;align-items:center;justify-content:center;">
163
+ <span style="color:#4c1d95;font-size:1em;font-family:Georgia,serif;">
164
+ ڪو بہ سنڌي جملو لکو ✦
165
+ </span>
166
+ </div>
167
+ """
168
+
169
+ def _empty_summary():
170
+ return """
171
+ <div style="
172
+ background:#1a0533;
173
+ border:1px solid #7c3aed20;
174
+ border-radius:16px;
175
+ padding:24px;
176
+ text-align:center;
177
+ color:#4c1d95;
178
+ font-size:0.9em;">
179
+ اعتماد جوڳا نتيجا نہ مليا
180
+ </div>
181
+ """
182
+
183
def _build_summary(entities):
    """Render the per-type entity count panel as an HTML string.

    Args:
        entities: list of dicts; only the "type" key of each is read here.

    Returns:
        HTML for the summary card, or the placeholder produced by
        ``_empty_summary()`` when ``entities`` is empty.
    """
    if not entities:
        return _empty_summary()

    from collections import Counter

    counts = Counter(e["type"] for e in entities)
    total = len(entities)

    cards = []
    # most_common() orders by count descending and keeps first-seen order
    # for ties, matching a stable sort on the negated count.
    for etype, cnt in counts.most_common():
        # Resolve the style lazily: indexing ENTITY_CONFIG["MISC"] as a .get()
        # default raises KeyError whenever that key is absent, even for known
        # types. Fall back to a neutral grey card labelled with the raw type.
        cfg = ENTITY_CONFIG.get(etype) or ENTITY_CONFIG.get("MISC") or {
            "color": "#94a3b8",
            "bg": "rgba(148,163,184,0.15)",
            "sindhi": etype,
        }
        cards.append(f"""
        <div style="
            background:{cfg['bg']};
            border:1px solid {cfg['color']}40;
            border-radius:10px;
            padding:10px 14px;
            display:flex;
            justify-content:space-between;
            align-items:center;
            margin-bottom:8px;
            direction:rtl;">
            <span style="color:{cfg['color']};font-weight:600;font-size:0.88em;">
                {cfg['sindhi']}
            </span>
            <span style="
                background:{cfg['color']};
                color:#0a0a1a;
                font-weight:800;
                border-radius:20px;
                padding:1px 10px;
                font-size:0.82em;
                min-width:24px;text-align:center;">
                {cnt}
            </span>
        </div>
        """)

    return f"""
    <div style="
        background:linear-gradient(135deg,#1a0533,#0f0f2e);
        border:1px solid #7c3aed30;
        border-radius:16px;
        padding:18px 16px;">
        <div style="
            color:#c084fc;font-weight:700;font-size:0.88em;
            margin-bottom:12px;padding-bottom:10px;
            border-bottom:1px solid #7c3aed25;
            direction:rtl;text-align:right;
            letter-spacing:0.3px;">
            مجموعي:
            <span style="color:#f1f5f9;font-size:1.1em;">{total}</span>
            سڃاڻپ
        </div>
        <div>{"".join(cards)}</div>
    </div>
    """
240
+
241
def _build_confidence(entities):
    """Render one confidence bar per detected entity as an HTML string.

    Args:
        entities: list of dicts with "text", "type", "sindhi" and "score"
            keys; "score" is multiplied by 100 to get the percentage.

    Returns:
        HTML for the confidence panel, or "" when ``entities`` is empty.
    """
    if not entities:
        return ""

    from html import escape

    bars = []
    for ent in entities:
        # Resolve the style lazily: indexing ENTITY_CONFIG["MISC"] as a .get()
        # default raises KeyError whenever that key is absent, even for known
        # types. Fall back to a neutral grey accent colour.
        cfg = (
            ENTITY_CONFIG.get(ent["type"])
            or ENTITY_CONFIG.get("MISC")
            or {"color": "#94a3b8"}
        )
        pct = int(ent["score"] * 100)
        # Entity text originates from user input, so escape it before
        # embedding it in markup.
        text = escape(str(ent["text"]))
        label = escape(str(ent["sindhi"]))

        bars.append(f"""
        <div style="margin-bottom:16px;direction:rtl;">
            <div style="
                display:flex;justify-content:space-between;
                align-items:center;margin-bottom:6px;">
                <span style="color:#e2e8f0;font-size:0.9em;font-weight:500;
                    font-family:Georgia,serif;">
                    {text}
                </span>
                <div style="display:flex;gap:8px;align-items:center;">
                    <span style="
                        background:{cfg['color']}18;
                        border:1px solid {cfg['color']}40;
                        color:{cfg['color']};
                        font-size:0.68em;padding:2px 8px;
                        border-radius:4px;font-weight:700;">
                        {label}
                    </span>
                    <span style="color:{cfg['color']};
                        font-weight:800;font-size:0.9em;
                        font-family:monospace;">
                        {pct}%
                    </span>
                </div>
            </div>
            <div style="
                background:#1e1040;
                border-radius:999px;height:5px;overflow:hidden;">
                <div style="
                    width:{pct}%;height:100%;
                    background:linear-gradient(90deg,
                        {cfg['color']}60,{cfg['color']});
                    border-radius:999px;">
                </div>
            </div>
        </div>
        """)

    return f"""
    <div style="
        background:linear-gradient(135deg,#1a0533,#0f0f2e);
        border:1px solid #7c3aed30;
        border-radius:16px;
        padding:20px 18px;
        margin-top:4px;">
        <div style="
            color:#c084fc;font-weight:700;font-size:0.88em;
            margin-bottom:16px;padding-bottom:10px;
            border-bottom:1px solid #7c3aed25;
            direction:rtl;text-align:right;">
            اعتماد
        </div>
        {"".join(bars)}
    </div>
    """
306
+
307
def _build_csv(entities):
    """Write the detected entities to a CSV file and return its path.

    Args:
        entities: list of dicts with "text", "type", "sindhi" and "score"
            keys; "score" is written as a percentage with one decimal.

    Returns:
        Path of the newly created CSV file, or None when ``entities`` is
        empty.
    """
    if not entities:
        return None

    import tempfile

    # A unique file per call: a single fixed path would let concurrent
    # requests overwrite each other's export before it is downloaded.
    # delete=False because the file is read after this function returns.
    # Encoding is utf-8-sig (UTF-8 with a BOM); newline="" leaves line
    # endings to the csv module.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix="sindhi_ner_",
        delete=False,
        encoding="utf-8-sig",
        newline="",
    ) as f:
        writer = csv.writer(f)
        writer.writerow(["Entity", "Type", "Sindhi Type", "Confidence"])
        writer.writerows(
            [ent["text"], ent["type"], ent["sindhi"], f"{ent['score']*100:.1f}%"]
            for ent in entities
        )
        path = f.name
    return path
322
+
323
+
324
+ CSS = """
325
+ @import url('https://fonts.googleapis.com/css2?family=Noto+Nastaliq+Urdu:wght@400;700&family=Space+Mono:wght@700&family=Outfit:wght@300;400;600;700;800&display=swap');
326
+
327
+ *, body, .gradio-container {
328
+ font-family: 'Outfit', sans-serif !important;
329
+ }
330
+ body, .gradio-container {
331
+ background: #08081a !important;
332
+ }
333
+ .gradio-container {
334
+ max-width: 980px !important;
335
+ margin: 0 auto !important;
336
+ padding: 16px !important;
337
+ }
338
+ label > span {
339
+ color: #9333ea !important;
340
+ font-size: 0.82em !important;
341
+ font-weight: 700 !important;
342
+ letter-spacing: 0.8px !important;
343
+ text-transform: uppercase !important;
344
+ }
345
+ textarea, input[type="text"] {
346
+ background: #130825 !important;
347
+ border: 1px solid #6d28d960 !important;
348
+ border-radius: 14px !important;
349
+ color: #e2e8f0 !important;
350
+ font-size: 1.1em !important;
351
+ direction: rtl !important;
352
+ font-family: Georgia, 'Noto Nastaliq Urdu', serif !important;
353
+ caret-color: #c084fc !important;
354
+ }
355
+ textarea:focus {
356
+ border-color: #c084fc !important;
357
+ box-shadow: 0 0 0 3px #7c3aed15 !important;
358
+ outline: none !important;
359
+ }
360
+ button.primary {
361
+ background: linear-gradient(135deg, #6d28d9, #9333ea, #c084fc) !important;
362
+ background-size: 200% auto !important;
363
+ border: none !important;
364
+ border-radius: 12px !important;
365
+ color: #fff !important;
366
+ font-weight: 800 !important;
367
+ font-size: 0.95em !important;
368
+ letter-spacing: 0.5px !important;
369
+ transition: all 0.3s ease !important;
370
+ padding: 13px !important;
371
+ }
372
+ button.primary:hover {
373
+ background-position: right center !important;
374
+ box-shadow: 0 6px 24px #7c3aed50 !important;
375
+ transform: translateY(-1px) !important;
376
+ }
377
+ button.secondary {
378
+ background: #130825 !important;
379
+ border: 1px solid #6d28d940 !important;
380
+ border-radius: 12px !important;
381
+ color: #7c3aed !important;
382
+ font-weight: 600 !important;
383
+ transition: all 0.2s !important;
384
+ }
385
+ button.secondary:hover {
386
+ border-color: #c084fc !important;
387
+ color: #c084fc !important;
388
+ }
389
+ .examples-holder, .examples table {
390
+ background: #130825 !important;
391
+ border: 1px solid #6d28d930 !important;
392
+ border-radius: 12px !important;
393
+ }
394
+ .examples table td {
395
+ color: #94a3b8 !important;
396
+ font-family: Georgia, serif !important;
397
+ }
398
+ .examples table tr:hover td {
399
+ color: #c084fc !important;
400
+ background: #1a0533 !important;
401
+ }
402
+ .gap-4 { gap: 12px !important; }
403
+ ::-webkit-scrollbar { width: 5px; }
404
+ ::-webkit-scrollbar-track { background: #08081a; }
405
+ ::-webkit-scrollbar-thumb { background: #6d28d9; border-radius: 3px; }
406
+ """
407
+
408
+ HEADER = """
409
+ <div style="
410
+ background:linear-gradient(135deg,#1a0533 0%,#0f0f2e 60%,#160a2e 100%);
411
+ border:1px solid #7c3aed25;
412
+ border-radius:20px;
413
+ padding:36px 28px 28px;
414
+ margin-bottom:20px;
415
+ text-align:center;
416
+ position:relative;overflow:hidden;">
417
+ <div style="
418
+ position:absolute;top:0;left:0;right:0;bottom:0;
419
+ background:radial-gradient(ellipse at 50% 0%,#7c3aed12 0%,transparent 65%);
420
+ pointer-events:none;"></div>
421
+ <div style="position:relative;">
422
+ <div style="font-size:3em;margin-bottom:8px;line-height:1;">🏷️</div>
423
+ <h1 style="
424
+ color:#f1f5f9;
425
+ font-size:2em;font-weight:800;
426
+ margin:0 0 4px;
427
+ letter-spacing:-1px;
428
+ text-shadow:0 0 40px #7c3aed50;">
429
+ سنڌي اسمن جي سڃاڻپ
430
+ </h1>
431
+ <p style="
432
+ font-family:'Space Mono',monospace;
433
+ color:#6d28d9;font-size:0.72em;
434
+ letter-spacing:3px;margin:0 0 18px;">
435
+ SINDHI NAMED ENTITY RECOGNITION
436
+ </p>
437
+ <div style="display:flex;justify-content:center;gap:10px;flex-wrap:wrap;">
438
+ <span style="
439
+ background:#7c3aed15;border:1px solid #7c3aed35;
440
+ color:#a855f7;padding:5px 14px;border-radius:20px;
441
+ font-size:0.75em;font-weight:600;">
442
+ ✦ 22,777 جملا
443
+ </span>
444
+ <span style="
445
+ background:#7c3aed15;border:1px solid #7c3aed35;
446
+ color:#a855f7;padding:5px 14px;border-radius:20px;
447
+ font-size:0.75em;font-weight:600;">
448
+ ✦ 6 قسم
449
+ </span>
450
+ <span style="
451
+ background:#7c3aed15;border:1px solid #7c3aed35;
452
+ color:#a855f7;padding:5px 14px;border-radius:20px;
453
+ font-size:0.75em;font-weight:600;">
454
+ ✦ 125M پيراميٽر
455
+ </span>
456
+ <span style="
457
+ background:#7c3aed15;border:1px solid #7c3aed35;
458
+ color:#a855f7;padding:5px 14px;border-radius:20px;
459
+ font-size:0.75em;font-weight:600;">
460
+ ✦ سنڌي BERT
461
+ </span>
462
+ </div>
463
+ </div>
464
+ </div>
465
+ """
466
+
467
+ LEGEND = """
468
+ <div style="
469
+ background:linear-gradient(135deg,#1a0533,#0f0f2e);
470
+ border:1px solid #7c3aed20;
471
+ border-radius:14px;
472
+ padding:16px 18px;
473
+ margin-top:4px;">
474
+ <div style="
475
+ color:#9333ea;font-weight:700;font-size:0.78em;
476
+ letter-spacing:1px;margin-bottom:12px;
477
+ direction:rtl;text-align:right;">
478
+ اسمن جي نشاندھي
479
+ </div>
480
+ <div style="display:flex;flex-wrap:wrap;gap:8px;direction:rtl;">
481
+ <span style="background:rgba(192,132,252,0.15);border:1px solid #c084fc40;color:#c084fc;padding:3px 10px;border-radius:6px;font-size:0.75em;font-weight:600;">ماڻهو</span>
482
+ <span style="background:rgba(129,140,248,0.15);border:1px solid #818cf840;color:#818cf8;padding:3px 10px;border-radius:6px;font-size:0.75em;font-weight:600;">جڳهه</span>
483
+ <span style="background:rgba(56,189,248,0.15);border:1px solid #38bdf840;color:#38bdf8;padding:3px 10px;border-radius:6px;font-size:0.75em;font-weight:600;">ادارو</span>
484
+ <span style="background:rgba(52,211,153,0.15);border:1px solid #34d39940;color:#34d399;padding:3px 10px;border-radius:6px;font-size:0.75em;font-weight:600;">تاريخ</span>
485
+ <span style="background:rgba(251,191,36,0.15);border:1px solid #fbbf2440;color:#fbbf24;padding:3px 10px;border-radius:6px;font-size:0.75em;font-weight:600;">واقعو</span>
486
+ <span style="background:rgba(251,146,60,0.15);border:1px solid #fb923c40;color:#fb923c;padding:3px 10px;border-radius:6px;font-size:0.75em;font-weight:600;">لقب</span>
487
+ </div>
488
+ </div>
489
+ """
490
+
491
+
492
+ with gr.Blocks(css=CSS, title="سنڌي NER") as demo:
493
+
494
+ gr.HTML(HEADER)
495
+
496
  with gr.Row():
497
+ with gr.Column(scale=3):
498
+ inp = gr.Textbox(
499
+ label="سنڌي جملو لکو",
500
+ placeholder="شيخ اياز شڪارپور ۾ پيدا ٿيو...",
501
+ lines=4,
502
  rtl=True
503
  )
504
+ with gr.Row():
505
+ btn = gr.Button("🔍 ڳوليو", variant="primary")
506
+ gr.ClearButton(value="🗑️ مٽايو", components=[inp], variant="secondary")
507
+
508
+ with gr.Column(scale=2):
509
+ summary_out = gr.HTML(value=_empty_summary())
510
+
511
+ gr.HTML("<div style='height:4px'></div>")
512
+
513
+ highlighted_out = gr.HTML(
514
+ label="نتيجا(و)",
515
+ value=_empty_html()
516
+ )
517
+
518
+ conf_out = gr.HTML()
519
+
520
  with gr.Row():
521
+ csv_out = gr.File(
522
+ label="📥 ڊائونلوڊ ڪريو (CSV)",
523
+ file_types=[".csv"],
524
+ interactive=False
525
  )
526
+
527
+ gr.HTML(LEGEND)
528
+
 
 
 
 
 
 
529
  gr.Examples(
530
+ label="مثالي جملا",
531
+ examples=[
532
+ ["شيخ اياز شڪارپور ۾ پيدا ٿيو"],
533
+ ["يونيورسٽي آف سنڌ حيدرآباد ۾ آھي"],
534
+ ["سيد مراد علي شاھ سنڌ جو وڏو وزير آھي، سندس تعلق پاڪستان پيپلز پارٽي سان آھي"],
535
+ ["پاڪستان ۽ ڀارت جي ويڙھ 2025ع ۾ لڳي."],
536
+ ["ڊاڪٽر نبي بخش بلوچ 16 ڊسمبر 1917ع تي سنجھوري ۾ پيدا ٿيو"],
537
+ ["بينظير ڀٽو پاڪستان جي پھرين عورت وزيراعظم هئي"],
538
+ ],
539
+ inputs=inp
540
  )
541
+
542
+ gr.HTML("""
543
+ <div style="
544
+ text-align:center;padding:20px 0 8px;
545
+ font-family:'Space Mono',monospace;
546
+ color:#3b0764;font-size:0.72em;
547
+ letter-spacing:1.5px;">
548
+ hellosindh · sindhi-bert-ner · MIT License
549
+ </div>
550
+ """)
551
+
552
+ btn.click(
553
  fn=predict_ner,
554
+ inputs=inp,
555
+ outputs=[highlighted_out, summary_out, conf_out, csv_out]
556
  )
557
+ inp.submit(
 
558
  fn=predict_ner,
559
+ inputs=inp,
560
+ outputs=[highlighted_out, summary_out, conf_out, csv_out]
561
  )
562
 
563
  demo.launch()