Spaces:

techysanoj
/

indian-NER

Sleeping

App Files Files Community

techysanoj committed on Dec 15, 2025

Commit

77434f4

verified ·

1 Parent(s): 19235aa

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -67

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 MODEL_ID = "techysanoj/fine-tuned-IndicNER"
@@ -9,89 +10,63 @@ model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
 id2label = {int(k): v for k, v in model.config.id2label.items()}
-def merge_wordpieces(tokens, labels):
-    merged_tokens = []
-    merged_labels = []
-    current_word = ""
-    current_label = None
-    for tok, lab in zip(tokens, labels):
-        if tok.startswith("##"):
-            # continuation subword
-            current_word += tok[2:]
-        else:
-            # if a previous word is being built → flush it
-            if current_word != "":
-                merged_tokens.append(current_word)
-                merged_labels.append(current_label if current_label else "O")
-            # start new word
-            current_word = tok
-            current_label = "O" if lab == "O" else lab
-        # if label is not O and current_label is still O → update
-        if lab != "O" and (current_label == "O" or current_label is None):
-            current_label = lab
-    # flush last word
-    if current_word != "":
-        merged_tokens.append(current_word)
-        merged_labels.append(current_label if current_label else "O")
-    return merged_tokens, merged_labels
 def generate_ner_output(text):
     if not text.strip():
-        return "Please enter text."
     inputs = tokenizer(text, return_tensors="pt")
-    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
     with torch.no_grad():
         logits = model(**inputs).logits
-    pred_ids = torch.argmax(logits, dim=-1)[0].tolist()
-    labels = [id2label[pid] for pid in pred_ids]
-    # Remove CLS and SEP
-    tokens = tokens[1:-1]
-    labels = labels[1:-1]
-    # Merge WordPieces
-    merged_tokens, merged_labels = merge_wordpieces(tokens, labels)
-    # Format final output
-    output_lines = []
-    for tok, lab in zip(merged_tokens, merged_labels):
-        output_lines.append(f"{tok:<15} → {lab}")
-    return "\n".join(output_lines)
-# --------------- GRADIO UI ----------------
 with gr.Blocks() as demo:
-    gr.Markdown("## 🔥 IndicNER — Merged Token Output (Clean Words)")
-    text_input = gr.Textbox(
-        label="Enter text",
-        placeholder="Type your Hindi/English sentence here...",
-        lines=4
-    )
-    run_button = gr.Button("Generate NER")
-    ner_output = gr.Textbox(
-        label="NER Output (Merged Tokens)",
-        lines=30
-    )
-    run_button.click(
-        fn=generate_ner_output,
-        inputs=text_input,
-        outputs=ner_output
-    )
 demo.launch()

 import gradio as gr
 import torch
+import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModelForTokenClassification
 MODEL_ID = "techysanoj/fine-tuned-IndicNER"
 id2label = {int(k): v for k, v in model.config.id2label.items()}
+# Color map for Gradio HTML output
+COLOR_MAP = {
+    "B-PER": "red",
+    "I-PER": "red",
+    "B-ORG": "green",
+    "I-ORG": "green",
+    "B-LOC": "blue",
+    "I-LOC": "blue",
+    "O": "black"
+}
 def generate_ner_output(text):
     """Render token-level NER predictions for ``text`` as an HTML fragment.

     Every token produced by the tokenizer (except the sentence-boundary
     markers) becomes one row: the token, its predicted label, and the softmax
     probability of that label, colored through ``COLOR_MAP``.

     Args:
         text: Raw sentence entered by the user.

     Returns:
         An HTML string for the ``gr.HTML`` output, or a plain prompt message
         when ``text`` is missing, empty or only whitespace.
     """
     from html import escape  # local import keeps this block self-contained
     if not text or not text.strip():
         return "Please enter valid input."
     # truncation=True asks the tokenizer to cap the sequence at its configured
     # maximum length rather than handing an over-long input to the model.
     inputs = tokenizer(text, return_tensors="pt", truncation=True)
     token_ids = inputs["input_ids"][0]
     tokens = tokenizer.convert_ids_to_tokens(token_ids)
     with torch.no_grad():
         logits = model(**inputs).logits
     # Softmax for confidence; max() yields the winning probability and its
     # class id for every token position in a single call.
     probs = F.softmax(logits, dim=-1)[0]
     confidences, pred_ids = probs.max(dim=-1)
     # The boundary markers added by the tokenizer are not part of the user's
     # text, so they are left out of the report.
     boundary_ids = {tokenizer.cls_token_id, tokenizer.sep_token_id}
     rows = []
     for tok_id, tok, pid, conf in zip(
         token_ids.tolist(), tokens, pred_ids.tolist(), confidences.tolist()
     ):
         if tok_id in boundary_ids:
             continue
         label = id2label[pid]
         # Labels missing from COLOR_MAP fall back to black instead of raising.
         color = COLOR_MAP.get(label, "black")
         # Tokens originate from user input: escape before embedding in HTML.
         shown = escape(tok).replace(" ", "&nbsp;")
         rows.append(
             f"<span style='color:{color}; font-weight:bold;'>"
             f"{shown}</span>"
             f" → <span style='color:{color};'><b>{escape(label)}</b></span>"
             f" &nbsp; (conf: {conf:.3f})<br>"
         )
     return (
         "<div style='font-family: monospace; font-size: 18px;'>"
         + "".join(rows)
         + "</div>"
     )
+# ---------- GRADIO UI -------------
 with gr.Blocks() as demo:
+    gr.Markdown("## 🔥 IndicNER — Token-Level NER (Colored + Confidence)")
+    text_input = gr.Textbox(label="Enter text", lines=3, placeholder="Type sentence here...")
+    run_btn = gr.Button("Generate NER")
+    ner_html = gr.HTML(label="NER Output")
+    run_btn.click(fn=generate_ner_output, inputs=text_input, outputs=ner_html)
 demo.launch()