techysanoj committed on
Commit
77434f4
·
verified ·
1 Parent(s): 19235aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -67
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import torch
 
3
  from transformers import AutoTokenizer, AutoModelForTokenClassification
4
 
5
  MODEL_ID = "techysanoj/fine-tuned-IndicNER"
@@ -9,89 +10,63 @@ model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
9
 
10
  id2label = {int(k): v for k, v in model.config.id2label.items()}
11
 
12
-
13
- def merge_wordpieces(tokens, labels):
14
- merged_tokens = []
15
- merged_labels = []
16
-
17
- current_word = ""
18
- current_label = None
19
-
20
- for tok, lab in zip(tokens, labels):
21
- if tok.startswith("##"):
22
- # continuation subword
23
- current_word += tok[2:]
24
- else:
25
- # if a previous word is being built → flush it
26
- if current_word != "":
27
- merged_tokens.append(current_word)
28
- merged_labels.append(current_label if current_label else "O")
29
-
30
- # start new word
31
- current_word = tok
32
- current_label = "O" if lab == "O" else lab
33
-
34
- # if label is not O and current_label is still O → update
35
- if lab != "O" and (current_label == "O" or current_label is None):
36
- current_label = lab
37
-
38
- # flush last word
39
- if current_word != "":
40
- merged_tokens.append(current_word)
41
- merged_labels.append(current_label if current_label else "O")
42
-
43
- return merged_tokens, merged_labels
44
 
45
 
46
  def generate_ner_output(text):
47
  if not text.strip():
48
- return "Please enter text."
49
 
50
  inputs = tokenizer(text, return_tensors="pt")
51
- tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
 
52
 
53
  with torch.no_grad():
54
  logits = model(**inputs).logits
55
 
56
- pred_ids = torch.argmax(logits, dim=-1)[0].tolist()
57
- labels = [id2label[pid] for pid in pred_ids]
58
 
59
- # Remove CLS and SEP
60
- tokens = tokens[1:-1]
61
- labels = labels[1:-1]
62
 
63
- # Merge WordPieces
64
- merged_tokens, merged_labels = merge_wordpieces(tokens, labels)
65
 
66
- # Format final output
67
- output_lines = []
68
- for tok, lab in zip(merged_tokens, merged_labels):
69
- output_lines.append(f"{tok:<15} β†’ {lab}")
70
 
71
- return "\n".join(output_lines)
72
 
 
 
 
 
 
 
73
 
74
- # --------------- GRADIO UI ----------------
 
 
 
 
 
75
  with gr.Blocks() as demo:
76
- gr.Markdown("## πŸ”₯ IndicNER β€” Merged Token Output (Clean Words)")
77
-
78
- text_input = gr.Textbox(
79
- label="Enter text",
80
- placeholder="Type your Hindi/English sentence here...",
81
- lines=4
82
- )
83
-
84
- run_button = gr.Button("Generate NER")
85
-
86
- ner_output = gr.Textbox(
87
- label="NER Output (Merged Tokens)",
88
- lines=30
89
- )
90
-
91
- run_button.click(
92
- fn=generate_ner_output,
93
- inputs=text_input,
94
- outputs=ner_output
95
- )
96
 
97
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ import torch.nn.functional as F
4
  from transformers import AutoTokenizer, AutoModelForTokenClassification
5
 
6
  MODEL_ID = "techysanoj/fine-tuned-IndicNER"
 
10
 
11
  id2label = {int(k): v for k, v in model.config.id2label.items()}
12
 
13
+ # Color map for Gradio HTML output
14
+ COLOR_MAP = {
15
+ "B-PER": "red",
16
+ "I-PER": "red",
17
+ "B-ORG": "green",
18
+ "I-ORG": "green",
19
+ "B-LOC": "blue",
20
+ "I-LOC": "blue",
21
+ "O": "black"
22
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  def generate_ner_output(text):
26
  if not text.strip():
27
+ return "Please enter valid input."
28
 
29
  inputs = tokenizer(text, return_tensors="pt")
30
+ token_ids = inputs["input_ids"][0]
31
+ tokens = tokenizer.convert_ids_to_tokens(token_ids)
32
 
33
  with torch.no_grad():
34
  logits = model(**inputs).logits
35
 
36
+ # Softmax for confidence
37
+ probs = F.softmax(logits, dim=-1)[0]
38
 
39
+ pred_ids = torch.argmax(probs, dim=-1).tolist()
 
 
40
 
41
+ html_output = "<div style='font-family: monospace; font-size: 18px;'>"
 
42
 
43
+ for tok, pid, prob_vec in zip(tokens, pred_ids, probs):
44
+ label = id2label[pid]
45
+ conf = float(prob_vec[pid])
 
46
 
47
+ color = COLOR_MAP[label]
48
 
49
+ html_output += (
50
+ f"<span style='color:{color}; font-weight:bold;'>"
51
+ f"{tok.replace(' ', '&nbsp;')}</span>"
52
+ f" β†’ <span style='color:{color};'><b>{label}</b></span>"
53
+ f" &nbsp; (conf: {conf:.3f})<br>"
54
+ )
55
 
56
+ html_output += "</div>"
57
+
58
+ return html_output
59
+
60
+
61
+ # ---------- GRADIO UI -------------
62
  with gr.Blocks() as demo:
63
+ gr.Markdown("## πŸ”₯ IndicNER β€” Token-Level NER (Colored + Confidence)")
64
+
65
+ text_input = gr.Textbox(label="Enter text", lines=3, placeholder="Type sentence here...")
66
+ run_btn = gr.Button("Generate NER")
67
+
68
+ ner_html = gr.HTML(label="NER Output")
69
+
70
+ run_btn.click(fn=generate_ner_output, inputs=text_input, outputs=ner_html)
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  demo.launch()