techysanoj committed on
Commit
bd582d6
·
verified ·
1 Parent(s): 3af6dfa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -32
app.py CHANGED
@@ -7,49 +7,56 @@ MODEL_ID = "techysanoj/fine-tuned-IndicNER"
7
# Load the fine-tuned IndicNER tokenizer and token-classification head
# from the Hugging Face Hub (MODEL_ID is defined earlier in the file).
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# config.id2label keys can come back as strings after (de)serialization;
# normalize them to int so lookups by argmax-produced ids succeed.
id2label = {int(k): v for k, v in model.config.id2label.items()}
11
 
12
 
13
def ner_predict(text):
    """Token-level NER over *text*.

    Returns a tuple ``(pretty_output, rows)`` where ``rows`` is a list of
    ``[token, label]`` pairs (one per subword token, special tokens included)
    and ``pretty_output`` is the same data as an aligned text listing.
    """
    # tokenize input
    inputs = tokenizer(text, return_tensors="pt")
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    # run model — inference only, so gradient tracking is disabled
    with torch.no_grad():
        logits = model(**inputs).logits

    # highest-scoring label id per token
    pred_ids = torch.argmax(logits, dim=-1)[0].tolist()

    # build output table
    rows = []
    for tok, pid in zip(tokens, pred_ids):
        rows.append([tok, id2label[pid]])

    # pretty text version: tokens padded to 15 chars for column alignment
    pretty_output = ""
    for tok, lab in rows:
        pretty_output += f"{tok:15} → {lab}\n"

    return pretty_output, rows
35
-
36
-
37
# gradio UI — built and launched at module import time (script style)
with gr.Blocks(title="Indic NER Token-wise Output") as demo:
    gr.Markdown("🔥 Indian Language NER — Token Level Output (Hindi + English)")

    inp = gr.Textbox(lines=3, label="Enter text")

    btn = gr.Button("Run NER")

    out_text = gr.Textbox(label="Tokenized Output")
    # table view: one [Token, Label] row per subword token
    out_table = gr.Dataframe(
        headers=["Token", "Label"],
        datatype=["str", "str"],
        label="Table View",
        wrap=True
    )

    # ner_predict returns (pretty_text, rows) matching [out_text, out_table]
    btn.click(fn=ner_predict, inputs=inp, outputs=[out_text, out_table])

demo.launch()
 
 
 
7
# Load the fine-tuned IndicNER tokenizer and token-classification model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Convert id2label keys to int — config keys can deserialize as strings,
# while torch.argmax yields integer ids.
id2label = {int(k): v for k, v in model.config.id2label.items()}
12
 
13
 
14
def ner_predict(text):
    """Run token-level NER on *text*.

    Args:
        text: Raw input string from the Gradio textbox (may be None or blank).

    Returns:
        A tuple ``(pretty_text, rows)``: ``rows`` is a list of
        ``[token, label]`` pairs (one per subword token), and ``pretty_text``
        is the same data rendered as an aligned text listing. For empty
        input, a prompt message and an empty row list are returned.
    """
    # Gradio can deliver None for a cleared textbox; guard before .strip()
    # to avoid an AttributeError.
    if not text or not text.strip():
        return "Please enter some text.", []

    # tokenize text
    inputs = tokenizer(text, return_tensors="pt")
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

    # model forward — inference only, no gradient tracking
    with torch.no_grad():
        logits = model(**inputs).logits

    # highest-scoring label id per token
    pred_ids = torch.argmax(logits, dim=-1)[0].tolist()

    rows = [[tok, id2label[pid]] for tok, pid in zip(tokens, pred_ids)]

    # join() avoids quadratic += string concatenation on long inputs;
    # tokens padded to 15 chars for column alignment
    pretty_text = "".join(f"{tok:15} → {label}\n" for tok, label in rows)

    return pretty_text, rows
37
+
38
+
39
def build_ui():
    """Assemble and return the Gradio Blocks interface for token-level NER."""
    with gr.Blocks(title="Indic NER Token Viewer") as demo:
        gr.Markdown("## 🔥 Hindi + English Token-level NER (Fine-tuned Model)")

        # Input widgets
        text_box = gr.Textbox(lines=3, label="Enter text to analyze")
        run_button = gr.Button("Run NER")

        # Output widgets: aligned text view plus a per-token table
        formatted_view = gr.Textbox(label="Formatted Output", lines=20)
        table_view = gr.Dataframe(
            headers=["Token", "NER Label"],
            datatype=["str", "str"],
            label="Detailed Table",
        )

        # ner_predict returns (pretty_text, rows) matching the two outputs
        run_button.click(
            fn=ner_predict,
            inputs=text_box,
            outputs=[formatted_view, table_view],
        )

    return demo
57
+
58
+
59
# Standard entry-point guard: build and serve the UI only when run as a
# script, not on import.
# NOTE(review): the original comment claimed this prevents an asyncio
# file-descriptor crash in Gradio; the guard itself only defers launching
# to script execution — confirm the underlying issue.
if __name__ == "__main__":
    demo = build_ui()
    # Bind to all interfaces on port 7860 (Hugging Face Spaces convention).
    demo.launch(server_name="0.0.0.0", server_port=7860)