MNGames committed on
Commit
e19d562
·
verified ·
1 Parent(s): 9ae87a6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import gradio as gr
3
+ import re
4
+
5
# Build the NER pipeline at import time; analyze_text() reuses this one
# module-level instance for every sentence it processes.
ner = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)
7
+
8
# Abbreviations (as regex fragments) whose trailing period must NOT be
# treated as the end of a sentence.
_PROTECTED_ABBREVIATIONS = (
    r"[A-Z]\.",  # single-letter initials such as "J." in "J. K. Rowling"
    r"D\.C\.",
    r"U\.S\.",
    r"Mr\.",
    r"Mrs\.",
    r"Dr\.",
    r"Jr\.",
    r"Sr\.",
    r"vs\.",
    r"Inc\.",
    r"Ltd\.",
    r"etc\.",
)

# Python's `re` only accepts fixed-width look-behind assertions, so the
# abbreviations cannot be combined into a single `(?<!a|bb|ccc)` group
# (that raises "look-behind requires fixed-width pattern"). Instead, chain
# one negative look-behind per abbreviation; each one is fixed-width.
# A boundary is: whitespace preceded by ., ! or ? (but not by a protected
# abbreviation) and followed by a letter or an opening quote.
_SENTENCE_BOUNDARY = re.compile(
    "".join(rf"(?<!\b{abbr})" for abbr in _PROTECTED_ABBREVIATIONS)
    + r"(?<=[.!?])\s+(?=[A-Z'‘“a-z])"
)


def split_sentences(text):
    """Split *text* into a list of sentences.

    A sentence boundary is a run of whitespace that follows ``.``, ``!`` or
    ``?`` and precedes a letter or an opening quote. Periods that belong to
    a protected abbreviation (``Mr.``, ``Dr.``, ``U.S.``, single capital
    initials, ...) are never treated as boundaries.

    Args:
        text: The raw input text. ``None`` or an empty string is accepted.

    Returns:
        A list of stripped, non-empty sentence strings, in input order.
        Empty, whitespace-only, or ``None`` input yields an empty list.
    """
    if not text:
        return []

    pieces = _SENTENCE_BOUNDARY.split(text.strip())
    return [piece.strip() for piece in pieces if piece.strip()]
17
+
18
def analyze_text(text):
    """Split *text* into sentences and run the NER pipeline on each one.

    Args:
        text: The input text handed over by the Gradio interface.

    Returns:
        A dict with two keys:
          - ``"sentences"``: one entry per sentence, each holding its
            1-based ``"sentence_number"``, the ``"sentence"`` string, and
            ``"entities"`` (whatever the module-level ``ner`` pipeline
            returns for that sentence).
          - ``"total_sentences"``: the number of sentences found.
    """
    sentences = split_sentences(text)

    # Numbering starts at 1 so the output reads naturally for humans.
    per_sentence = [
        {
            "sentence_number": number,
            "sentence": sentence,
            "entities": ner(sentence),
        }
        for number, sentence in enumerate(sentences, start=1)
    ]

    return {"sentences": per_sentence, "total_sentences": len(sentences)}
32
+
33
# Gradio front end: a multi-line text box in, a JSON view of the
# analyze_text() result out.
_text_input = gr.Textbox(
    label="Input Text",
    lines=6,
    placeholder="Paste your article...",
)
_json_output = gr.JSON(label="NER + Sentence Output")

demo = gr.Interface(
    fn=analyze_text,
    inputs=_text_input,
    outputs=_json_output,
    title="Sentence Splitter + NER API",
    description="Splits text into sentences (with abbreviation protection) and runs NER using dslim/bert-base-NER.",
)

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()