Spaces:

Shenuki
/

NER

Sleeping

App Files Files Community

Shenuki committed on May 14, 2025

Commit

a70a295

verified ·

1 Parent(s): e08081f

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -102

app.py CHANGED Viewed

@@ -1,106 +1,78 @@
 import gradio as gr
-from transformers import pipeline
-# Model names (keeping it programmatic)
-model_names = [
-    "dslim/bert-base-NER",
-    "dslim/bert-base-NER-uncased",
-    "dslim/bert-large-NER",
-    "dslim/distilbert-NER",
-]
-example_sent = (
-    "Nim Chimpsky was a chimpanzee at Columbia University named after Noam Chomsky."
-)
-# Programmatically build the model info dict
-model_info = {
-    model_name: {
-        "link": f"https://huggingface.co/{model_name}",
-        "usage": f"""from transformers import pipeline
-ner = pipeline("ner", model="{model_name}", grouped_entities=True)
-result = ner("{example_sent}")
-print(result)""",
-    }
-    for model_name in model_names
-}
-# Load models into a dictionary programmatically for the analyze function
-models = {
-    model_name: pipeline("ner", model=model_name, grouped_entities=True)
-    for model_name in model_names
-}
-# Function to display model info (link and usage code)
-def display_model_info(model_name):
-    info = model_info[model_name]
-    usage_code = info["usage"]
-    link_button = f'[Open model page for {model_name} ]({info["link"]})'
-    return usage_code, link_button
-# Function to run NER on input text
-def analyze_text(text, model_name):
-    ner = models[model_name]
-    ner_results = ner(text)
-    highlighted_text = []
-    last_idx = 0
-    for entity in ner_results:
-        start = entity["start"]
-        end = entity["end"]
-        label = entity["entity_group"]
-        # Add non-entity text
-        if start > last_idx:
-            highlighted_text.append((text[last_idx:start], None))
-        # Add entity text
-        highlighted_text.append((text[start:end], label))
-        last_idx = end
-    # Add any remaining text after the last entity
-    if last_idx < len(text):
-        highlighted_text.append((text[last_idx:], None))
-    return highlighted_text
-with gr.Blocks() as demo:
-    gr.Markdown("# Named Entity Recognition (NER) with BERT Models")
-    # Dropdown for model selection
-    model_selector = gr.Dropdown(
-        choices=list(model_info.keys()),
-        value=list(model_info.keys())[0],
-        label="Select Model",
-    )
-    # Textbox for input text
-    text_input = gr.Textbox(
-        label="Enter Text",
-        lines=5,
-        value=example_sent,
-    )
-    analyze_button = gr.Button("Run NER Model")
-    output = gr.HighlightedText(label="NER Result", combine_adjacent=True)
-    # Outputs: usage code, model page link, and analyze button
-    code_output = gr.Code(label="Use this model", visible=True)
-    link_output = gr.Markdown(
-        f"[Open model page for {model_selector} ]({model_selector})"
-    )
-    # Button for analyzing the input text
-    analyze_button.click(
-        analyze_text, inputs=[text_input, model_selector], outputs=output
-    )
-    # Trigger the code output and model link when model is changed
-    model_selector.change(
-        display_model_info, inputs=[model_selector], outputs=[code_output, link_output]
-    )
-    # Call the display_model_info function on load to set initial values
-    demo.load(
-        fn=display_model_info,
-        inputs=[model_selector],
-        outputs=[code_output, link_output],
     )
-demo.launch()

+import spacy
+import requests
+import wikipedia
 import gradio as gr
+# 1) Load spaCy small English model (make sure to add en_core_web_sm in requirements.txt)
+# Loaded once at import time; get_context() calls this shared pipeline on every request.
+nlp = spacy.load("en_core_web_sm")
+# 2) Helper: Overpass query for POIs
+def fetch_osm(lat, lon, osm_filter, limit=5, timeout=30):
+    """Return named OSM elements within 1000 m of a point via the Overpass API.
+
+    Args:
+        lat: Latitude of the search centre.
+        lon: Longitude of the search centre.
+        osm_filter: Overpass tag filter inserted verbatim after each element
+            type, e.g. '["amenity"="restaurant"]'.
+        limit: Maximum number of elements Overpass is asked to output. Fewer
+            results may come back because unnamed elements are dropped after
+            the limit has been applied.
+        timeout: Client-side timeout in seconds for the HTTP request. Kept
+            above the 25 s server-side timeout declared in the query so the
+            server gets the chance to answer first.
+
+    Returns:
+        A list of dicts, each with a "name" key and, when the element carries
+        a "cuisine" tag, an "info" key holding that tag's value.
+
+    Raises:
+        requests.RequestException: on timeout, connection failure or a
+            non-2xx response.
+    """
+    overpass = """
+    [out:json][timeout:25];
+    (
+      node{filt}(around:1000,{lat},{lon});
+      way{filt}(around:1000,{lat},{lon});
+      rel{filt}(around:1000,{lat},{lon});
+    );
+    out center {lim};
+    """.format(filt=osm_filter, lat=lat, lon=lon, lim=limit)
+    # Without a timeout, requests waits forever on a stalled connection.
+    r = requests.post(
+        "https://overpass-api.de/api/interpreter",
+        data={"data": overpass},
+        timeout=timeout,
+    )
+    # Fail with a clear HTTP error instead of an opaque JSON decode error
+    # when the server answers with an error page (e.g. rate limiting).
+    r.raise_for_status()
+    elems = r.json().get("elements", [])
+    results = []
+    for el in elems:
+        tags = el.get("tags", {})
+        name = tags.get("name")
+        if not name:
+            continue
+        poi = {"name": name}
+        if "cuisine" in tags:
+            poi["info"] = tags["cuisine"]
+        results.append(poi)
+    return results
+# 3) Geocode via Nominatim
+def geocode(place: str, timeout: float = 10):
+    """Resolve a place name to coordinates using the Nominatim search API.
+
+    Args:
+        place: Free-text place name sent as the ``q`` query parameter.
+        timeout: Client-side timeout in seconds for the HTTP request.
+
+    Returns:
+        A ``(lat, lon)`` tuple of floats taken from the first search hit, or
+        ``None`` when the search returns no results.
+
+    Raises:
+        requests.RequestException: on timeout, connection failure or a
+            non-2xx response.
+    """
+    r = requests.get(
+        "https://nominatim.openstreetmap.org/search",
+        params={"q": place, "format": "json", "limit": 1},
+        headers={"User-Agent":"iVoiceContext/1.0"},
+        # Without a timeout, requests waits forever on a stalled connection.
+        timeout=timeout,
     )
+    # Surface HTTP errors directly rather than failing later while parsing
+    # or indexing an error payload.
+    r.raise_for_status()
+    data = r.json()
+    if not data:
+        return None
+    return float(data[0]["lat"]), float(data[0]["lon"])
+# 4) Main context extractor
+def get_context(text):
+    """Extract places, people and organisations from text and enrich each one.
+
+    Entities labelled GPE or LOC are geocoded and looked up for nearby
+    restaurants and attractions; entities labelled PERSON or ORG get a
+    two-sentence Wikipedia summary.
+
+    Args:
+        text: Input text to run through the spaCy pipeline.
+
+    Returns:
+        A dict keyed by entity text, in order of first appearance. Location
+        entries have "type": "location" plus either "restaurants" and
+        "attractions" lists or an "error" message; other entries have
+        "type": "wiki" and a "summary". When no relevant entity is found,
+        returns {"error": "no named entities found"}.
+    """
+    doc = nlp(text)
+    # Map each entity text to the label of its first mention that carries a
+    # label of interest. Resolving the label among the filtered mentions only
+    # avoids picking up an unrelated label (e.g. PRODUCT) from an earlier
+    # mention of the same text, and a dict keeps the output order stable.
+    wanted = {}
+    for e in doc.ents:
+        if e.label_ in ("GPE","LOC","PERSON","ORG"):
+            wanted.setdefault(e.text, e.label_)
+    out = {}
+    for ent, label in wanted.items():
+        if label in ("GPE","LOC"):
+            geo = geocode(ent)
+            if not geo:
+                out[ent] = {"type":"location","error":"could not geocode"}
+            else:
+                lat, lon = geo
+                out[ent] = {
+                    "type": "location",
+                    "restaurants": fetch_osm(lat, lon, '["amenity"="restaurant"]'),
+                    "attractions": fetch_osm(lat, lon, '["tourism"="attraction"]'),
+                }
+        else:  # PERSON or ORG
+            # Deliberately best-effort: any lookup failure degrades to a
+            # placeholder summary instead of failing the whole request.
+            try:
+                summ = wikipedia.summary(ent, sentences=2)
+            except Exception:
+                summ = "No summary available"
+            out[ent] = {"type":"wiki","summary": summ}
+    if not out:
+        return {"error":"no named entities found"}
+    return out
+# 5) Gradio interface
+# Exposes get_context through a single multi-line text box; the returned dict
+# is rendered with Gradio's JSON output component.
+iface = gr.Interface(
+    title="iVoice Context-Aware API",
+    description="Extracts people, places, orgs from text and returns nearby POIs or Wikipedia summaries.",
+    fn=get_context,
+    inputs=gr.Textbox(placeholder="Enter or paste your translated text…", lines=3),
+    outputs="json",
+)
+# Start the app only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    iface.launch()