Spaces:
Build error
Build error
import html
import re

import gradio as gr
import requests
import wikipedia
from langdetect import detect
from transformers import pipeline
# Load the multilingual NER model once at import time so every request reuses it.
# `aggregation_strategy="simple"` merges word pieces into whole entities; it is
# the replacement for the deprecated `grouped_entities=True` flag and yields the
# same grouped output (dicts carrying a "word" key).
ner_pipeline = pipeline(
    "ner",
    model="Davlan/xlm-roberta-base-ner-hrl",
    aggregation_strategy="simple",
)
# Get Wikidata entity info
def get_wikidata_info(entity, lang="en"):
    """Look up an entity on Wikidata by exact label and return its basic facts.

    Sends a SPARQL query to the public Wikidata endpoint asking for one item
    whose ``rdfs:label`` equals *entity* in language *lang*, together with its
    label, description and (optional) P625 coordinate.

    The lookup is best-effort: any network, HTTP or parsing problem yields the
    fallback tuple instead of raising.

    Args:
        entity: Label to search for. It is escaped before being placed inside
            the SPARQL string literal.
        lang: Language tag used for the label match and the label service.
            Values that are not a plain language tag are rejected without
            querying, because they would be spliced into the query verbatim.

    Returns:
        A ``(label, description, coordinate, wikidata_url)`` tuple of strings.
        When nothing is found or the lookup fails, the tuple is
        ``(entity, "No description available.", "", "")``.
    """
    fallback = (entity, "No description available.", "", "")

    # Nothing to look up: skip the network round-trip entirely.
    if not entity or not entity.strip():
        return fallback

    # `lang` is interpolated outside of any quoting (`"..."@lang`), so only a
    # well-formed language tag may pass through.
    if not isinstance(lang, str) or not re.fullmatch(
        r"[A-Za-z]{2,8}(?:-[A-Za-z0-9]{1,8})*", lang
    ):
        return fallback

    # Escape characters that would terminate or corrupt the SPARQL literal.
    safe_entity = entity.translate(
        str.maketrans({"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r"})
    )

    query = f'''
    SELECT ?item ?itemLabel ?itemDescription ?coordinate WHERE {{
      ?item rdfs:label "{safe_entity}"@{lang}.
      OPTIONAL {{ ?item wdt:P625 ?coordinate. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "{lang}". }}
    }} LIMIT 1
    '''
    url = "https://query.wikidata.org/sparql"
    headers = {
        "Accept": "application/sparql-results+json",
        # Wikimedia asks API clients to identify themselves with a descriptive
        # User-Agent; generic library defaults may be throttled or refused.
        "User-Agent": "ner-wikidata-lookup-demo/1.0 (Gradio app; python-requests)",
    }

    try:
        # A timeout keeps a stalled endpoint from hanging the whole UI request.
        response = requests.get(
            url, params={"query": query}, headers=headers, timeout=10
        )
        response.raise_for_status()
        bindings = response.json()["results"]["bindings"]
        if not bindings:
            return fallback

        item = bindings[0]
        label = item.get("itemLabel", {}).get("value", entity)
        description = item.get("itemDescription", {}).get(
            "value", "No description available."
        )
        coord = item.get("coordinate", {}).get("value", "")
        wikidata_link = item.get("item", {}).get("value", "")
        return label, description, coord, wikidata_link
    except (
        requests.RequestException,  # connection errors, timeouts, HTTP errors
        ValueError,  # body was not valid JSON
        KeyError,  # JSON did not have the expected layout
        TypeError,
        AttributeError,
        IndexError,
    ):
        # Deliberate best-effort: the caller renders the fallback text.
        return fallback
# Get Wikipedia details
def get_wikipedia_details(entity, lang="en"):
    """Fetch the Wikipedia page for *entity* and return a few of its details.

    Args:
        entity: Page title (or something close to it) to look up.
        lang: Language passed to ``wikipedia.set_lang`` before the lookup.

    Returns:
        A ``(url, categories, links)`` tuple: the page URL plus at most five
        categories and five linked page titles. On any failure, or when
        *entity* is empty, returns ``("", [], [])``.
    """
    # An empty title cannot resolve to a page; skip the network call.
    if not entity or not entity.strip():
        return "", [], []

    try:
        wikipedia.set_lang(lang)
        page = wikipedia.page(entity, auto_suggest=True, redirect=True)
        categories = page.categories[:5]
        links = page.links[:5]
        return page.url, categories, links
    except Exception:
        # Best-effort enrichment: missing pages, disambiguation pages and
        # network problems all degrade to "no Wikipedia details". Catching
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate.
        return "", [], []
# Enrich info with tags and intent
def enrich_info(summary):
    """Derive display tags and a coarse intent label from a description.

    Each tag rule is a case-insensitive regex; every rule that matches
    *summary* contributes its tag, in rule order. The intent is the label of
    the first matching intent rule, or a generic default when none match.

    Args:
        summary: Free-text description to scan.

    Returns:
        A ``(related_info, intent)`` pair: the list of matched tag strings and
        the intent label.
    """
    tag_rules = (
        (r'capital', "ποΈ Capital city"),
        (r'tourism|attraction', "π§³ Popular for tourism"),
        (r'population', "π₯ Densely populated"),
        (r'university|education', "π Educational hub"),
        (r'beach', "ποΈ Known for beaches"),
    )
    # Order matters: travel takes precedence over education.
    intent_rules = (
        (r'tourism|travel', "Looking for travel guidance"),
        (r'university|education', "Seeking educational info"),
    )

    related_info = [
        tag
        for pattern, tag in tag_rules
        if re.search(pattern, summary, re.IGNORECASE)
    ]
    intent = next(
        (
            label
            for pattern, label in intent_rules
            if re.search(pattern, summary, re.IGNORECASE)
        ),
        "General knowledge inquiry",
    )
    return related_info, intent
# Main combined function
def _is_lookup_candidate(name):
    """Return True when *name* is made of letters, optionally joined by
    whitespace, hyphens, apostrophes or periods (e.g. "New York", "St. Louis").
    """
    return re.sub(r"[\s\-'.’]", "", name).isalpha()


def _osm_link(coord):
    """Build an OpenStreetMap anchor from a ``Point(lon lat)`` string.

    Returns "" when *coord* is not exactly that shape with plain decimal
    numbers, so nothing unvalidated ends up inside the URL.
    """
    number = r"[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?"
    match = re.fullmatch(rf"Point\(({number}) ({number})\)", coord.strip())
    if match is None:
        return ""
    lon, lat = match.groups()
    return (
        f"<a href='https://www.openstreetmap.org/?mlat={lat}&mlon={lon}' "
        "target='_blank'>π View on OpenStreetMap</a>"
    )


def _render_entity(label, desc, coord, wikidata_url, wiki_url, wiki_categories, wiki_links):
    """Render one entity's HTML section, escaping all externally sourced text."""
    esc = html.escape
    related_tags, detected_intent = enrich_info(desc)

    anchors = []
    if wikidata_url:
        anchors.append(f"<a href='{esc(wikidata_url)}' target='_blank'>π Wikidata</a>")
    if wiki_url:
        anchors.append(f"<a href='{esc(wiki_url)}' target='_blank'>π Wikipedia</a>")
    links = " ".join(anchors)

    osm_link = _osm_link(coord) if coord else ""

    tags_html = (
        f"<p><b>Related Tags:</b> {' | '.join(esc(tag) for tag in related_tags)}</p>"
        if related_tags
        else ""
    )
    intent_html = f"<p><b>Intent:</b> {esc(detected_intent)}</p>"

    extra_parts = []
    if wiki_categories:
        joined = ", ".join(esc(str(cat)) for cat in wiki_categories)
        extra_parts.append(f"<p><b>Wikipedia Categories:</b> {joined}</p>")
    if wiki_links:
        joined = ", ".join(esc(str(topic)) for topic in wiki_links)
        extra_parts.append(f"<p><b>Related Topics:</b> {joined}</p>")
    extra_info = "".join(extra_parts)

    return (
        "\n"
        f"<hr><h3>π {esc(label)}</h3>\n"
        f"<p>{esc(desc)}</p>\n"
        f"<p>{links}</p>\n"
        f"<p>{osm_link}</p>\n"
        f"{tags_html}\n"
        f"{intent_html}\n"
        f"{extra_info}\n"
    )


def ner_wikidata_lookup(text):
    """Detect named entities in *text* and render an enriched HTML report.

    For each distinct entity the function looks up Wikidata and Wikipedia,
    derives tags and an intent from the Wikidata description, and emits one
    HTML section. Text coming from the user or from those services is
    HTML-escaped before being embedded.

    Args:
        text: Input sentence(s) in any language.

    Returns:
        An HTML string starting with the detected language followed by one
        section per entity, or the plain message "No named entities found."
        when the input is empty or no usable entity is detected.
    """
    # Empty input cannot contain entities; avoid running the model on it.
    if not text or not text.strip():
        return "No named entities found."

    try:
        detected_lang = detect(text)
    except Exception:
        # Language detection fails on inputs without usable features;
        # fall back to English rather than aborting the lookup.
        detected_lang = "en"

    seen = set()
    sections = [
        f"<b>π Detected Language:</b> <code>{html.escape(str(detected_lang))}</code><br><br>"
    ]
    for ent in ner_pipeline(text):
        name = ent["word"].strip()
        # Skip repeats and fragments that are not word-like. Multi-word names
        # are accepted: a bare `isalpha()` check would drop "New York".
        if name in seen or not _is_lookup_candidate(name):
            continue
        seen.add(name)

        label, desc, coord, wikidata_url = get_wikidata_info(name, lang=detected_lang)
        wiki_url, wiki_categories, wiki_links = get_wikipedia_details(
            name, lang=detected_lang
        )
        sections.append(
            _render_entity(
                label, desc, coord, wikidata_url, wiki_url, wiki_categories, wiki_links
            )
        )

    return "".join(sections) if seen else "No named entities found."
# Gradio front end: a multi-line text box in, rendered HTML out.
iface = gr.Interface(
    fn=ner_wikidata_lookup,
    inputs=gr.Textbox(
        lines=4,
        placeholder="Type any sentence in any language...",
    ),
    outputs=gr.HTML(),
    title="π NER with Wikidata + Wikipedia + Smart Tags",
    description=(
        "Detects named entities, retrieves Wikidata descriptions, adds "
        "Wikipedia links, maps, and enriches output with semantic tags, "
        "intent detection, categories, and related topics."
    ),
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()