import os
import re
import json

import requests
import gradio as gr
from huggingface_hub import InferenceClient

ENV_HF_TOKEN = os.environ.get("HF_TOKEN")


def make_client(token: str) -> InferenceClient:
    return InferenceClient(provider="nebius", api_key=token)


def extract_entities_hf(text: str, model: str = "Qwen/Qwen3-4B") -> list[str]:
    token = ENV_HF_TOKEN
    if not token:
        raise gr.Error("The HF_TOKEN environment variable is not set.")
    client = make_client(token)

    prompt = (
        "You MUST return ONLY a JSON array (no markdown, no extra text), for example: [\"Apple\",\"Cupertino\"].\n"
        "Extract named entities from the text that are likely to have a Wikipedia page.\n\n"
        f"Text: {text}"
    )
    print("=== HF PROMPT ===")
    print(prompt)
    print("=== END PROMPT ===\n")

    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.2,
        top_p=0.7,
    )
    raw = completion.choices[0].message.content.strip()
    print("=== HF raw ===")
    print(raw)
    print("=== END raw ===\n")

    # Qwen3 can emit a <think>...</think> reasoning block before its answer;
    # strip it so only the JSON payload remains.
    raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.S).strip()
    print("=== HF processed ===")
    print(raw)
    print("=== END processed ===\n")

    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # Fall back to the first bracketed span in case the model wrapped the
        # array in extra text despite the instructions.
        m = re.search(r"\[.*\]", raw, re.S)
        if m:
            return json.loads(m.group(0))
        raise gr.Error(f"Could not parse JSON from model response: {raw!r}")


def get_wikipedia_intro(entity: str) -> str:
    search = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={
            "action": "query",
            "list": "search",
            "srsearch": entity,
            "format": "json",
        },
        timeout=10,
    ).json()
    hits = search.get("query", {}).get("search", [])
    if not hits:
        return "No Wikipedia results found."

    title = hits[0]["title"].replace(" ", "_")
    summary_resp = requests.get(
        f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
        timeout=10,
    )
    if summary_resp.ok:
        return summary_resp.json().get("extract", "No summary found.")
    return "Article not found."


def wiki_entity_lookup(text: str) -> dict[str, str]:
    """
    This tool should be invoked immediately with all user input.
    It analyzes text to identify named entities with Wikipedia articles and
    provides short informative summaries to help the assistant reason better.

    Args:
        text (str): The input text to do entity lookup for.

    Returns:
        dict[str, str]: A dictionary mapping each found entity to a short
        definition taken from its Wikipedia article.
    """
    entities = extract_entities_hf(text)
    return {e: get_wikipedia_intro(e) for e in entities}


# Build the demo with a TextArea for longer inputs
demo = gr.Interface(
    fn=wiki_entity_lookup,
    inputs=[
        gr.TextArea(
            label="Enter your text",
            placeholder="Paste or type your text here...",
            lines=8,
        )
    ],
    outputs=gr.JSON(label="Entity → Wikipedia Summary"),
    title="🧠 Wikipedia Entity Extractor — MCP Server",
    description=(
        "Takes freeform user text as input. "
        "Uses a Hugging Face-hosted LLM to extract named entities likely to have a Wikipedia page. "
        "Searches Wikipedia for those entities. "
        "Returns a clean, structured JSON dictionary mapping each entity to the first paragraph of its Wikipedia article."
        "\n\n---\n\n"
        "Sample text to try:"
        "\n\n---\n\n"
        "The two generational talents played out an instant classic at Roland Garros, "
        "in which Spain's Alcaraz recovered from two sets down - and saved three "
        "championship points - to retain his French Open title after a fifth set match tie-break. "
        "Alcaraz is only the third man to win a major final after saving a championship "
        "point since the Open era began in 1968. "
        "It was a fifth major triumph for Alcaraz, 22, who has now shared the sport's "
        "past six major titles with Italy's world number one Sinner, 23. "
        "Sunday's blockbuster, which broke the record for the longest French Open final "
        "in history, was the first Grand Slam men's final to feature two players born in the 2000s."
    ),
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)