Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import sqlite3 | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download, HfApi | |
| from fastapi import FastAPI | |
| from fastapi.responses import JSONResponse | |
| import os | |
| import time | |
| import json | |
| from typing import Optional | |
# ===== CONFIGURATION =====
# Language codes accepted by the profile lookups.
TARGET_LANGUAGES = [
    'de', 'en', 'es', 'fr', 'it', 'ja',
    'nl', 'pl', 'pt', 'ru', 'zh',
]
# Hub dataset repository and the two files read from it.
INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
PROGRESS_FILENAME = "indexing_progress.json"
# Prefix shared by every node and relation id built in this file.
CONCEPTNET_BASE = "http://conceptnet.io"
# =========================

print(f"π Languages: {', '.join(code.upper() for code in TARGET_LANGUAGES)}")

# The first of these environment variables holding a non-empty value is used.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or os.environ.get("HF_API_TOKEN")
)
def log_progress(message, level="INFO"):
    """Print *message* to stdout with a wall-clock timestamp and a level marker.

    Args:
        message: Text to print.
        level: One of "INFO", "SUCCESS", "ERROR", "WARN", "DEBUG"; any other
            value produces an empty marker.
    """
    markers = {
        "INFO": "βΉοΈ ",
        "SUCCESS": "β ",
        "ERROR": "β",
        "WARN": "β οΈ ",
        "DEBUG": "π",
    }
    marker = markers.get(level, "")
    now = time.strftime("%H:%M:%S")
    print(f"[{now}] {marker} {message}")
def check_remote_progress():
    """Read the indexing progress record from the Hub dataset repository.

    Returns:
        The parsed JSON content of ``PROGRESS_FILENAME``, or
        ``{"indexing_complete": False}`` when no token is configured or the
        repository/file cannot be read.
    """
    if not HF_TOKEN:
        return {"indexing_complete": False}
    try:
        # Probe the repository first; any failure falls through to the
        # handler below and is reported as "not complete".
        HfApi().repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        progress_path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=PROGRESS_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        with open(progress_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as exc:
        # Still best-effort, but KeyboardInterrupt/SystemExit now propagate
        # and the reason for the fallback is visible in the log.
        log_progress(f"Could not read remote progress: {exc}", "WARN")
        return {"indexing_complete": False}
def create_indexed_database():
    """Download the pre-built indexed database once remote indexing is complete.

    Returns:
        Local path of the downloaded database file, or ``None`` when the
        progress record does not flag indexing as complete or the download
        fails.
    """
    progress = check_remote_progress()
    if not progress.get("indexing_complete"):
        return None
    try:
        path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=INDEXED_DB_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as exc:
        # Report the failure instead of swallowing it; the caller still
        # receives None, as before.
        log_progress(f"Indexed DB download failed: {exc}", "ERROR")
        return None
    log_progress("Indexed DB loaded", "SUCCESS")
    return path
# Resolved once at import time; None when create_indexed_database() could not
# obtain the database file.
DB_PATH = create_indexed_database()
class _ClosingConnection(sqlite3.Connection):
    """SQLite connection whose ``with`` block also closes it on exit.

    The stock ``sqlite3.Connection`` context manager only commits or rolls
    back the open transaction, so ``with get_db_connection() as conn:`` would
    otherwise leave one open handle behind per request.
    """

    def __exit__(self, exc_type, exc_value, traceback):
        try:
            return super().__exit__(exc_type, exc_value, traceback)
        finally:
            self.close()


def get_db_connection():
    """Open a tuned connection to the indexed database.

    Returns:
        A ``sqlite3.Connection`` (sub-class) that is closed automatically
        when used as a context manager.

    Raises:
        RuntimeError: If ``DB_PATH`` is unset because the database could not
            be obtained at start-up.
        sqlite3.Error: If the file cannot be opened or a PRAGMA fails.
    """
    if not DB_PATH:
        raise RuntimeError("Indexed database is not available")
    conn = sqlite3.connect(DB_PATH, check_same_thread=False, factory=_ClosingConnection)
    try:
        # Negative cache_size is expressed in KiB (about 250 MiB of page cache).
        conn.execute("PRAGMA cache_size = -256000")
        # Allow up to 4 GiB of the file to be memory-mapped.
        conn.execute("PRAGMA mmap_size = 4294967296")
    except Exception:
        # Do not leak the handle when tuning fails.
        conn.close()
        raise
    return conn
# (name, relation URI) pairs queried for every semantic profile, in display order.
RELATIONS = [
    (name, f"{CONCEPTNET_BASE}/r/{name}")
    for name in (
        "IsA",
        "PartOf",
        "HasA",
        "UsedFor",
        "CapableOf",
        "Causes",
        "HasProperty",
        "Synonym",
        "Antonym",
        "AtLocation",
        "RelatedTo",
        "DerivedFrom",
    )
]
def get_semantic_profile_json(word: str, lang: str = 'en', max_per_relation: int = 10):
    """Build the semantic profile of *word* as a JSON-serialisable dict.

    Args:
        word: Term to look up. It is trimmed, lower-cased and its spaces are
            replaced by underscores, then matched as a prefix of node ids
            under ``{CONCEPTNET_BASE}/c/{lang}/``.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        max_per_relation: Maximum rows fetched per relation and direction.
            Negative values are clamped to 0 because SQLite treats a negative
            LIMIT as "no limit".

    Returns:
        On success, a dict with keys ``word``, ``language``, ``nodes`` (up to
        five ``{"id", "label"}`` entries), ``relations`` (per relation name:
        ``outgoing``, ``incoming``, ``count``) and ``total_edges``.
        Otherwise ``{"error": <message>}``.
    """
    # Normalise BEFORE validating: a whitespace-only word would otherwise
    # collapse to "" and produce the pattern ".../c/<lang>/%", which matches
    # every node of the language.
    word = word.strip().lower().replace(' ', '_') if isinstance(word, str) else ""
    if not word or lang not in TARGET_LANGUAGES:
        return {"error": "Invalid input"}

    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
    result = {
        "word": word,
        "language": lang,
        "nodes": [],
        "relations": {},
        "total_edges": 0
    }

    # Loop-invariant statements: edges leaving / entering the matched nodes.
    outgoing_sql = """
        SELECT en.label, e.weight, en.id
        FROM edge e
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT ?
    """
    incoming_sql = """
        SELECT s.label, e.weight, s.id
        FROM edge e
        JOIN node s ON e.start_id = s.id
        WHERE e.end_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT ?
    """

    try:
        row_limit = max(0, int(max_per_relation))
        with get_db_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            result["nodes"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]
            if not result["nodes"]:
                return {"error": "Word not found"}

            for rel_name, rel_url in RELATIONS:
                bound = (like_path, rel_url, row_limit)
                cursor.execute(outgoing_sql, bound)
                outgoing = [{"target": label, "weight": weight, "target_id": eid}
                            for label, weight, eid in cursor.fetchall()]
                cursor.execute(incoming_sql, bound)
                incoming = [{"source": label, "weight": weight, "source_id": sid}
                            for label, weight, sid in cursor.fetchall()]
                # Relations without any edge are omitted from the result.
                if outgoing or incoming:
                    found = len(outgoing) + len(incoming)
                    result["relations"][rel_name] = {
                        "outgoing": outgoing,
                        "incoming": incoming,
                        "count": found
                    }
                    result["total_edges"] += found
            return result
    except Exception as e:
        # API boundary: report the failure in-band rather than raising.
        return {"error": str(e)}
def get_semantic_profile(word, lang='en', progress=gr.Progress()):
    """Render the semantic profile of *word* as Markdown for the Gradio UI.

    Args:
        word: Term to look up. It is trimmed, lower-cased and its spaces are
            replaced by underscores, then matched as a prefix of node ids
            under ``{CONCEPTNET_BASE}/c/{lang}/``.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress callback, updated once before and once
            after each relation.

    Returns:
        A Markdown string: the profile, a "not found" notice, an "invalid
        input" notice, or an error message if the lookup raised.
    """
    log_progress(f"Profile: {word} ({lang})", "INFO")
    # Normalise BEFORE validating: a whitespace-only word would otherwise
    # collapse to "" and produce a pattern matching every node of the language.
    word = word.strip().lower().replace(' ', '_') if isinstance(word, str) else ""
    if not word or lang not in TARGET_LANGUAGES:
        return "β οΈ Invalid input"

    progress(0, desc="π Starting...")
    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
    # Fragments are collected and joined once instead of growing a string.
    parts = [f"# π§ Semantic Profile: '{word}' ({lang.upper()})\n\n"]

    # Loop-invariant statements: edges leaving / entering the matched nodes.
    outgoing_sql = """
        SELECT en.label, e.weight
        FROM edge e
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT 10
    """
    incoming_sql = """
        SELECT s.label, e.weight
        FROM edge e
        JOIN node s ON e.start_id = s.id
        WHERE e.end_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT 10
    """

    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            progress(0.05, desc="π Finding nodes...")
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            nodes = cursor.fetchall()
            if not nodes:
                return f"# π§ '{word}'\n\nβ οΈ Not found"
            log_progress(f"Found {len(nodes)} nodes", "SUCCESS")
            # Only the first three matches are listed in the header.
            for node_id, label in nodes[:3]:
                parts.append(f"**Node:** `{node_id}` β **{label}**\n")
            parts.append("\n")

            total = 0
            num_relations = len(RELATIONS)
            for i, (rel_name, rel_url) in enumerate(RELATIONS):
                progress((i + 0.1) / num_relations, desc=f"π {rel_name}...")
                parts.append(f"## {rel_name}\n\n")

                start_time = time.time()
                cursor.execute(outgoing_sql, (like_path, rel_url))
                outgoing = cursor.fetchall()
                elapsed = time.time() - start_time
                log_progress(f" {rel_name} out: {len(outgoing)} in {elapsed:.3f}s", "DEBUG")
                for label, weight in outgoing:
                    parts.append(f"- **{word}** {rel_name} β *{label}* `[{weight:.3f}]`\n")

                cursor.execute(incoming_sql, (like_path, rel_url))
                incoming = cursor.fetchall()
                for label, weight in incoming:
                    parts.append(f"- *{label}* {rel_name} β **{word}** `[{weight:.3f}]`\n")

                found = len(outgoing) + len(incoming)
                total += found
                if not found:
                    parts.append("*No results*\n")
                parts.append("\n")
                progress((i + 1) / num_relations, desc=f"β {rel_name}")

            progress(1.0, desc="β Complete!")
            parts.append(f"---\n**Total relations:** {total}\n")
            log_progress(f"Complete: {total} relations", "SUCCESS")
            return "".join(parts)
    except Exception as e:
        # UI boundary: log the traceback and show the message to the user.
        log_progress(f"Error: {e}", "ERROR")
        import traceback
        traceback.print_exc()
        return f"**β Error:** {e}"
def query_edges_json(start_node: Optional[str] = None,
                     relation: Optional[str] = None,
                     end_node: Optional[str] = None,
                     limit: int = 50):
    """Query edges and return them as a JSON-serialisable dict.

    Args:
        start_node: Optional start-node filter. A value starting with
            ``http://`` is used as an id prefix as-is; anything else is
            looked up under ``{CONCEPTNET_BASE}/c/en/``.
        relation: Optional relation filter: a full ``http://`` id, a
            ``/r/Name`` path, or a bare relation name. Matched exactly.
        end_node: Optional end-node filter, same rules as *start_node*.
        limit: Maximum number of rows. Negative values are clamped to 0
            because SQLite treats a negative LIMIT as "no limit".

    Returns:
        ``{"results": [row, ...], "count": n}`` where each row has the keys
        ``edge_id``, ``start_id``, ``relation``, ``end_id``, ``weight``,
        ``start_label`` and ``end_label``; or ``{"error": <message>}``.
    """
    query = """
        SELECT
            e.id, s.id, r.label, en.id, e.weight, s.label, en.label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params = []
    try:
        # Trim the filters so stray whitespace cannot end up inside a pattern.
        start_node = (start_node or "").strip()
        relation = (relation or "").strip()
        end_node = (end_node or "").strip()

        if start_node:
            if start_node.startswith('http://'):
                pattern = f"{start_node}%"
            else:
                pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
            query += " AND s.id LIKE ?"
            params.append(pattern)
        if relation:
            if relation.startswith('http://'):
                rel_value = relation
            elif relation.startswith('/r/'):
                rel_value = f"{CONCEPTNET_BASE}{relation}"
            else:
                rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
            query += " AND r.id = ?"
            params.append(rel_value)
        if end_node:
            if end_node.startswith('http://'):
                pattern = f"{end_node}%"
            else:
                pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
            query += " AND en.id LIKE ?"
            params.append(pattern)

        query += " ORDER BY e.weight DESC LIMIT ?"
        params.append(max(0, int(limit)))

        with get_db_connection() as conn:
            df = pd.read_sql_query(query, conn, params=params)
        if df.empty:
            return {"results": [], "count": 0}
        # The SELECT yields duplicate column names (id, label); assign
        # unambiguous ones before converting to records.
        df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
        return {
            "results": df.to_dict(orient='records'),
            "count": len(df)
        }
    except Exception as e:
        # API boundary: report the failure in-band rather than raising.
        return {"error": str(e)}
def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
    """Run the Query Builder search for the Gradio UI.

    Args:
        start_node: Start-node filter; blank means "any". A value starting
            with ``http://`` is used as an id prefix as-is; anything else is
            looked up under ``{CONCEPTNET_BASE}/c/en/``.
        relation: Relation filter; blank means "any". Accepts a full
            ``http://`` id, a ``/r/Name`` path, or a bare relation name.
        end_node: End-node filter, same rules as *start_node*.
        limit: Maximum number of rows. It is coerced to ``int`` and negative
            values are clamped to 0 (SQLite treats a negative LIMIT as
            "no limit").
        progress: Gradio progress callback.

    Returns:
        ``(dataframe, status_markdown)``; the dataframe is empty when nothing
        matched or the query failed.
    """
    log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
    progress(0, desc="π Building...")
    query = """
        SELECT
            e.id, s.id, r.label, en.id, e.weight, s.label, en.label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params = []
    try:
        # Use the trimmed values in the patterns too: previously "dog " passed
        # the blank check but was searched with its trailing space.
        start_node = (start_node or "").strip()
        relation = (relation or "").strip()
        end_node = (end_node or "").strip()

        with get_db_connection() as conn:
            progress(0.3, desc="π Filters...")
            if start_node:
                if start_node.startswith('http://'):
                    pattern = f"{start_node}%"
                else:
                    pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
                query += " AND s.id LIKE ?"
                params.append(pattern)
            if relation:
                if relation.startswith('http://'):
                    rel_value = relation
                elif relation.startswith('/r/'):
                    rel_value = f"{CONCEPTNET_BASE}{relation}"
                else:
                    rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
                query += " AND r.id = ?"
                params.append(rel_value)
            if end_node:
                if end_node.startswith('http://'):
                    pattern = f"{end_node}%"
                else:
                    pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
                query += " AND en.id LIKE ?"
                params.append(pattern)

            query += " ORDER BY e.weight DESC LIMIT ?"
            params.append(max(0, int(limit)))

            progress(0.6, desc="β‘ Running...")
            start_time = time.time()
            df = pd.read_sql_query(query, conn, params=params)
            elapsed = time.time() - start_time
            progress(1.0, desc="β Done!")
            log_progress(f"Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")

            if df.empty:
                return pd.DataFrame(), f"β οΈ No results ({elapsed:.2f}s)"
            # The SELECT yields duplicate column names (id, label); assign
            # unambiguous ones for display.
            df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
            return df, f"β {len(df)} results in {elapsed:.2f}s"
    except Exception as e:
        # UI boundary: log the traceback and show the message to the user.
        log_progress(f"Error: {e}", "ERROR")
        import traceback
        traceback.print_exc()
        return pd.DataFrame(), f"β {e}"
def run_raw_query(sql_query):
    """Execute a user-supplied SELECT statement for the Raw SQL tab.

    Args:
        sql_query: SQL text. Anything that is missing, blank, or does not
            start with ``SELECT`` (case-insensitive, after trimming) is
            rejected without touching the database.

    Returns:
        ``(dataframe, status_markdown)``; the dataframe is empty when the
        statement was rejected or failed.
    """
    # Guard against None/blank input, which previously raised AttributeError
    # before the try block could turn it into a status message.
    if not sql_query or not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "β Only SELECT"
    try:
        with get_db_connection() as conn:
            start = time.time()
            df = pd.read_sql_query(sql_query, conn)
            elapsed = time.time() - start
            return df, f"β {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        return pd.DataFrame(), f"β {e}"
def get_schema_info():
    """Describe the database as Markdown for the Schema tab.

    Lists up to 20 relations (label and id) and every non-internal table with
    its row count.

    Returns:
        A Markdown string. If a query fails, the sections produced so far are
        kept and an ``Error:`` line is appended.
    """
    parts = [
        "# π Schema\n\n",
        f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n",
    ]
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            parts.append("## Relations\n\n")
            cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
            for rel_id, label in cursor.fetchall():
                parts.append(f"- **{label}:** `{rel_id}`\n")

            parts.append("\n## Tables\n\n")
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            # fetchall() first: the same cursor is reused for the counts below.
            for table, in cursor.fetchall():
                # Identifiers cannot be bound as parameters, so quote the name
                # to keep unusual table names from breaking the statement.
                quoted = '"' + table.replace('"', '""') + '"'
                cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
                parts.append(f"- **{table}:** {cursor.fetchone()[0]:,} rows\n")
    except Exception as e:
        parts.append(f"\nError: {e}\n")
    return "".join(parts)
# ===== FASTAPI - Create app FIRST =====
# The JSON routes are registered on the app here, before the Gradio UI is
# mounted at "/" further down.
app = FastAPI(title="ConceptNet API", version="1.0")


@app.get("/api")
def api_docs():
    """API documentation - accessible at /api"""
    return {
        "name": "ConceptNet API",
        "version": "1.0",
        "endpoints": {
            "/api/profile/{word}": "Semantic profile (params: lang, limit)",
            "/api/query": "Query edges (params: start, relation, end, limit)",
            "/api/relations": "List relations",
            "/api/languages": "List languages"
        },
        "examples": {
            "profile": "/api/profile/dog?lang=en&limit=10",
            "query": "/api/query?start=dog&relation=IsA&limit=20"
        },
        "note": "Visit root (/) for the Gradio UI"
    }


@app.get("/api/profile/{word}")
def api_profile(word: str, lang: str = "en", limit: int = 10):
    """Return the semantic profile of *word*.

    ``lang`` and ``limit`` are query parameters; ``limit`` is passed on as
    the per-relation row cap of ``get_semantic_profile_json``.
    """
    return JSONResponse(get_semantic_profile_json(word, lang, limit))


@app.get("/api/query")
def api_query(start: Optional[str] = None,
              relation: Optional[str] = None,
              end: Optional[str] = None,
              limit: int = 50):
    """Return edges matching the optional start/relation/end filters."""
    return JSONResponse(query_edges_json(start, relation, end, limit))


@app.get("/api/relations")
def api_relations():
    """List the relation names and URIs from ``RELATIONS``."""
    return JSONResponse({"relations": [{"name": name, "url": url} for name, url in RELATIONS]})


@app.get("/api/languages")
def api_languages():
    """List the supported language codes from ``TARGET_LANGUAGES``."""
    return JSONResponse({"languages": TARGET_LANGUAGES})
# ===== GRADIO UI =====
# Builds the five-tab explorer and wires each button to its handler.
# NOTE(review): the nesting of rows/tabs below is inferred from widget order;
# confirm it against the intended layout.
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
    # Page header: title plus a summary line with the languages and API routes.
    gr.Markdown("# π§ ConceptNet Explorer")
    gr.Markdown(
        f"**Multi-language semantic network** | "
        f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | "
        f"**API:** `/api/profile/{{word}}` `/api/query`"
    )
    with gr.Tabs():
        # Tab 1: Markdown semantic profile of a single word.
        with gr.TabItem("π Semantic Profile"):
            gr.Markdown("**Explore semantic relations for any word**")
            with gr.Row():
                word_input = gr.Textbox(label="Word", placeholder="dog", value="dog")
                lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
            semantic_btn = gr.Button("π Get Semantic Profile", variant="primary", size="lg")
            semantic_output = gr.Markdown()
            gr.Examples(
                examples=[["dog", "en"], ["hund", "de"], ["perro", "es"], ["chien", "fr"]],
                inputs=[word_input, lang_input]
            )
        # Tab 2: filtered edge search (start / relation / end / limit).
        with gr.TabItem("β‘ Query Builder"):
            with gr.Row():
                start_input = gr.Textbox(label="Start", placeholder="dog")
                rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA")
                end_input = gr.Textbox(label="End", placeholder="")
            limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50)
            query_btn = gr.Button("βΆοΈ Run Query", variant="primary", size="lg")
            status_output = gr.Markdown()
            results_output = gr.DataFrame(wrap=True)
        # Tab 3: free-form SQL, handled by run_raw_query.
        with gr.TabItem("π» Raw SQL"):
            raw_sql_input = gr.Textbox(
                label="SQL",
                value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10",
                lines=3
            )
            raw_btn = gr.Button("βΆοΈ Execute")
            raw_status = gr.Markdown()
            raw_results = gr.DataFrame()
        # Tab 4: relation list and table row counts, loaded on demand.
        with gr.TabItem("π Schema"):
            schema_btn = gr.Button("π Load Schema")
            schema_output = gr.Markdown()
        # Tab 5: static documentation of the JSON endpoints.
        with gr.TabItem("π API"):
            gr.Markdown("## JSON API Endpoints\n")
            gr.Markdown("### API Documentation\n```\nGET /api\n```")
            gr.Markdown("### Get Semantic Profile\n```\nGET /api/profile/{word}?lang=en&limit=10\n```")
            gr.Markdown("### Query Edges\n```\nGET /api/query?start=dog&relation=IsA&limit=50\n```")
            gr.Markdown("### List Relations\n```\nGET /api/relations\n```")
            gr.Markdown("### Examples\n")
            gr.Markdown("```\ncurl https://your-space.hf.space/api/profile/dog?lang=en\n```")
            gr.Markdown("```\ncurl 'https://your-space.hf.space/api/query?start=dog&relation=IsA&limit=10'\n```")
    # Footer shown below the tabs.
    gr.Markdown(
        "---\n"
        "**Performance:** Exact match on rel_id for fast queries | "
        "**API:** Full REST API at `/api/*` endpoints"
    )
    # Event wiring: handler, input component(s), output component(s).
    semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output)
    query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output])
    raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status])
    schema_btn.click(get_schema_info, None, schema_output)
# ===== MOUNT GRADIO TO FASTAPI - Gradio at root, API at /api/* =====
# Rebinds `app` to the value returned by mount_gradio_app, with the Blocks UI
# (`demo`) attached at path "/".
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    # Startup banner: (text, level) pairs logged in order before serving.
    rule = "=" * 60
    banner = (
        (rule, "SUCCESS"),
        ("π APP READY!", "SUCCESS"),
        (rule, "SUCCESS"),
        ("UI: http://localhost:7860/", "INFO"),
        ("API: http://localhost:7860/api", "INFO"),
        (" http://localhost:7860/api/profile/dog", "INFO"),
        (" http://localhost:7860/api/query?start=dog&relation=IsA", "INFO"),
        (rule, "SUCCESS"),
    )
    for text, level in banner:
        log_progress(text, level)

    # Imported here so that uvicorn is only needed when run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)