Spaces:
Running
Running
| # ============================================================================ | |
| # 1. CONSOLIDATED IMPORTS | |
| # ============================================================================ | |
| import gradio as gr | |
| import sqlite3 | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download, HfApi | |
| from fastapi import FastAPI, Response | |
| from fastapi.responses import JSONResponse | |
| import os | |
| import time | |
| import json | |
| from typing import Optional, Dict, List, Any | |
| import uvicorn | |
| import traceback | |
| from pathlib import Path | |
| from contextlib import asynccontextmanager # <-- FIX 1: Add this import | |
| # ============================================================================ | |
| # 2. SHARED GLOBALS & CONFIGURATION | |
| # ============================================================================ | |
# --- Languages ---
# Language codes accepted by the profile endpoints and offered in the UI dropdowns.
TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
print("π Target Languages: " + ", ".join(code.upper() for code in TARGET_LANGUAGES))

# --- Hugging Face & DB Config ---
INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
PROGRESS_FILENAME = "indexing_progress.json"
CONCEPTNET_BASE_URI = "http://conceptnet.io"
DB_PATH: Optional[str] = None  # Will be set by setup_database()

# --- HF Token ---
# First non-empty value among the supported environment variable names.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or os.environ.get("HF_API_TOKEN")
)

# --- All relations MUST be full URLs ---
# Relation names in display/insertion order; each maps to "<base>/r/<name>".
_RELATION_NAMES = (
    "RelatedTo",
    "IsA",
    "PartOf",
    "HasA",
    "UsedFor",
    "CapableOf",
    "AtLocation",
    "Causes",
    "HasSubevent",
    "HasFirstSubevent",
    "HasLastSubevent",
    "HasPrerequisite",
    "HasProperty",
    "MotivatedByGoal",
    "ObstructedBy",
    "Desires",
    "CreatedBy",
    "Synonym",
    "Antonym",
    "DistinctFrom",
    "DerivedFrom",
    "SymbolOf",
    "DefinedAs",
    "MannerOf",
    "LocatedNear",
    "HasContext",
    "SimilarTo",
    "EtymologicallyRelatedTo",
    "EtymologicallyDerivedFrom",
    "CausesDesire",
    "MadeOf",
    "ReceivesAction",
    "ExternalURL",
    "NotDesires",
    "NotUsedFor",
    "NotCapableOf",
    "NotHasProperty",
)
CONCEPTNET_RELATIONS: Dict[str, str] = {
    name: f"{CONCEPTNET_BASE_URI}/r/{name}" for name in _RELATION_NAMES
}

# Sorted list of (Label, Full_URL) tuples for Gradio dropdowns
RELATION_CHOICES = sorted(CONCEPTNET_RELATIONS.items())
| # ============================================================================ | |
| # 3. DATABASE SETUP & HELPERS | |
| # ============================================================================ | |
def log_progress(message, level="INFO"):
    """Print ``message`` to stdout with an HH:MM:SS timestamp and a level marker.

    Args:
        message: Text to print.
        level: One of "INFO", "SUCCESS", "ERROR", "WARN", "DEBUG"; any other
            value prints with an empty marker.
    """
    markers = {"INFO": "βΉοΈ ", "SUCCESS": "β ", "ERROR": "β", "WARN": "β οΈ ", "DEBUG": "π"}
    marker = markers[level] if level in markers else ""
    stamp = time.strftime("%H:%M:%S")
    print(f"[{stamp}] {marker} {message}")
def check_remote_progress():
    """Fetch and parse the indexing-progress JSON from the Hub dataset repo.

    With ``HF_TOKEN`` set, the repo is probed via ``HfApi.repo_info`` before
    the download; without it, a warning is logged and the download is
    attempted without a token.

    Returns:
        dict: The parsed content of ``PROGRESS_FILENAME``. If the file cannot
        be fetched or parsed, or does not contain a JSON object,
        ``{"indexing_complete": False}`` is returned instead.
    """
    fallback = {"indexing_complete": False}

    if not HF_TOKEN:
        log_progress("No HF_TOKEN. Assuming local DB or public repo.", "WARN")

    # Best-effort boundary: any failure degrades to the fallback, but is
    # always logged so a missing or unreachable progress file is visible.
    try:
        if HF_TOKEN:
            HfApi().repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        progress_path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=PROGRESS_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN or None,
        )
        with open(progress_path, 'r', encoding="utf-8") as f:
            progress = json.load(f)
    except Exception as e:
        log_progress(f"Could not check remote progress: {e}", "WARN")
        return fallback

    # The caller uses ``.get`` on the result, so anything but a JSON object
    # is treated like a missing progress file.
    if not isinstance(progress, dict):
        log_progress("Could not check remote progress: progress file is not a JSON object", "WARN")
        return fallback
    return progress
def setup_database():
    """Locate the pre-indexed database and record its path in ``DB_PATH``.

    A local file named ``INDEXED_DB_FILENAME`` larger than 1,000,000 bytes is
    used when present. Otherwise the file is downloaded from the Hub dataset
    repo, but only if the remote progress file reports ``indexing_complete``.
    Intended to be called once on app startup.

    Returns:
        The database path as a string on success, ``None`` when no database
        could be obtained.
    """
    global DB_PATH
    log_progress("Attempting to load indexed database...", "INFO")

    # 1) Prefer an already-present local copy of plausible size.
    candidate = Path(INDEXED_DB_FILENAME)
    min_bytes = 1000000
    if candidate.exists() and candidate.stat().st_size > min_bytes:
        resolved = candidate.resolve()
        log_progress(f"Found existing local DB: {resolved}", "SUCCESS")
        DB_PATH = str(resolved)
        return DB_PATH

    # 2) Otherwise only download once the remote index is reported complete.
    remote_state = check_remote_progress()
    if not remote_state.get("indexing_complete"):
        log_progress("Remote indexing is not complete or progress file not found.", "ERROR")
        return None

    try:
        log_progress(f"Downloading {INDEXED_DB_FILENAME} from {INDEXED_REPO_ID}...", "INFO")
        downloaded = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=INDEXED_DB_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN
        )
        DB_PATH = downloaded
        log_progress(f"Indexed DB loaded successfully from Hub: {downloaded}", "SUCCESS")
        return downloaded
    except Exception as err:
        log_progress(f"Failed to download indexed DB: {err}", "ERROR")
        return None
class _AutoClosingConnection(sqlite3.Connection):
    """``sqlite3.Connection`` whose ``with`` block also closes the connection.

    The stock connection context manager only commits or rolls back on exit
    and leaves the connection open. Callers in this file open a fresh
    connection per request inside ``with``, so closing on exit is what keeps
    connections from accumulating.
    """

    def __exit__(self, exc_type, exc_value, tb):
        try:
            return super().__exit__(exc_type, exc_value, tb)
        finally:
            self.close()


def get_db_connection() -> sqlite3.Connection:
    """Open a new read-only SQLite connection to the database at ``DB_PATH``.

    The connection is opened through a ``file:`` URI with ``mode=ro`` and has
    the performance PRAGMAs below applied. Used as a context manager
    (``with get_db_connection() as conn:``), it is closed when the block exits.

    Returns:
        sqlite3.Connection: A new connection; the caller owns it.

    Raises:
        ConnectionError: If ``DB_PATH`` has not been set.
        sqlite3.Error: If the database cannot be opened or a PRAGMA fails.
    """
    if DB_PATH is None:
        raise ConnectionError("Database path is not set. The lifespan startup event might have failed.")

    # Build the URI via pathlib so characters such as '#', '?', '%' or spaces
    # in the path are percent-encoded instead of being parsed as URI syntax.
    db_uri = Path(os.path.abspath(DB_PATH)).as_uri() + "?mode=ro"
    conn = sqlite3.connect(
        db_uri,
        uri=True,
        check_same_thread=False,
        factory=_AutoClosingConnection,
    )
    try:
        conn.execute("PRAGMA journal_mode = OFF")
        conn.execute("PRAGMA synchronous = 0")
        conn.execute("PRAGMA cache_size = -256000")  # 256MB cache
        conn.execute("PRAGMA mmap_size = 4294967296")  # 4GB mmap
        conn.execute("PRAGMA temp_store = MEMORY")
    except Exception:
        # Do not leak the handle if configuration fails.
        conn.close()
        raise
    return conn
| # ============================================================================ | |
| # 4. API (FASTAPI) ENDPOINTS | |
| # ============================================================================ | |
# --- FIX 2: Define the lifespan event manager ---
# The function must be wrapped with @asynccontextmanager: FastAPI's `lifespan`
# argument expects an async context manager, not a bare async generator.
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run database setup before the app serves requests; log on shutdown.

    A failed setup is reported on stdout but does not abort startup, so the
    app still comes up (requests needing the database will then fail in
    ``get_db_connection``).

    Args:
        app: The FastAPI application this lifespan is attached to (unused).
    """
    # --- Code to run ON STARTUP ---
    log_progress("Lifespan startup: Setting up database...", "INFO")
    try:
        if not setup_database():
            print("\n" + "="*70)
            print("β CRITICAL ERROR: Could not set up the database.")
            print(f" Please check connection or manually download '{INDEXED_DB_FILENAME}'")
            print(f" from '{INDEXED_REPO_ID}' and place it in this directory.")
            print("="*70 + "\n")
        else:
            print(f"β Database is ready at: {DB_PATH}")
    except Exception as e:
        print(f"β CRITICAL ERROR during database setup: {e}")
        traceback.print_exc()

    # --- App is now running ---
    yield

    # --- Code to run ON SHUTDOWN (optional) ---
    log_progress("Lifespan shutdown.", "INFO")
# --- FIX 3: Pass the lifespan manager to the FastAPI app ---
# The ASGI application object; `lifespan` wires in the startup/shutdown hook
# that performs the database setup.
app = FastAPI(
    lifespan=lifespan,
    title="ConceptNet Explorer API",
    description="A versatile API for querying a ConceptNet SQLite database.",
    version="1.0",
)
@app.get("/api")
def api_docs():
    """API documentation - accessible at /api.

    Returns:
        dict: API name and version, the endpoint paths with a one-line
        description each, and two example request URLs.
    """
    return {
        "name": "ConceptNet Explorer API",
        "version": "1.0",
        "endpoints": {
            "/api/profile/{word}": "Get a full semantic profile for a word.",
            "/api/query": "Run a specific query for edges.",
            "/api/relations": "List all available relation types.",
            "/api/languages": "List all supported languages."
        },
        "examples": {
            "profile": "/api/profile/dog?lang=en&limit=10",
            "query": f"/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&limit=20"
        },
        "note": "Visit the root path (/) for the Gradio UI."
    }
@app.get("/api/profile/{word}")
def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
    """API Endpoint: Get a full semantic profile for a word as JSON.

    For every configured relation, the highest-weighted edges leaving and
    entering nodes whose id starts with ``<base>/c/<lang>/<normalized word>``
    are collected.

    Args:
        word: The word to look up; it is stripped, lower-cased, and spaces are
            replaced with underscores.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        limit: Maximum number of edges per relation and direction; must be >= 1.

    Returns:
        ``JSONResponse`` with the profile (200), or an ``{"error": ...}`` body
        with status 400 (invalid input), 404 (no matching node) or 500
        (unexpected failure).
    """
    word_normalized = word.strip().lower().replace(' ', '_') if word else ""
    # A blank word would produce the pattern ".../c/<lang>/%", i.e. match
    # every node of the language, so it is rejected like a missing one.
    if not word_normalized or lang not in TARGET_LANGUAGES:
        return JSONResponse(
            status_code=400,
            content={"error": "Invalid input. 'word' is required and 'lang' must be valid."}
        )
    # SQLite treats a negative LIMIT as "no limit"; refuse it explicitly.
    if limit < 1:
        return JSONResponse(
            status_code=400,
            content={"error": "Invalid input. 'limit' must be a positive integer."}
        )

    like_path = f"{CONCEPTNET_BASE_URI}/c/{lang}/{word_normalized}%"
    result = {
        "query": {"word": word, "normalized": word_normalized, "lang": lang, "limit_per_relation": limit},
        "nodes_found": [],
        "relations": {},
        "total_edges_found": 0
    }
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            result["nodes_found"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]
            if not result["nodes_found"]:
                return JSONResponse(
                    status_code=404,
                    content={"error": f"Word '{word}' (normalized: '{word_normalized}') not found in language '{lang}'."}
                )

            for rel_name, rel_uri in RELATION_CHOICES:
                # Edges that start at a matching node.
                cursor.execute("""
                    SELECT en.label, e.weight, en.id
                    FROM edge e
                    JOIN node en ON e.end_id = en.id
                    WHERE e.start_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC
                    LIMIT ?
                """, (like_path, rel_uri, limit))
                outgoing = [{"target_label": label, "weight": weight, "target_id": eid}
                            for label, weight, eid in cursor.fetchall()]

                # Edges that end at a matching node.
                cursor.execute("""
                    SELECT s.label, e.weight, s.id
                    FROM edge e
                    JOIN node s ON e.start_id = s.id
                    WHERE e.end_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC
                    LIMIT ?
                """, (like_path, rel_uri, limit))
                incoming = [{"source_label": label, "weight": weight, "source_id": sid}
                            for label, weight, sid in cursor.fetchall()]

                # Relations without any edge are left out of the response.
                if outgoing or incoming:
                    result["relations"][rel_name] = {
                        "uri": rel_uri,
                        "outgoing": outgoing,
                        "incoming": incoming,
                        "count": len(outgoing) + len(incoming)
                    }
                    result["total_edges_found"] += len(outgoing) + len(incoming)
        return JSONResponse(content=result)
    except Exception as e:
        log_progress(f"API /profile error: {e}", "ERROR")
        traceback.print_exc()
        return JSONResponse(status_code=500, content={"error": str(e)})
@app.get("/api/query")
def query_edges_json(
    start_node: Optional[str] = None,
    relation_uri: Optional[str] = None,
    end_node: Optional[str] = None,
    lang: str = 'en',
    limit: int = 50
):
    """API Endpoint: Query edges with flexible filters.

    Args:
        start_node: Optional start-node filter: a word, a full ConceptNet node
            URI, or a ``/c/...`` path. Matched as a prefix of the node id.
        relation_uri: Optional full relation URI; matched exactly.
        end_node: Optional end-node filter, same forms as ``start_node``.
        lang: Language code used to build the node id when a plain word is given.
        limit: Maximum number of rows returned; must be >= 1.

    Returns:
        dict with the echoed ``query``, the ``results`` rows (ordered by
        descending weight) and their ``count``; or a ``JSONResponse`` with an
        ``{"error": ...}`` body and status 400 (bad ``limit``) or 500.
    """
    # SQLite treats a negative LIMIT as "no limit"; refuse it explicitly.
    if limit < 1:
        return JSONResponse(
            status_code=400,
            content={"error": "Invalid input. 'limit' must be a positive integer."}
        )

    def build_node_pattern(node_str: str) -> str:
        """Turn a word, node URI or /c/ path into a LIKE prefix pattern."""
        node_str = node_str.strip()
        if node_str.startswith(f'{CONCEPTNET_BASE_URI}/c/'):
            return f"{node_str}%"
        if node_str.startswith('/c/'):
            return f"{CONCEPTNET_BASE_URI}{node_str}%"
        return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.lower().replace(' ', '_')}%"

    query = """
        SELECT
            e.id as edge_id, s.id as start_id, r.id as relation_id, en.id as end_id,
            e.weight, s.label as start_label, r.label as relation_label, en.label as end_label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params: List[Any] = []
    # Blank (whitespace-only) filters are treated as absent; otherwise they
    # would turn into patterns like ".../c/<lang>/%".
    if start_node and start_node.strip():
        query += " AND s.id LIKE ?"
        params.append(build_node_pattern(start_node))
    if relation_uri and relation_uri.strip():
        query += " AND r.id = ?"
        params.append(relation_uri.strip())
    if end_node and end_node.strip():
        query += " AND en.id LIKE ?"
        params.append(build_node_pattern(end_node))
    query += " ORDER BY e.weight DESC LIMIT ?"
    params.append(limit)

    try:
        with get_db_connection() as conn:
            df = pd.read_sql_query(query, conn, params=params)
        return {
            "query": {"start_node": start_node, "relation_uri": relation_uri, "end_node": end_node, "lang": lang, "limit": limit},
            "results": df.to_dict(orient='records'),
            "count": len(df)
        }
    except Exception as e:
        log_progress(f"API /query error: {e}", "ERROR")
        traceback.print_exc()
        return JSONResponse(status_code=500, content={"error": str(e)})
@app.get("/api/relations")
def api_relations():
    """API Endpoint: List all configured relations.

    Returns:
        ``JSONResponse`` whose body maps each relation name to its full URI
        under the ``"relations"`` key.
    """
    return JSONResponse(content={"relations": CONCEPTNET_RELATIONS})
@app.get("/api/languages")
def api_languages():
    """API Endpoint: List all configured languages.

    Returns:
        ``JSONResponse`` whose body lists the language codes under the
        ``"languages"`` key.
    """
    return JSONResponse(content={"languages": TARGET_LANGUAGES})
| # ============================================================================ | |
| # 5. GRADIO UI HELPER FUNCTIONS | |
| # ============================================================================ | |
def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
    """Gradio UI Function: Get semantic profile formatted as Markdown.

    Args:
        word: The word to look up; it is stripped, lower-cased, and spaces are
            replaced with underscores to build the node-id prefix.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress tracker, updated once per relation.

    Returns:
        str: Markdown with up to three matching nodes and, per relation that
        has any, up to ten outgoing and ten incoming edges. On invalid input,
        a missing word or an unexpected error, a Markdown message instead.
    """
    log_progress(f"UI Profile: {word} ({lang})", "INFO")
    word_normalized = word.strip().lower().replace(' ', '_') if word else ""
    # A blank word would produce the pattern ".../c/<lang>/%", i.e. match
    # every node of the language, so it is rejected like a missing one.
    if not word_normalized or lang not in TARGET_LANGUAGES:
        return "β οΈ Invalid input. Please provide a word and select a language."

    progress(0, desc="π Starting...")
    like_path = f"{CONCEPTNET_BASE_URI}/c/{lang}/{word_normalized}%"
    # Markdown fragments, joined once at the end.
    parts = [f"# π§ Semantic Profile: '{word}' ({lang.upper()})\n\n"]
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            progress(0.05, desc="π Finding nodes...")
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            nodes = cursor.fetchall()
            if not nodes:
                return f"# π§ '{word}'\n\nβ οΈ **Word not found** (as `.../c/{lang}/{word_normalized}...`)"
            log_progress(f"Found {len(nodes)} nodes", "SUCCESS")

            parts.append("**Matching Nodes:**\n")
            # Only the first three of the (at most five) matches are listed.
            parts.extend(f"- **{label}** (ID: `{node_id}`)\n" for node_id, label in nodes[:3])
            parts.append("\n---\n")

            total_found = 0
            num_relations = len(RELATION_CHOICES)
            for i, (rel_name, rel_uri) in enumerate(RELATION_CHOICES):
                progress((i + 0.1) / num_relations, desc=f"π {rel_name}...")
                cursor.execute("""
                    SELECT en.label, e.weight
                    FROM edge e JOIN node en ON e.end_id = en.id
                    WHERE e.start_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC LIMIT 10
                """, (like_path, rel_uri))
                outgoing = cursor.fetchall()
                cursor.execute("""
                    SELECT s.label, e.weight
                    FROM edge e JOIN node s ON e.start_id = s.id
                    WHERE e.end_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC LIMIT 10
                """, (like_path, rel_uri))
                incoming = cursor.fetchall()

                if outgoing or incoming:
                    parts.append(f"### {rel_name} (`{rel_uri}`)\n\n")
                    total_found += len(outgoing) + len(incoming)
                    parts.extend(f"- **{word}** β *{label}* `[{weight:.3f}]`\n" for label, weight in outgoing)
                    parts.extend(f"- *{label}* β **{word}** `[{weight:.3f}]`\n" for label, weight in incoming)
                    parts.append("\n")
                progress((i + 1) / num_relations, desc=f"β {rel_name}")

            progress(1.0, desc="β Complete!")
            parts.append(f"---\n**Total relations found:** {total_found}\n")
            log_progress(f"Complete: {total_found} relations", "SUCCESS")
            return "".join(parts)
    except Exception as e:
        log_progress(f"UI Profile Error: {e}", "ERROR")
        traceback.print_exc()
        return f"**β An unexpected error occurred:**\n\n```\n{e}\n```"
def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, limit: int, progress=gr.Progress()):
    """Gradio UI Function: Query builder.

    Args:
        start_node: Optional start-node filter: a word, a full ConceptNet node
            URI, or a ``/c/...`` path (as the input placeholder suggests).
            Matched as a prefix of the node id. Blank means no filter.
        relation_uri: Optional full relation URI; matched exactly.
        end_node: Optional end-node filter, same forms as ``start_node``.
        lang: Language code used to build the node id when a plain word is given.
        limit: Maximum number of rows.
        progress: Gradio progress tracker.

    Returns:
        tuple: ``(DataFrame, status_markdown)``. The frame is empty when
        nothing matched or an error occurred.
    """
    log_progress(f"UI Query: start={start_node}, rel={relation_uri}, end={end_node}, lang={lang}", "INFO")
    progress(0, desc="π Building query...")
    query = """
        SELECT
            s.label as start_label, r.label as relation_label, en.label as end_label,
            e.weight, s.id as start_id, r.id as relation_id, en.id as end_id, e.id as edge_id
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params: List[Any] = []
    try:
        def build_node_pattern(node_str: str) -> str:
            """Turn a word, node URI or /c/ path into a LIKE prefix pattern."""
            node_str = node_str.strip()
            if node_str.startswith(f'{CONCEPTNET_BASE_URI}/c/'):
                return f"{node_str}%"
            # Relative node paths such as "/c/en/dog" are what the input
            # placeholders advertise; expand them to the full node URI.
            if node_str.startswith('/c/'):
                return f"{CONCEPTNET_BASE_URI}{node_str}%"
            return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.lower().replace(' ', '_')}%"

        with get_db_connection() as conn:
            progress(0.3, desc="π Applying filters...")
            if start_node and start_node.strip():
                query += " AND s.id LIKE ?"
                params.append(build_node_pattern(start_node))
            if relation_uri and relation_uri.strip():
                query += " AND r.id = ?"
                params.append(relation_uri)
            if end_node and end_node.strip():
                query += " AND en.id LIKE ?"
                params.append(build_node_pattern(end_node))
            query += " ORDER BY e.weight DESC LIMIT ?"
            params.append(limit)

            progress(0.6, desc="β‘ Running query...")
            start_time = time.time()
            df = pd.read_sql_query(query, conn, params=params)
            elapsed = time.time() - start_time

            progress(1.0, desc="β Done!")
            log_progress(f"Query Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")
            if df.empty:
                return pd.DataFrame(), f"β οΈ No results found ({elapsed:.2f}s)"

            # edge_id is selected but not shown in the results table.
            cols_to_show = [
                'start_label', 'relation_label', 'end_label', 'weight',
                'start_id', 'relation_id', 'end_id'
            ]
            df = df[cols_to_show]
            return df, f"β {len(df)} results in {elapsed:.2f}s"
    except Exception as e:
        log_progress(f"UI Query Error: {e}", "ERROR")
        traceback.print_exc()
        return pd.DataFrame(), f"β **Error:**\n\n```\n{e}\n```"
def run_raw_query_ui(sql_query: str):
    """Gradio UI Function: Raw SQL query.

    Only statements whose text starts with ``SELECT`` (ignoring surrounding
    whitespace and case) are executed.

    Args:
        sql_query: The SQL text entered by the user.

    Returns:
        tuple: ``(DataFrame, status_markdown)``; the frame is empty when the
        statement is refused or fails.
    """
    is_select = sql_query.strip().upper().startswith("SELECT")
    if not is_select:
        return pd.DataFrame(), "β **Error:** Only `SELECT` statements are allowed."

    try:
        with get_db_connection() as db:
            began = time.time()
            frame = pd.read_sql_query(sql_query, db)
            took = time.time() - began
            status = f"β {len(frame)} rows in {took:.3f}s"
            return frame, status
    except Exception as err:
        return pd.DataFrame(), f"β **Error:**\n\n```\n{err}\n```"
def get_schema_info_ui():
    """Gradio UI Function: Display schema information.

    Returns:
        str: Markdown with the repo link, the database file path, each
        non-internal table with its row count, and the configured relations.
        If the database is not loaded, only the header and a warning are
        returned; a failure while reading the schema is reported at the end.
    """
    parts = [
        "# π Database Schema\n\n",
        f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n",
    ]
    if not DB_PATH:
        parts.append("**Database File:** `NOT LOADED`\n\n")
        parts.append("β οΈ **Database not loaded.** Schema info may be incomplete. Check logs.\n")
        return "".join(parts)
    parts.append(f"**Database File:** `{DB_PATH}`\n\n")

    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            parts.append("## Tables & Row Counts\n\n")
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            tables = [row[0] for row in cursor.fetchall()]
            for table in tables:
                # Identifiers cannot be bound as parameters; quote the name so
                # keywords or special characters in it do not break the query.
                quoted = '"' + table.replace('"', '""') + '"'
                cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
                count = cursor.fetchone()[0]
                parts.append(f"- **{table}:** {count:,} rows\n")
        parts.append("\n## Configured Relations\n\n")
        parts.append("This list populates the 'Query Builder' dropdown. The values are the full URLs used in the DB.\n\n")
        parts.extend(f"- **{name}:** `{uri}`\n" for name, uri in RELATION_CHOICES)
    except Exception as e:
        parts.append(f"\n**β Error loading schema:**\n\n```\n{e}\n```\n")
    return "".join(parts)
| # ============================================================================ | |
| # 6. GRADIO UI DEFINITION | |
| # ============================================================================ | |
def create_gradio_ui():
    """Builds the consolidated Gradio interface.

    The interface has five tabs: Semantic Profile, Query Builder, Raw SQL,
    Schema and API Docs. Buttons (and selecting the Schema tab) are wired to
    the UI helper functions defined in this module.

    Returns:
        gr.Blocks: The assembled (not yet launched or mounted) interface.
    """
    with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
        gr.Markdown(
            "# π§ ConceptNet Explorer\n"
            "An interface for querying the ConceptNet semantic network."
        )
        gr.Markdown(
            f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | "
            f"**Database:** `{INDEXED_REPO_ID}` | "
            f"**JSON API:** Access at `/api`"
        )
        with gr.Tabs():
            with gr.TabItem("π Semantic Profile"):
                # The count is derived from the configured relations so the
                # text cannot drift from the actual list.
                gr.Markdown(
                    "Explore all semantic relations for a single word. "
                    f"This queries all {len(RELATION_CHOICES)} relation types."
                )
                with gr.Row():
                    profile_word_input = gr.Textbox(
                        label="Word",
                        placeholder="e.g., dog",
                        value="dog",
                        info="Enter a word"
                    )
                    profile_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
                profile_btn = gr.Button("π Get Semantic Profile", variant="primary", size="lg")
                profile_output = gr.Markdown(label="Profile Results")
                gr.Examples(
                    examples=[["dog", "en"], ["Hund", "de"], ["perro", "es"], ["chien", "fr"], ["gatto", "it"]],
                    inputs=[profile_word_input, profile_lang_input]
                )
            with gr.TabItem("β‘ Query Builder"):
                gr.Markdown("Construct a specific query using nodes and a relation.")
                with gr.Row():
                    query_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Node Language")
                    # step=1 keeps the default (50) and the maximum (500) on
                    # the slider grid, which starts at 1.
                    query_limit_slider = gr.Slider(label="Result Limit", minimum=1, maximum=500, value=50, step=1)
                with gr.Row():
                    query_start_input = gr.Textbox(
                        label="Start Node",
                        placeholder="e.g., dog (word) or /c/en/dog (URI)"
                    )
                    query_rel_input = gr.Dropdown(
                        label="Relation",
                        choices=RELATION_CHOICES,
                        value=f"{CONCEPTNET_BASE_URI}/r/IsA"
                    )
                    query_end_input = gr.Textbox(
                        label="End Node",
                        placeholder="e.g., animal (word) or /c/en/animal (URI)"
                    )
                query_btn = gr.Button("βΆοΈ Run Query", variant="primary", size="lg")
                query_status_output = gr.Markdown()
                query_results_output = gr.DataFrame(
                    label="Query Results",
                    wrap=True,
                    interactive=False
                )
            with gr.TabItem("π» Raw SQL"):
                gr.Markdown(
                    "**Warning:** Directly query the SQLite database. Only `SELECT` statements are allowed. "
                    "Use the 'Schema' tab to see table names."
                )
                raw_sql_input = gr.Textbox(
                    label="SQL Query",
                    value=f"SELECT s.label, r.label, en.label, e.weight\nFROM edge e\nJOIN relation r ON e.rel_id = r.id\nJOIN node s ON e.start_id = s.id\nJOIN node en ON e.end_id = en.id\nWHERE s.id LIKE '{CONCEPTNET_BASE_URI}/c/en/dog%'\n AND r.id = '{CONCEPTNET_BASE_URI}/r/IsA'\nORDER BY e.weight DESC\nLIMIT 10",
                    lines=5,
                    max_lines=20
                )
                raw_btn = gr.Button("βΆοΈ Execute SQL")
                raw_status = gr.Markdown()
                raw_results = gr.DataFrame(label="SQL Results", interactive=False)
            with gr.TabItem("π Schema") as schema_tab:
                gr.Markdown("View the database schema and table counts.")
                schema_btn = gr.Button("π Load Schema Info")
                schema_output = gr.Markdown()
            with gr.TabItem("π API Docs"):
                gr.Markdown(
                    "## JSON API Endpoints\n"
                    "This Gradio app is mounted on a FastAPI server. You can use the following JSON API endpoints directly."
                )
                gr.JSON({
                    "docs": "/api",
                    "profile": "/api/profile/{word}?lang=en&limit=10",
                    "query": f"/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&lang=en&limit=50",
                    "relations": "/api/relations",
                    "languages": "/api/languages"
                }, label="API Endpoints")
                gr.Markdown(
                    "### Example (cURL)\n"
                    "```bash\n# (Assumes app is running at localhost:7860)\ncurl http://localhost:7860/api/profile/dog?lang=en\n```\n"
                    f"```bash\ncurl 'http://localhost:7860/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&limit=10'\n```"
                )

        # --- Link UI components to functions ---
        profile_btn.click(
            fn=get_semantic_profile_ui,
            inputs=[profile_word_input, profile_lang_input],
            outputs=[profile_output],
            api_name="get_semantic_profile"
        )
        query_btn.click(
            fn=run_query_ui,
            inputs=[query_start_input, query_rel_input, query_end_input, query_lang_input, query_limit_slider],
            outputs=[query_results_output, query_status_output],
            api_name="run_query"
        )
        raw_btn.click(
            fn=run_raw_query_ui,
            inputs=[raw_sql_input],
            outputs=[raw_results, raw_status]
        )
        # The schema is refreshed both when the tab is opened and on demand.
        schema_tab.select(
            fn=get_schema_info_ui,
            inputs=None,
            outputs=[schema_output]
        )
        schema_btn.click(
            fn=get_schema_info_ui,
            inputs=None,
            outputs=[schema_output]
        )
    return demo
| # ============================================================================ | |
| # 7. APP MOUNTING & LAUNCH | |
| # ============================================================================ | |
# --- FIX 4: REMOVE the top-level call to setup_database() ---
# This is now handled by the FastAPI lifespan event

# Build the Gradio interface once at import time...
log_progress("Creating Gradio UI...", "INFO")
demo = create_gradio_ui()

# ...and serve it from the root path of the FastAPI application.
log_progress("Mounting Gradio UI onto FastAPI app...", "INFO")
app = gr.mount_gradio_app(app=app, blocks=demo, path="/")
| # ============================================================================ | |
| # 8. MAIN EXECUTION BLOCK | |
| # ============================================================================ | |
# This block is for LOCAL execution (e.g., `python app.py`)
# The Hugging Face platform will IGNORE this and run `app` using Gunicorn
if __name__ == "__main__":
    separator = "=" * 60
    # (message, level) pairs logged in order before the server starts.
    startup_banner = [
        (separator, "SUCCESS"),
        ("π (LOCAL) CONCEPTNET EXPLORER APP READY!", "SUCCESS"),
        (separator, "SUCCESS"),
        ("UI: http://localhost:7860/", "INFO"),
        ("API: http://localhost:7860/api", "INFO"),
        (" http://localhost:7860/api/profile/dog", "INFO"),
        (f" http://localhost:7860/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA", "INFO"),
        (separator, "SUCCESS"),
    ]
    for text, level in startup_banner:
        log_progress(text, level)

    uvicorn.run(
        "app:app",  # Run the 'app' object from the 'app' file
        host="0.0.0.0",
        port=7860,
        reload=True  # Enable reload for local dev
    )