import gradio as gr import sqlite3 import pandas as pd from huggingface_hub import hf_hub_download import os import traceback # --- 1. Download and Cache the Database --- # This is the cricital step for HF Spaces. # hf_hub_download caches the file. When your app.py restarts, # it will find the file in the cache and NOT re-download 19.3 GB. print("Downloading database (if not already cached)...") REPO_ID = "ysenarath/conceptnet-sqlite" DB_FILENAME = "data/conceptnet.db" DB_PATH = hf_hub_download( repo_id=REPO_ID, filename=DB_FILENAME, repo_type="dataset" ) print(f"Database is available at: {DB_PATH}") # --- 2. Database Helper Functions --- def get_db_connection(): """ Creates a new read-only connection to the SQLite database. This is safer for Gradio's multi-threading. """ try: # Connect in read-only mode db_uri = f"file:{DB_PATH}?mode=ro" conn = sqlite3.connect(db_uri, uri=True) return conn except Exception as e: print(f"Error connecting to DB: {e}") traceback.print_exc() return None def get_schema_info(): """ Dynamically queries the SQLite database to get its schema. """ print("Getting schema info...") schema_md = "# Database Schema\n\n" try: with get_db_connection() as conn: cursor = conn.cursor() # Get all table names cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';") tables = cursor.fetchall() if not tables: return "Could not find any tables in the database." for table in tables: table_name = table[0] schema_md += f"## Table: `{table_name}`\n\n" schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n" schema_md += "|:------------|:----------|:---------|:------------|\n" # Get column info for each table cursor.execute(f"PRAGMA table_info({table_name});") columns = cursor.fetchall() for col in columns: # col structure: (cid, name, type, notnull, dflt_value, pk) name = col[1] dtype = col[2] notnull = bool(col[3]) pk = bool(col[5]) schema_md += f"| `{name}` | `{dtype}` | {notnull} | {pk} |\n" schema_md += "\n" return schema_md except Exception as e: print(f"Error in get_schema_info: {e}") traceback.print_exc() return f"An error occurred while fetching schema: {e}" def run_query(start_node, relation, end_node, limit): """ Runs a query against the ConceptNet database. *** ASSUMPTION ***: This function *assumes* the main table is named 'edges' and has columns 'start', 'rel', and 'end'. Use the 'Schema Explorer' tab to verify this. If the names are different (e.g., 'conceptnet_edges'), you must edit this function. """ print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'") # --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! --- TABLE_NAME = "edges" START_COL = "start" REL_COL = "rel" END_COL = "end" # --- query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1" params = [] try: if start_node: query += f" AND {START_COL} LIKE ?" params.append(start_node) if relation: query += f" AND {REL_COL} LIKE ?" params.append(relation) if end_node: query += f" AND {END_COL} LIKE ?" params.append(end_node) query += " LIMIT ?" params.append(limit) print(f"Executing SQL: {query}") print(f"With params: {params}") with get_db_connection() as conn: df = pd.read_sql_query(query, conn, params=params) if df.empty: return pd.DataFrame(), "Query ran successfully but returned no results." return df, "Query successful!" except Exception as e: print(f"Error in run_query: {e}") traceback.print_exc() err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different." return pd.DataFrame(), err_msg # --- 3. Build the Gradio UI --- with gr.Blocks(title="ConceptNet SQLite Explorer") as demo: gr.Markdown("# ConceptNet SQLite Explorer") gr.Markdown(f"Successfully loaded database from `{DB_PATH}`") with gr.Tabs(): with gr.TabItem("Schema Explorer"): gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.") schema_btn = gr.Button("Show Database Schema", variant="secondary") schema_output = gr.Markdown("Schema will appear here...") with gr.TabItem("Query Edges"): gr.Markdown( "**Run a query against the database.**\n" f"This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n" "Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)." ) with gr.Row(): start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog") rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA") end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal") limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1) query_btn = gr.Button("Run Query", variant="primary") status_output = gr.Markdown("Status will appear here...") results_output = gr.DataFrame(label="Query Results", interactive=False) # --- 4. Connect UI Elements to Functions --- schema_btn.click( fn=get_schema_info, inputs=None, outputs=schema_output, api_name="get_schema" ) query_btn.click( fn=run_query, inputs=[start_input, rel_input, end_input, limit_slider], outputs=[results_output, status_output], api_name="run_query" ) if __name__ == "__main__": demo.launch()