Spaces:
Running
Running
| import gradio as gr | |
| import sqlite3 | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download | |
| import os | |
| import traceback | |
| # --- 1. Download and Cache the Database --- | |
| # This is the cricital step for HF Spaces. | |
| # hf_hub_download caches the file. When your app.py restarts, | |
| # it will find the file in the cache and NOT re-download 19.3 GB. | |
| print("Downloading database (if not already cached)...") | |
| REPO_ID = "ysenarath/conceptnet-sqlite" | |
| DB_FILENAME = "data/conceptnet.db" | |
| DB_PATH = hf_hub_download( | |
| repo_id=REPO_ID, | |
| filename=DB_FILENAME, | |
| repo_type="dataset" | |
| ) | |
| print(f"Database is available at: {DB_PATH}") | |
| # --- 2. Database Helper Functions --- | |
| def get_db_connection(): | |
| """ | |
| Creates a new read-only connection to the SQLite database. | |
| This is safer for Gradio's multi-threading. | |
| """ | |
| try: | |
| # Connect in read-only mode | |
| db_uri = f"file:{DB_PATH}?mode=ro" | |
| conn = sqlite3.connect(db_uri, uri=True) | |
| return conn | |
| except Exception as e: | |
| print(f"Error connecting to DB: {e}") | |
| traceback.print_exc() | |
| return None | |
| def get_schema_info(): | |
| """ | |
| Dynamically queries the SQLite database to get its schema. | |
| """ | |
| print("Getting schema info...") | |
| schema_md = "# Database Schema\n\n" | |
| try: | |
| with get_db_connection() as conn: | |
| cursor = conn.cursor() | |
| # Get all table names | |
| cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';") | |
| tables = cursor.fetchall() | |
| if not tables: | |
| return "Could not find any tables in the database." | |
| for table in tables: | |
| table_name = table[0] | |
| schema_md += f"## Table: `{table_name}`\n\n" | |
| schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n" | |
| schema_md += "|:------------|:----------|:---------|:------------|\n" | |
| # Get column info for each table | |
| cursor.execute(f"PRAGMA table_info({table_name});") | |
| columns = cursor.fetchall() | |
| for col in columns: | |
| # col structure: (cid, name, type, notnull, dflt_value, pk) | |
| name = col[1] | |
| dtype = col[2] | |
| notnull = bool(col[3]) | |
| pk = bool(col[5]) | |
| schema_md += f"| `{name}` | `{dtype}` | {notnull} | {pk} |\n" | |
| schema_md += "\n" | |
| return schema_md | |
| except Exception as e: | |
| print(f"Error in get_schema_info: {e}") | |
| traceback.print_exc() | |
| return f"An error occurred while fetching schema: {e}" | |
| def run_query(start_node, relation, end_node, limit): | |
| """ | |
| Runs a query against the ConceptNet database. | |
| *** ASSUMPTION ***: | |
| This function *assumes* the main table is named 'edges' and has | |
| columns 'start', 'rel', and 'end'. | |
| Use the 'Schema Explorer' tab to verify this. If the names are | |
| different (e.g., 'conceptnet_edges'), you must edit this function. | |
| """ | |
| print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'") | |
| # --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! --- | |
| TABLE_NAME = "edges" | |
| START_COL = "start" | |
| REL_COL = "rel" | |
| END_COL = "end" | |
| # --- | |
| query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1" | |
| params = [] | |
| try: | |
| if start_node: | |
| query += f" AND {START_COL} LIKE ?" | |
| params.append(start_node) | |
| if relation: | |
| query += f" AND {REL_COL} LIKE ?" | |
| params.append(relation) | |
| if end_node: | |
| query += f" AND {END_COL} LIKE ?" | |
| params.append(end_node) | |
| query += " LIMIT ?" | |
| params.append(limit) | |
| print(f"Executing SQL: {query}") | |
| print(f"With params: {params}") | |
| with get_db_connection() as conn: | |
| df = pd.read_sql_query(query, conn, params=params) | |
| if df.empty: | |
| return pd.DataFrame(), "Query ran successfully but returned no results." | |
| return df, "Query successful!" | |
| except Exception as e: | |
| print(f"Error in run_query: {e}") | |
| traceback.print_exc() | |
| err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different." | |
| return pd.DataFrame(), err_msg | |
| # --- 3. Build the Gradio UI --- | |
| with gr.Blocks(title="ConceptNet SQLite Explorer") as demo: | |
| gr.Markdown("# ConceptNet SQLite Explorer") | |
| gr.Markdown(f"Successfully loaded database from `{DB_PATH}`") | |
| with gr.Tabs(): | |
| with gr.TabItem("Schema Explorer"): | |
| gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.") | |
| schema_btn = gr.Button("Show Database Schema", variant="secondary") | |
| schema_output = gr.Markdown("Schema will appear here...") | |
| with gr.TabItem("Query Edges"): | |
| gr.Markdown( | |
| "**Run a query against the database.**\n" | |
| f"This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n" | |
| "Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)." | |
| ) | |
| with gr.Row(): | |
| start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog") | |
| rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA") | |
| end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal") | |
| limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1) | |
| query_btn = gr.Button("Run Query", variant="primary") | |
| status_output = gr.Markdown("Status will appear here...") | |
| results_output = gr.DataFrame(label="Query Results", interactive=False) | |
| # --- 4. Connect UI Elements to Functions --- | |
| schema_btn.click( | |
| fn=get_schema_info, | |
| inputs=None, | |
| outputs=schema_output, | |
| api_name="get_schema" | |
| ) | |
| query_btn.click( | |
| fn=run_query, | |
| inputs=[start_input, rel_input, end_input, limit_slider], | |
| outputs=[results_output, status_output], | |
| api_name="run_query" | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() |