Spaces:
Running
Running
| import gradio as gr | |
| import sqlite3 | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download | |
| import os | |
| import traceback | |
| # --- 1. Download and Cache the Database --- | |
| print("Downloading database (if not already cached)...") | |
| REPO_ID = "ysenarath/conceptnet-sqlite" | |
| DB_FILENAME = "data/conceptnet-v5.7.0.db" | |
| # This is the critical caching step for HF Spaces | |
| DB_PATH = hf_hub_download( | |
| repo_id=REPO_ID, | |
| filename=DB_FILENAME, | |
| repo_type="dataset" | |
| ) | |
| print(f"Database is available at: {DB_PATH}") | |
| # --- 2. Database Helper Functions --- | |
| def get_db_connection(): | |
| """ | |
| Creates a new read-only connection to the SQLite database. | |
| This is safer for Gradio's multi-threading. | |
| """ | |
| try: | |
| # Connect in read-only mode | |
| db_uri = f"file:{DB_PATH}?mode=ro" | |
| conn = sqlite3.connect(db_uri, uri=True) | |
| return conn | |
| except Exception as e: | |
| print(f"Error connecting to DB: {e}") | |
| traceback.print_exc() | |
| return None | |
| def get_schema_info(): | |
| """ | |
| Dynamically queries the SQLite database to get its schema. | |
| """ | |
| print("Getting schema info...") | |
| schema_md = "# Database Schema\n\n" | |
| try: | |
| with get_db_connection() as conn: | |
| cursor = conn.cursor() | |
| cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';") | |
| tables = cursor.fetchall() | |
| if not tables: | |
| return "Could not find any tables in the database." | |
| for table in tables: | |
| table_name = table[0] | |
| schema_md += f"## Table: `{table_name}`\n\n" | |
| schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n" | |
| schema_md += "|:------------|:----------|:---------|:------------|\n" | |
| cursor.execute(f"PRAGMA table_info({table_name});") | |
| columns = cursor.fetchall() | |
| for col in columns: | |
| # col structure: (cid, name, type, notnull, dflt_value, pk) | |
| name, dtype, notnull, pk = col[1], col[2], col[3], col[5] | |
| schema_md += f"| `{name}` | `{dtype}` | {bool(notnull)} | {bool(pk)} |\n" | |
| schema_md += "\n" | |
| return schema_md | |
| except Exception as e: | |
| print(f"Error in get_schema_info: {e}") | |
| traceback.print_exc() | |
| return f"An error occurred while fetching schema: {e}" | |
| def run_query(start_node, relation, end_node, limit): | |
| """ | |
| Runs an efficient query by filtering on text paths first, | |
| then joining by indexed IDs. | |
| *** FIX ***: This now queries the `node.id` column (which contains the full path) | |
| and the `relation.label` column, based on the raw query results. | |
| """ | |
| print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'") | |
| # --- FIX: Select the `id` column (which has the path) --- | |
| select_clause = """ | |
| SELECT | |
| e.id AS edge_id, | |
| s.id AS start_id_path, | |
| r.label AS relation_label, | |
| en.id AS end_id_path, | |
| e.weight, | |
| e.dataset, | |
| e.surface_text, | |
| s.label AS start_label_text, | |
| en.label AS end_label_text | |
| FROM | |
| edge AS e | |
| JOIN | |
| relation AS r ON e.rel_id = r.id | |
| JOIN | |
| node AS s ON e.start_id = s.id | |
| JOIN | |
| node AS en ON e.end_id = en.id | |
| """ | |
| where_conditions = [] | |
| params = [] | |
| try: | |
| # --- FIX: Query `node.id` for nodes, `relation.label` for relations --- | |
| if start_node: | |
| # We add a wildcard to the front to match the full http://... ID | |
| # unless the user has already added their own wildcard. | |
| param_val = start_node if "%" in start_node else f"%{start_node}" | |
| where_conditions.append(f"e.start_id IN (SELECT id FROM node WHERE id LIKE ?)") | |
| params.append(param_val) | |
| if relation: | |
| # Relation table uses `label` which is a direct match (e.g., /r/IsA) | |
| op = "LIKE" if "%" in relation else "=" | |
| where_conditions.append(f"e.rel_id IN (SELECT id FROM relation WHERE label {op} ?)") | |
| params.append(relation) | |
| if end_node: | |
| # We add a wildcard to the front to match the full http://... ID | |
| param_val = end_node if "%" in end_node else f"%{end_node}" | |
| where_conditions.append(f"e.end_id IN (SELECT id FROM node WHERE id LIKE ?)") | |
| params.append(param_val) | |
| # --- End of Fix --- | |
| if not where_conditions: | |
| where_clause = " WHERE 1=1" # Get random edges if no filter | |
| else: | |
| where_clause = " WHERE " + " AND ".join(where_conditions) | |
| query = select_clause + where_clause + " LIMIT ?" | |
| params.append(limit) | |
| print(f"Executing SQL: {query}") | |
| print(f"With params: {params}") | |
| with get_db_connection() as conn: | |
| df = pd.read_sql_query(query, conn, params=params) | |
| if df.empty: | |
| return pd.DataFrame(), "Query ran successfully but returned no results. (This is common if your query is very specific)." | |
| return df, "Query successful!" | |
| except Exception as e: | |
| print(f"Error in run_query: {e}") | |
| traceback.print_exc() | |
| err_msg = f"**Query Failed!**\n\n`{e}`" | |
| return pd.DataFrame(), err_msg | |
| def run_raw_query(sql_query): | |
| """ | |
| Executes a raw, read-only SQL query against the database. | |
| DANGEROUS but useful for debugging. | |
| """ | |
| print(f"Running raw query: {sql_query}") | |
| # Basic safety check for read-only | |
| if not sql_query.strip().upper().startswith("SELECT"): | |
| return pd.DataFrame(), "**Error:** Only `SELECT` statements are allowed for safety." | |
| try: | |
| with get_db_connection() as conn: | |
| df = pd.read_sql_query(sql_query, conn) | |
| if df.empty: | |
| return df, "Query ran successfully but returned no results." | |
| return df, "Raw query successful!" | |
| except Exception as e: | |
| print(f"Error in run_raw_query: {e}") | |
| traceback.print_exc() | |
| return pd.DataFrame(), f"**Query Failed!**\n\n`{e}`" | |
| # --- 3. Build the Gradio UI --- | |
| with gr.Blocks(title="ConceptNet SQLite Explorer") as demo: | |
| gr.Markdown(f"# ConceptNet SQLite Explorer (DB: `{DB_FILENAME}`)") | |
| gr.Markdown(f"**Note:** Initial query might take a few seconds as the database warms up.") | |
| with gr.Tabs(): | |
| with gr.TabItem("Query Builder"): | |
| gr.Markdown( | |
| "**Run a query against the database.**\n" | |
| "This query joins the `edge`, `node`, and `relation` tables to find connections.\n" | |
| "Use the *path part* of the ID (e.g., `/c/en/dog` or `/r/Is%`)." | |
| ) | |
| with gr.Row(): | |
| # --- FIX: Updated labels for clarity --- | |
| start_input = gr.Textbox(label="Start Node Path", placeholder="/c/en/dog") | |
| rel_input = gr.Textbox(label="Relation Path", placeholder="/r/IsA") | |
| end_input = gr.Textbox(label="End Node Path", placeholder="/c/en/animal") | |
| limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1) | |
| query_btn = gr.Button("Run Query", variant="primary") | |
| status_output = gr.Markdown("Status will appear here...") | |
| results_output = gr.DataFrame(label="Query Results", interactive=False) | |
| with gr.TabItem("Raw SQL Query"): | |
| gr.Markdown("**Danger Zone:** Run a raw `SELECT` query against the database. Use the Schema Explorer tab to see table/column names.") | |
| # --- FIX: Updated placeholder to a useful, correct query --- | |
| raw_sql_input = gr.Textbox( | |
| label="Raw SQL Query", | |
| placeholder="SELECT id, label, language, path FROM node WHERE id LIKE '%/c/en/dog' LIMIT 5", | |
| lines=5 | |
| ) | |
| raw_query_btn = gr.Button("Run Raw SQL", variant="secondary") | |
| raw_status_output = gr.Markdown("Status will appear here...") | |
| raw_results_output = gr.DataFrame(label="Raw Query Results", interactive=False) | |
| with gr.TabItem("Schema Explorer"): | |
| gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.") | |
| schema_btn = gr.Button("Show Database Schema", variant="secondary") | |
| schema_output = gr.Markdown("Schema will appear here...") | |
| # --- 4. Connect UI Elements to Functions --- | |
| query_btn.click( | |
| fn=run_query, | |
| inputs=[start_input, rel_input, end_input, limit_slider], | |
| outputs=[results_output, status_output], | |
| api_name="run_query" | |
| ) | |
| raw_query_btn.click( | |
| fn=run_raw_query, | |
| inputs=[raw_sql_input], | |
| outputs=[raw_results_output, raw_status_output], | |
| api_name="run_raw_query" | |
| ) | |
| schema_btn.click( | |
| fn=get_schema_info, | |
| inputs=None, | |
| outputs=schema_output, | |
| api_name="get_schema" | |
| ) | |
| if __name__ == "__main__": | |
| # Removed experimental ssr_mode=False | |
| demo.launch(ssr_mode=False) |