Spaces:

cstr
/

conceptnet_db

Running

File size: 6,509 Bytes

9fc6811

import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback

# --- 1. Download and Cache the Database ---
# This is the cricital step for HF Spaces.
# hf_hub_download caches the file. When your app.py restarts,
# it will find the file in the cache and NOT re-download 19.3 GB.
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet.db"
DB_PATH = hf_hub_download(
    repo_id=REPO_ID, 
    filename=DB_FILENAME, 
    repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")

# --- 2. Database Helper Functions ---

def get_db_connection():
    """
    Creates a new read-only connection to the SQLite database.
    This is safer for Gradio's multi-threading.
    """
    try:
        # Connect in read-only mode
        db_uri = f"file:{DB_PATH}?mode=ro"
        conn = sqlite3.connect(db_uri, uri=True)
        return conn
    except Exception as e:
        print(f"Error connecting to DB: {e}")
        traceback.print_exc()
        return None

def get_schema_info():
    """
    Dynamically queries the SQLite database to get its schema.
    """
    print("Getting schema info...")
    schema_md = "# Database Schema\n\n"
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # Get all table names
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
            tables = cursor.fetchall()
            
            if not tables:
                return "Could not find any tables in the database."

            for table in tables:
                table_name = table[0]
                schema_md += f"## Table: `{table_name}`\n\n"
                schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
                schema_md += "|:------------|:----------|:---------|:------------|\n"
                
                # Get column info for each table
                cursor.execute(f"PRAGMA table_info({table_name});")
                columns = cursor.fetchall()
                for col in columns:
                    # col structure: (cid, name, type, notnull, dflt_value, pk)
                    name = col[1]
                    dtype = col[2]
                    notnull = bool(col[3])
                    pk = bool(col[5])
                    schema_md += f"| `{name}` | `{dtype}` | {notnull} | {pk} |\n"
                schema_md += "\n"
        
        return schema_md
    except Exception as e:
        print(f"Error in get_schema_info: {e}")
        traceback.print_exc()
        return f"An error occurred while fetching schema: {e}"

def run_query(start_node, relation, end_node, limit):
    """
    Runs a query against the ConceptNet database.
    
    *** ASSUMPTION ***:
    This function *assumes* the main table is named 'edges' and has
    columns 'start', 'rel', and 'end'.
    
    Use the 'Schema Explorer' tab to verify this. If the names are
    different (e.g., 'conceptnet_edges'), you must edit this function.
    """
    print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
    
    # --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! ---
    TABLE_NAME = "edges" 
    START_COL = "start"
    REL_COL = "rel"
    END_COL = "end"
    # ---
    
    query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1"
    params = []
    
    try:
        if start_node:
            query += f" AND {START_COL} LIKE ?"
            params.append(start_node)
        if relation:
            query += f" AND {REL_COL} LIKE ?"
            params.append(relation)
        if end_node:
            query += f" AND {END_COL} LIKE ?"
            params.append(end_node)
            
        query += " LIMIT ?"
        params.append(limit)
        
        print(f"Executing SQL: {query}")
        print(f"With params: {params}")

        with get_db_connection() as conn:
            df = pd.read_sql_query(query, conn, params=params)
        
        if df.empty:
            return pd.DataFrame(), "Query ran successfully but returned no results."
            
        return df, "Query successful!"

    except Exception as e:
        print(f"Error in run_query: {e}")
        traceback.print_exc()
        err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different."
        return pd.DataFrame(), err_msg

# --- 3. Build the Gradio UI ---

with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
    gr.Markdown("# ConceptNet SQLite Explorer")
    gr.Markdown(f"Successfully loaded database from `{DB_PATH}`")
    
    with gr.Tabs():
        with gr.TabItem("Schema Explorer"):
            gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
            schema_btn = gr.Button("Show Database Schema", variant="secondary")
            schema_output = gr.Markdown("Schema will appear here...")
            
        with gr.TabItem("Query Edges"):
            gr.Markdown(
                "**Run a query against the database.**\n"
                f"This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n"
                "Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)."
            )
            with gr.Row():
                start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog")
                rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA")
                end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal")
            
            limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
            query_btn = gr.Button("Run Query", variant="primary")
            
            status_output = gr.Markdown("Status will appear here...")
            results_output = gr.DataFrame(label="Query Results", interactive=False)

    # --- 4. Connect UI Elements to Functions ---
    schema_btn.click(
        fn=get_schema_info,
        inputs=None,
        outputs=schema_output,
        api_name="get_schema"
    )
    
    query_btn.click(
        fn=run_query,
        inputs=[start_input, rel_input, end_input, limit_slider],
        outputs=[results_output, status_output],
        api_name="run_query"
    )

if __name__ == "__main__":
    demo.launch()