File size: 6,509 Bytes
9fc6811
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback

# --- 1. Download and Cache the Database ---
# This is the cricital step for HF Spaces.
# hf_hub_download caches the file. When your app.py restarts,
# it will find the file in the cache and NOT re-download 19.3 GB.
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet.db"
DB_PATH = hf_hub_download(
    repo_id=REPO_ID, 
    filename=DB_FILENAME, 
    repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")

# --- 2. Database Helper Functions ---

def get_db_connection():
    """
    Creates a new read-only connection to the SQLite database.
    This is safer for Gradio's multi-threading.
    """
    try:
        # Connect in read-only mode
        db_uri = f"file:{DB_PATH}?mode=ro"
        conn = sqlite3.connect(db_uri, uri=True)
        return conn
    except Exception as e:
        print(f"Error connecting to DB: {e}")
        traceback.print_exc()
        return None

def get_schema_info():
    """
    Dynamically queries the SQLite database to get its schema.
    """
    print("Getting schema info...")
    schema_md = "# Database Schema\n\n"
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # Get all table names
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
            tables = cursor.fetchall()
            
            if not tables:
                return "Could not find any tables in the database."

            for table in tables:
                table_name = table[0]
                schema_md += f"## Table: `{table_name}`\n\n"
                schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
                schema_md += "|:------------|:----------|:---------|:------------|\n"
                
                # Get column info for each table
                cursor.execute(f"PRAGMA table_info({table_name});")
                columns = cursor.fetchall()
                for col in columns:
                    # col structure: (cid, name, type, notnull, dflt_value, pk)
                    name = col[1]
                    dtype = col[2]
                    notnull = bool(col[3])
                    pk = bool(col[5])
                    schema_md += f"| `{name}` | `{dtype}` | {notnull} | {pk} |\n"
                schema_md += "\n"
        
        return schema_md
    except Exception as e:
        print(f"Error in get_schema_info: {e}")
        traceback.print_exc()
        return f"An error occurred while fetching schema: {e}"

def run_query(start_node, relation, end_node, limit):
    """
    Runs a query against the ConceptNet database.
    
    *** ASSUMPTION ***:
    This function *assumes* the main table is named 'edges' and has
    columns 'start', 'rel', and 'end'.
    
    Use the 'Schema Explorer' tab to verify this. If the names are
    different (e.g., 'conceptnet_edges'), you must edit this function.
    """
    print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
    
    # --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! ---
    TABLE_NAME = "edges" 
    START_COL = "start"
    REL_COL = "rel"
    END_COL = "end"
    # ---
    
    query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1"
    params = []
    
    try:
        if start_node:
            query += f" AND {START_COL} LIKE ?"
            params.append(start_node)
        if relation:
            query += f" AND {REL_COL} LIKE ?"
            params.append(relation)
        if end_node:
            query += f" AND {END_COL} LIKE ?"
            params.append(end_node)
            
        query += " LIMIT ?"
        params.append(limit)
        
        print(f"Executing SQL: {query}")
        print(f"With params: {params}")

        with get_db_connection() as conn:
            df = pd.read_sql_query(query, conn, params=params)
        
        if df.empty:
            return pd.DataFrame(), "Query ran successfully but returned no results."
            
        return df, "Query successful!"

    except Exception as e:
        print(f"Error in run_query: {e}")
        traceback.print_exc()
        err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different."
        return pd.DataFrame(), err_msg

# --- 3. Build the Gradio UI ---

with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
    gr.Markdown("# ConceptNet SQLite Explorer")
    gr.Markdown(f"Successfully loaded database from `{DB_PATH}`")
    
    with gr.Tabs():
        with gr.TabItem("Schema Explorer"):
            gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
            schema_btn = gr.Button("Show Database Schema", variant="secondary")
            schema_output = gr.Markdown("Schema will appear here...")
            
        with gr.TabItem("Query Edges"):
            gr.Markdown(
                "**Run a query against the database.**\n"
                f"This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n"
                "Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)."
            )
            with gr.Row():
                start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog")
                rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA")
                end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal")
            
            limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
            query_btn = gr.Button("Run Query", variant="primary")
            
            status_output = gr.Markdown("Status will appear here...")
            results_output = gr.DataFrame(label="Query Results", interactive=False)

    # --- 4. Connect UI Elements to Functions ---
    schema_btn.click(
        fn=get_schema_info,
        inputs=None,
        outputs=schema_output,
        api_name="get_schema"
    )
    
    query_btn.click(
        fn=run_query,
        inputs=[start_input, rel_input, end_input, limit_slider],
        outputs=[results_output, status_output],
        api_name="run_query"
    )

if __name__ == "__main__":
    demo.launch()