File size: 9,382 Bytes
9fc6811
 
 
 
 
 
 
 
 
 
b945468
 
bb71fb2
9fc6811
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb71fb2
 
9fc6811
b945468
 
9fc6811
 
 
 
 
 
 
bb71fb2
 
 
1f46ad2
 
9fc6811
 
 
1f46ad2
78578c2
 
 
1f46ad2
78578c2
1f46ad2
78578c2
 
bb71fb2
 
 
78578c2
 
 
 
 
 
 
 
 
 
bb71fb2
9fc6811
 
 
1f46ad2
9fc6811
1f46ad2
 
 
 
 
bb71fb2
9fc6811
1f46ad2
bb71fb2
 
9fc6811
bb71fb2
9fc6811
1f46ad2
 
 
 
bb71fb2
 
 
 
 
 
9fc6811
78578c2
9fc6811
 
 
 
 
 
 
 
 
bb71fb2
9fc6811
 
 
 
 
 
78578c2
9fc6811
bb71fb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
 
 
 
bb71fb2
 
9fc6811
 
bb71fb2
9fc6811
 
78578c2
1f46ad2
9fc6811
 
1f46ad2
bb71fb2
 
 
9fc6811
 
 
 
 
 
bb71fb2
 
 
1f46ad2
bb71fb2
 
1f46ad2
bb71fb2
 
 
 
 
9fc6811
bb71fb2
 
 
 
 
9fc6811
 
 
 
 
 
 
bb71fb2
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
 
bb71fb2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback

# --- 1. Download and Cache the Database ---
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet-v5.7.0.db"

# This is the critical caching step for HF Spaces
DB_PATH = hf_hub_download(
    repo_id=REPO_ID, 
    filename=DB_FILENAME, 
    repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")

# --- 2. Database Helper Functions ---

def get_db_connection():
    """
    Creates a new read-only connection to the SQLite database.
    This is safer for Gradio's multi-threading.
    """
    try:
        # Connect in read-only mode
        db_uri = f"file:{DB_PATH}?mode=ro"
        conn = sqlite3.connect(db_uri, uri=True)
        return conn
    except Exception as e:
        print(f"Error connecting to DB: {e}")
        traceback.print_exc()
        return None

def get_schema_info():
    """
    Dynamically queries the SQLite database to get its schema.
    """
    print("Getting schema info...")
    schema_md = "# Database Schema\n\n"
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
            tables = cursor.fetchall()
            
            if not tables:
                return "Could not find any tables in the database."

            for table in tables:
                table_name = table[0]
                schema_md += f"## Table: `{table_name}`\n\n"
                schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
                schema_md += "|:------------|:----------|:---------|:------------|\n"
                
                cursor.execute(f"PRAGMA table_info({table_name});")
                columns = cursor.fetchall()
                for col in columns:
                    # col structure: (cid, name, type, notnull, dflt_value, pk)
                    name, dtype, notnull, pk = col[1], col[2], col[3], col[5]
                    schema_md += f"| `{name}` | `{dtype}` | {bool(notnull)} | {bool(pk)} |\n"
                schema_md += "\n"
            
            return schema_md
    except Exception as e:
        print(f"Error in get_schema_info: {e}")
        traceback.print_exc()
        return f"An error occurred while fetching schema: {e}"

def run_query(start_node, relation, end_node, limit):
    """
    Runs an efficient query by filtering on text paths first,
    then joining by indexed IDs.
    
    *** FIX ***: This now queries the `node.id` column (which contains the full path)
    and the `relation.label` column, based on the raw query results.
    """
    print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
    
    # --- FIX: Select the `id` column (which has the path) ---
    select_clause = """
        SELECT
            e.id AS edge_id,
            s.id AS start_id_path,
            r.label AS relation_label,
            en.id AS end_id_path,
            e.weight,
            e.dataset,
            e.surface_text,
            s.label AS start_label_text,
            en.label AS end_label_text
        FROM
            edge AS e
        JOIN
            relation AS r ON e.rel_id = r.id
        JOIN
            node AS s ON e.start_id = s.id
        JOIN
            node AS en ON e.end_id = en.id
    """
    
    where_conditions = []
    params = []
    
    try:
        # --- FIX: Query `node.id` for nodes, `relation.label` for relations ---
        if start_node:
            # We add a wildcard to the front to match the full http://... ID
            # unless the user has already added their own wildcard.
            param_val = start_node if "%" in start_node else f"%{start_node}"
            where_conditions.append(f"e.start_id IN (SELECT id FROM node WHERE id LIKE ?)")
            params.append(param_val)
        
        if relation:
            # Relation table uses `label` which is a direct match (e.g., /r/IsA)
            op = "LIKE" if "%" in relation else "="
            where_conditions.append(f"e.rel_id IN (SELECT id FROM relation WHERE label {op} ?)")
            params.append(relation)

        if end_node:
            # We add a wildcard to the front to match the full http://... ID
            param_val = end_node if "%" in end_node else f"%{end_node}"
            where_conditions.append(f"e.end_id IN (SELECT id FROM node WHERE id LIKE ?)")
            params.append(param_val)
        # --- End of Fix ---

        if not where_conditions:
            where_clause = " WHERE 1=1" # Get random edges if no filter
        else:
            where_clause = " WHERE " + " AND ".join(where_conditions)
            
        query = select_clause + where_clause + " LIMIT ?"
        params.append(limit)
        
        print(f"Executing SQL: {query}")
        print(f"With params: {params}")

        with get_db_connection() as conn:
            df = pd.read_sql_query(query, conn, params=params)
        
        if df.empty:
            return pd.DataFrame(), "Query ran successfully but returned no results. (This is common if your query is very specific)."
            
        return df, "Query successful!"

    except Exception as e:
        print(f"Error in run_query: {e}")
        traceback.print_exc()
        err_msg = f"**Query Failed!**\n\n`{e}`"
        return pd.DataFrame(), err_msg

def run_raw_query(sql_query):
    """
    Executes a raw, read-only SQL query against the database.
    DANGEROUS but useful for debugging.
    """
    print(f"Running raw query: {sql_query}")
    
    # Basic safety check for read-only
    if not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "**Error:** Only `SELECT` statements are allowed for safety."
        
    try:
        with get_db_connection() as conn:
            df = pd.read_sql_query(sql_query, conn)
        
        if df.empty:
            return df, "Query ran successfully but returned no results."
        
        return df, "Raw query successful!"
    except Exception as e:
        print(f"Error in run_raw_query: {e}")
        traceback.print_exc()
        return pd.DataFrame(), f"**Query Failed!**\n\n`{e}`"


# --- 3. Build the Gradio UI ---

with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
    gr.Markdown(f"# ConceptNet SQLite Explorer (DB: `{DB_FILENAME}`)")
    gr.Markdown(f"**Note:** Initial query might take a few seconds as the database warms up.")
    
    with gr.Tabs():
        with gr.TabItem("Query Builder"):
            gr.Markdown(
                "**Run a query against the database.**\n"
                "This query joins the `edge`, `node`, and `relation` tables to find connections.\n"
                "Use the *path part* of the ID (e.g., `/c/en/dog` or `/r/Is%`)."
            )
            with gr.Row():
                # --- FIX: Updated labels for clarity ---
                start_input = gr.Textbox(label="Start Node Path", placeholder="/c/en/dog")
                rel_input = gr.Textbox(label="Relation Path", placeholder="/r/IsA")
                end_input = gr.Textbox(label="End Node Path", placeholder="/c/en/animal")
            
            limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
            query_btn = gr.Button("Run Query", variant="primary")
            
            status_output = gr.Markdown("Status will appear here...")
            results_output = gr.DataFrame(label="Query Results", interactive=False)
        
        with gr.TabItem("Raw SQL Query"):
            gr.Markdown("**Danger Zone:** Run a raw `SELECT` query against the database. Use the Schema Explorer tab to see table/column names.")
            # --- FIX: Updated placeholder to a useful, correct query ---
            raw_sql_input = gr.Textbox(
                label="Raw SQL Query", 
                placeholder="SELECT id, label, language, path FROM node WHERE id LIKE '%/c/en/dog' LIMIT 5",
                lines=5
            )
            raw_query_btn = gr.Button("Run Raw SQL", variant="secondary")
            raw_status_output = gr.Markdown("Status will appear here...")
            raw_results_output = gr.DataFrame(label="Raw Query Results", interactive=False)

        with gr.TabItem("Schema Explorer"):
            gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
            schema_btn = gr.Button("Show Database Schema", variant="secondary")
            schema_output = gr.Markdown("Schema will appear here...")
            
    # --- 4. Connect UI Elements to Functions ---
    query_btn.click(
        fn=run_query,
        inputs=[start_input, rel_input, end_input, limit_slider],
        outputs=[results_output, status_output],
        api_name="run_query"
    )
    
    raw_query_btn.click(
        fn=run_raw_query,
        inputs=[raw_sql_input],
        outputs=[raw_results_output, raw_status_output],
        api_name="run_raw_query"
    )

    schema_btn.click(
        fn=get_schema_info,
        inputs=None,
        outputs=schema_output,
        api_name="get_schema"
    )

if __name__ == "__main__":
    # Removed experimental ssr_mode=False
    demo.launch(ssr_mode=False)