Spaces:
Running
Running
File size: 9,382 Bytes
9fc6811 b945468 bb71fb2 9fc6811 bb71fb2 9fc6811 b945468 9fc6811 bb71fb2 1f46ad2 9fc6811 1f46ad2 78578c2 1f46ad2 78578c2 1f46ad2 78578c2 bb71fb2 78578c2 bb71fb2 9fc6811 1f46ad2 9fc6811 1f46ad2 bb71fb2 9fc6811 1f46ad2 bb71fb2 9fc6811 bb71fb2 9fc6811 1f46ad2 bb71fb2 9fc6811 78578c2 9fc6811 bb71fb2 9fc6811 78578c2 9fc6811 bb71fb2 9fc6811 bb71fb2 9fc6811 bb71fb2 9fc6811 78578c2 1f46ad2 9fc6811 1f46ad2 bb71fb2 9fc6811 bb71fb2 1f46ad2 bb71fb2 1f46ad2 bb71fb2 9fc6811 bb71fb2 9fc6811 bb71fb2 9fc6811 bb71fb2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback
# --- 1. Download and Cache the Database ---
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet-v5.7.0.db"
# This is the critical caching step for HF Spaces
DB_PATH = hf_hub_download(
repo_id=REPO_ID,
filename=DB_FILENAME,
repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")
# --- 2. Database Helper Functions ---
def get_db_connection():
"""
Creates a new read-only connection to the SQLite database.
This is safer for Gradio's multi-threading.
"""
try:
# Connect in read-only mode
db_uri = f"file:{DB_PATH}?mode=ro"
conn = sqlite3.connect(db_uri, uri=True)
return conn
except Exception as e:
print(f"Error connecting to DB: {e}")
traceback.print_exc()
return None
def get_schema_info():
"""
Dynamically queries the SQLite database to get its schema.
"""
print("Getting schema info...")
schema_md = "# Database Schema\n\n"
try:
with get_db_connection() as conn:
cursor = conn.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
tables = cursor.fetchall()
if not tables:
return "Could not find any tables in the database."
for table in tables:
table_name = table[0]
schema_md += f"## Table: `{table_name}`\n\n"
schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
schema_md += "|:------------|:----------|:---------|:------------|\n"
cursor.execute(f"PRAGMA table_info({table_name});")
columns = cursor.fetchall()
for col in columns:
# col structure: (cid, name, type, notnull, dflt_value, pk)
name, dtype, notnull, pk = col[1], col[2], col[3], col[5]
schema_md += f"| `{name}` | `{dtype}` | {bool(notnull)} | {bool(pk)} |\n"
schema_md += "\n"
return schema_md
except Exception as e:
print(f"Error in get_schema_info: {e}")
traceback.print_exc()
return f"An error occurred while fetching schema: {e}"
def run_query(start_node, relation, end_node, limit):
"""
Runs an efficient query by filtering on text paths first,
then joining by indexed IDs.
*** FIX ***: This now queries the `node.id` column (which contains the full path)
and the `relation.label` column, based on the raw query results.
"""
print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
# --- FIX: Select the `id` column (which has the path) ---
select_clause = """
SELECT
e.id AS edge_id,
s.id AS start_id_path,
r.label AS relation_label,
en.id AS end_id_path,
e.weight,
e.dataset,
e.surface_text,
s.label AS start_label_text,
en.label AS end_label_text
FROM
edge AS e
JOIN
relation AS r ON e.rel_id = r.id
JOIN
node AS s ON e.start_id = s.id
JOIN
node AS en ON e.end_id = en.id
"""
where_conditions = []
params = []
try:
# --- FIX: Query `node.id` for nodes, `relation.label` for relations ---
if start_node:
# We add a wildcard to the front to match the full http://... ID
# unless the user has already added their own wildcard.
param_val = start_node if "%" in start_node else f"%{start_node}"
where_conditions.append(f"e.start_id IN (SELECT id FROM node WHERE id LIKE ?)")
params.append(param_val)
if relation:
# Relation table uses `label` which is a direct match (e.g., /r/IsA)
op = "LIKE" if "%" in relation else "="
where_conditions.append(f"e.rel_id IN (SELECT id FROM relation WHERE label {op} ?)")
params.append(relation)
if end_node:
# We add a wildcard to the front to match the full http://... ID
param_val = end_node if "%" in end_node else f"%{end_node}"
where_conditions.append(f"e.end_id IN (SELECT id FROM node WHERE id LIKE ?)")
params.append(param_val)
# --- End of Fix ---
if not where_conditions:
where_clause = " WHERE 1=1" # Get random edges if no filter
else:
where_clause = " WHERE " + " AND ".join(where_conditions)
query = select_clause + where_clause + " LIMIT ?"
params.append(limit)
print(f"Executing SQL: {query}")
print(f"With params: {params}")
with get_db_connection() as conn:
df = pd.read_sql_query(query, conn, params=params)
if df.empty:
return pd.DataFrame(), "Query ran successfully but returned no results. (This is common if your query is very specific)."
return df, "Query successful!"
except Exception as e:
print(f"Error in run_query: {e}")
traceback.print_exc()
err_msg = f"**Query Failed!**\n\n`{e}`"
return pd.DataFrame(), err_msg
def run_raw_query(sql_query):
"""
Executes a raw, read-only SQL query against the database.
DANGEROUS but useful for debugging.
"""
print(f"Running raw query: {sql_query}")
# Basic safety check for read-only
if not sql_query.strip().upper().startswith("SELECT"):
return pd.DataFrame(), "**Error:** Only `SELECT` statements are allowed for safety."
try:
with get_db_connection() as conn:
df = pd.read_sql_query(sql_query, conn)
if df.empty:
return df, "Query ran successfully but returned no results."
return df, "Raw query successful!"
except Exception as e:
print(f"Error in run_raw_query: {e}")
traceback.print_exc()
return pd.DataFrame(), f"**Query Failed!**\n\n`{e}`"
# --- 3. Build the Gradio UI ---
with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
gr.Markdown(f"# ConceptNet SQLite Explorer (DB: `{DB_FILENAME}`)")
gr.Markdown(f"**Note:** Initial query might take a few seconds as the database warms up.")
with gr.Tabs():
with gr.TabItem("Query Builder"):
gr.Markdown(
"**Run a query against the database.**\n"
"This query joins the `edge`, `node`, and `relation` tables to find connections.\n"
"Use the *path part* of the ID (e.g., `/c/en/dog` or `/r/Is%`)."
)
with gr.Row():
# --- FIX: Updated labels for clarity ---
start_input = gr.Textbox(label="Start Node Path", placeholder="/c/en/dog")
rel_input = gr.Textbox(label="Relation Path", placeholder="/r/IsA")
end_input = gr.Textbox(label="End Node Path", placeholder="/c/en/animal")
limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
query_btn = gr.Button("Run Query", variant="primary")
status_output = gr.Markdown("Status will appear here...")
results_output = gr.DataFrame(label="Query Results", interactive=False)
with gr.TabItem("Raw SQL Query"):
gr.Markdown("**Danger Zone:** Run a raw `SELECT` query against the database. Use the Schema Explorer tab to see table/column names.")
# --- FIX: Updated placeholder to a useful, correct query ---
raw_sql_input = gr.Textbox(
label="Raw SQL Query",
placeholder="SELECT id, label, language, path FROM node WHERE id LIKE '%/c/en/dog' LIMIT 5",
lines=5
)
raw_query_btn = gr.Button("Run Raw SQL", variant="secondary")
raw_status_output = gr.Markdown("Status will appear here...")
raw_results_output = gr.DataFrame(label="Raw Query Results", interactive=False)
with gr.TabItem("Schema Explorer"):
gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
schema_btn = gr.Button("Show Database Schema", variant="secondary")
schema_output = gr.Markdown("Schema will appear here...")
# --- 4. Connect UI Elements to Functions ---
query_btn.click(
fn=run_query,
inputs=[start_input, rel_input, end_input, limit_slider],
outputs=[results_output, status_output],
api_name="run_query"
)
raw_query_btn.click(
fn=run_raw_query,
inputs=[raw_sql_input],
outputs=[raw_results_output, raw_status_output],
api_name="run_raw_query"
)
schema_btn.click(
fn=get_schema_info,
inputs=None,
outputs=schema_output,
api_name="get_schema"
)
if __name__ == "__main__":
# Removed experimental ssr_mode=False
demo.launch(ssr_mode=False) |