Spaces:
Running
Running
File size: 6,509 Bytes
9fc6811 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback
# --- 1. Download and Cache the Database ---
# This is the cricital step for HF Spaces.
# hf_hub_download caches the file. When your app.py restarts,
# it will find the file in the cache and NOT re-download 19.3 GB.
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet.db"
DB_PATH = hf_hub_download(
repo_id=REPO_ID,
filename=DB_FILENAME,
repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")
# --- 2. Database Helper Functions ---
def get_db_connection():
"""
Creates a new read-only connection to the SQLite database.
This is safer for Gradio's multi-threading.
"""
try:
# Connect in read-only mode
db_uri = f"file:{DB_PATH}?mode=ro"
conn = sqlite3.connect(db_uri, uri=True)
return conn
except Exception as e:
print(f"Error connecting to DB: {e}")
traceback.print_exc()
return None
def get_schema_info():
"""
Dynamically queries the SQLite database to get its schema.
"""
print("Getting schema info...")
schema_md = "# Database Schema\n\n"
try:
with get_db_connection() as conn:
cursor = conn.cursor()
# Get all table names
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
tables = cursor.fetchall()
if not tables:
return "Could not find any tables in the database."
for table in tables:
table_name = table[0]
schema_md += f"## Table: `{table_name}`\n\n"
schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
schema_md += "|:------------|:----------|:---------|:------------|\n"
# Get column info for each table
cursor.execute(f"PRAGMA table_info({table_name});")
columns = cursor.fetchall()
for col in columns:
# col structure: (cid, name, type, notnull, dflt_value, pk)
name = col[1]
dtype = col[2]
notnull = bool(col[3])
pk = bool(col[5])
schema_md += f"| `{name}` | `{dtype}` | {notnull} | {pk} |\n"
schema_md += "\n"
return schema_md
except Exception as e:
print(f"Error in get_schema_info: {e}")
traceback.print_exc()
return f"An error occurred while fetching schema: {e}"
def run_query(start_node, relation, end_node, limit):
"""
Runs a query against the ConceptNet database.
*** ASSUMPTION ***:
This function *assumes* the main table is named 'edges' and has
columns 'start', 'rel', and 'end'.
Use the 'Schema Explorer' tab to verify this. If the names are
different (e.g., 'conceptnet_edges'), you must edit this function.
"""
print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
# --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! ---
TABLE_NAME = "edges"
START_COL = "start"
REL_COL = "rel"
END_COL = "end"
# ---
query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1"
params = []
try:
if start_node:
query += f" AND {START_COL} LIKE ?"
params.append(start_node)
if relation:
query += f" AND {REL_COL} LIKE ?"
params.append(relation)
if end_node:
query += f" AND {END_COL} LIKE ?"
params.append(end_node)
query += " LIMIT ?"
params.append(limit)
print(f"Executing SQL: {query}")
print(f"With params: {params}")
with get_db_connection() as conn:
df = pd.read_sql_query(query, conn, params=params)
if df.empty:
return pd.DataFrame(), "Query ran successfully but returned no results."
return df, "Query successful!"
except Exception as e:
print(f"Error in run_query: {e}")
traceback.print_exc()
err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different."
return pd.DataFrame(), err_msg
# --- 3. Build the Gradio UI ---
with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
gr.Markdown("# ConceptNet SQLite Explorer")
gr.Markdown(f"Successfully loaded database from `{DB_PATH}`")
with gr.Tabs():
with gr.TabItem("Schema Explorer"):
gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
schema_btn = gr.Button("Show Database Schema", variant="secondary")
schema_output = gr.Markdown("Schema will appear here...")
with gr.TabItem("Query Edges"):
gr.Markdown(
"**Run a query against the database.**\n"
f"This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n"
"Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)."
)
with gr.Row():
start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog")
rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA")
end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal")
limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
query_btn = gr.Button("Run Query", variant="primary")
status_output = gr.Markdown("Status will appear here...")
results_output = gr.DataFrame(label="Query Results", interactive=False)
# --- 4. Connect UI Elements to Functions ---
schema_btn.click(
fn=get_schema_info,
inputs=None,
outputs=schema_output,
api_name="get_schema"
)
query_btn.click(
fn=run_query,
inputs=[start_input, rel_input, end_input, limit_slider],
outputs=[results_output, status_output],
api_name="run_query"
)
if __name__ == "__main__":
demo.launch() |