Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import sqlite3
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
import os
|
| 6 |
+
import traceback
|
| 7 |
+
|
| 8 |
+
# --- 1. Download and Cache the Database ---
|
| 9 |
+
# This is the critical step for HF Spaces.
|
| 10 |
+
# hf_hub_download caches the file. When your app.py restarts,
|
| 11 |
+
# it will find the file in the cache and NOT re-download 19.3 GB.
|
| 12 |
+
# Hub coordinates of the ConceptNet SQLite dump (a dataset repository).
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet.db"

print("Downloading database (if not already cached)...")
# hf_hub_download hands back the local filesystem path of the requested file;
# that path is what every database helper below opens.
DB_PATH = hf_hub_download(repo_id=REPO_ID, filename=DB_FILENAME, repo_type="dataset")
print(f"Database is available at: {DB_PATH}")
|
| 21 |
+
|
| 22 |
+
# --- 2. Database Helper Functions ---
|
| 23 |
+
|
| 24 |
+
def get_db_connection():
    """
    Open a new read-only connection to the SQLite database at ``DB_PATH``.

    A fresh connection is created on every call, so no connection object is
    shared between callers (the original author notes this is safer for
    Gradio's multi-threading).

    Returns:
        sqlite3.Connection | None: The open connection, or ``None`` when the
        connection could not be established. In that case the error and its
        traceback are printed. Callers must check for ``None`` and are
        responsible for closing the connection they receive.
    """
    # Function-scope import: only this helper needs it.
    from urllib.parse import quote

    try:
        # Percent-encode the path before embedding it in the URI. Without
        # this, a '?' or '#' in the path would be parsed as the start of the
        # query string / fragment, and a literal '%' as an escape sequence.
        # SQLite decodes the escapes again when it opens the file.
        db_uri = f"file:{quote(str(DB_PATH))}?mode=ro"
        # mode=ro opens the database read-only.
        return sqlite3.connect(db_uri, uri=True)
    except Exception as e:
        print(f"Error connecting to DB: {e}")
        traceback.print_exc()
        return None
|
| 38 |
+
|
| 39 |
+
def get_schema_info():
    """
    Build a Markdown description of every user table in the database.

    Table names are read from ``sqlite_master`` (names matching the pattern
    ``sqlite_%`` are skipped), and ``PRAGMA table_info`` is run for each one
    to list its columns.

    Returns:
        str: Markdown containing one section per table with a
        column-name / data-type / not-null / primary-key table. If the
        database has no tables, or the connection or a query fails, a
        human-readable message is returned instead (errors are also printed
        with their traceback).
    """
    # Function-scope import: only needed to guarantee the connection closes.
    from contextlib import closing

    print("Getting schema info...")
    try:
        conn = get_db_connection()
        if conn is None:
            # get_db_connection() has already printed the underlying error.
            return "An error occurred while fetching schema: could not connect to the database."

        # A bare `with conn:` on a sqlite3 connection only scopes a
        # transaction; closing() makes sure the connection is released.
        with closing(conn):
            cursor = conn.cursor()

            # Get all table names
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
            table_names = [row[0] for row in cursor.fetchall()]

            if not table_names:
                return "Could not find any tables in the database."

            parts = ["# Database Schema\n\n"]
            for table_name in table_names:
                parts.append(f"## Table: `{table_name}`\n\n")
                parts.append("| Column Name | Data Type | Not Null | Primary Key |\n")
                parts.append("|:------------|:----------|:---------|:------------|\n")

                # Quote the identifier so table names containing spaces,
                # punctuation or SQL keywords do not break the PRAGMA.
                quoted_name = '"' + table_name.replace('"', '""') + '"'
                cursor.execute(f"PRAGMA table_info({quoted_name});")
                # Row layout: (cid, name, type, notnull, dflt_value, pk)
                for _cid, name, dtype, notnull, _default, pk in cursor.fetchall():
                    parts.append(f"| `{name}` | `{dtype}` | {bool(notnull)} | {bool(pk)} |\n")
                parts.append("\n")

        return "".join(parts)
    except Exception as e:
        print(f"Error in get_schema_info: {e}")
        traceback.print_exc()
        return f"An error occurred while fetching schema: {e}"
|
| 79 |
+
|
| 80 |
+
def run_query(start_node, relation, end_node, limit):
    """
    Run a filtered, parameterized query against the ConceptNet edge table.

    *** ASSUMPTION ***:
    This function *assumes* the main table is named 'edges' and has
    columns 'start', 'rel', and 'end'. Use the 'Schema Explorer' tab to
    verify this. If the names are different (e.g., 'conceptnet_edges'),
    edit the constants at the top of the function body.

    Args:
        start_node: LIKE pattern for the start column; falsy means no filter.
        relation: LIKE pattern for the relation column; falsy means no filter.
        end_node: LIKE pattern for the end column; falsy means no filter.
        limit: Maximum number of rows to return. Coerced to ``int`` and
            raised to at least 1.

    Returns:
        tuple[pandas.DataFrame, str]: The result rows and a status message.
        On failure (or when nothing matches) the frame is empty and the
        message explains why; errors are also printed with their traceback.
    """
    # Function-scope import: only needed to guarantee the connection closes.
    from contextlib import closing

    print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")

    # --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! ---
    TABLE_NAME = "edges"
    START_COL = "start"
    REL_COL = "rel"
    END_COL = "end"
    # ---

    query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1"
    params = []

    try:
        # Only the fixed column names above are interpolated into the SQL;
        # every user-supplied value is bound as a parameter.
        filters = ((START_COL, start_node), (REL_COL, relation), (END_COL, end_node))
        for column, pattern in filters:
            if pattern:
                query += f" AND {column} LIKE ?"
                params.append(pattern)

        # SQLite treats a negative LIMIT as "no limit". The API endpoint can
        # be called without the slider's bounds, so enforce the slider's
        # minimum of 1 here and bind a plain int.
        query += " LIMIT ?"
        params.append(max(1, int(limit)))

        print(f"Executing SQL: {query}")
        print(f"With params: {params}")

        conn = get_db_connection()
        if conn is None:
            # get_db_connection() has already printed the underlying error.
            return pd.DataFrame(), "**Query Failed!**\n\nCould not connect to the database. Check the application logs for details."

        # A bare `with conn:` on a sqlite3 connection only scopes a
        # transaction; closing() makes sure the connection is released.
        with closing(conn):
            df = pd.read_sql_query(query, conn, params=params)

        if df.empty:
            return pd.DataFrame(), "Query ran successfully but returned no results."

        return df, "Query successful!"

    except Exception as e:
        print(f"Error in run_query: {e}")
        traceback.print_exc()
        err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different."
        return pd.DataFrame(), err_msg
|
| 133 |
+
|
| 134 |
+
# --- 3. Build the Gradio UI ---
|
| 135 |
+
|
| 136 |
+
with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
    # Page header.
    gr.Markdown("# ConceptNet SQLite Explorer")
    gr.Markdown(f"Successfully loaded database from `{DB_PATH}`")

    with gr.Tabs():
        # Tab 1: on-demand dump of the database schema.
        with gr.TabItem("Schema Explorer"):
            gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
            schema_btn = gr.Button("Show Database Schema", variant="secondary")
            schema_output = gr.Markdown("Schema will appear here...")

        # Tab 2: filtered lookups against the edge table.
        with gr.TabItem("Query Edges"):
            query_help = (
                "**Run a query against the database.**\n"
                "This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n"
                "Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)."
            )
            gr.Markdown(query_help)

            # The three LIKE-pattern filters sit side by side.
            with gr.Row():
                start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog")
                rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA")
                end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal")

            limit_slider = gr.Slider(
                label="Limit",
                minimum=1,
                maximum=200,
                value=10,
                step=1,
            )
            query_btn = gr.Button("Run Query", variant="primary")

            status_output = gr.Markdown("Status will appear here...")
            results_output = gr.DataFrame(label="Query Results", interactive=False)

    # --- 4. Connect UI Elements to Functions ---
    # The schema button takes no inputs and fills the Markdown pane.
    schema_btn.click(
        fn=get_schema_info,
        inputs=None,
        outputs=schema_output,
        api_name="get_schema",
    )

    # run_query returns (dataframe, status message), in that order.
    query_inputs = [start_input, rel_input, end_input, limit_slider]
    query_outputs = [results_output, status_output]
    query_btn.click(
        fn=run_query,
        inputs=query_inputs,
        outputs=query_outputs,
        api_name="run_query",
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|