conceptnet_db / app.py
cstr's picture
Create app.py
9fc6811 verified
raw
history blame
6.51 kB
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download
import os
import traceback
# --- 1. Download and Cache the Database ---
# This is the cricital step for HF Spaces.
# hf_hub_download caches the file. When your app.py restarts,
# it will find the file in the cache and NOT re-download 19.3 GB.
print("Downloading database (if not already cached)...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet.db"
DB_PATH = hf_hub_download(
repo_id=REPO_ID,
filename=DB_FILENAME,
repo_type="dataset"
)
print(f"Database is available at: {DB_PATH}")
# --- 2. Database Helper Functions ---
def get_db_connection():
"""
Creates a new read-only connection to the SQLite database.
This is safer for Gradio's multi-threading.
"""
try:
# Connect in read-only mode
db_uri = f"file:{DB_PATH}?mode=ro"
conn = sqlite3.connect(db_uri, uri=True)
return conn
except Exception as e:
print(f"Error connecting to DB: {e}")
traceback.print_exc()
return None
def get_schema_info():
"""
Dynamically queries the SQLite database to get its schema.
"""
print("Getting schema info...")
schema_md = "# Database Schema\n\n"
try:
with get_db_connection() as conn:
cursor = conn.cursor()
# Get all table names
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
tables = cursor.fetchall()
if not tables:
return "Could not find any tables in the database."
for table in tables:
table_name = table[0]
schema_md += f"## Table: `{table_name}`\n\n"
schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
schema_md += "|:------------|:----------|:---------|:------------|\n"
# Get column info for each table
cursor.execute(f"PRAGMA table_info({table_name});")
columns = cursor.fetchall()
for col in columns:
# col structure: (cid, name, type, notnull, dflt_value, pk)
name = col[1]
dtype = col[2]
notnull = bool(col[3])
pk = bool(col[5])
schema_md += f"| `{name}` | `{dtype}` | {notnull} | {pk} |\n"
schema_md += "\n"
return schema_md
except Exception as e:
print(f"Error in get_schema_info: {e}")
traceback.print_exc()
return f"An error occurred while fetching schema: {e}"
def run_query(start_node, relation, end_node, limit):
"""
Runs a query against the ConceptNet database.
*** ASSUMPTION ***:
This function *assumes* the main table is named 'edges' and has
columns 'start', 'rel', and 'end'.
Use the 'Schema Explorer' tab to verify this. If the names are
different (e.g., 'conceptnet_edges'), you must edit this function.
"""
print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
# --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! ---
TABLE_NAME = "edges"
START_COL = "start"
REL_COL = "rel"
END_COL = "end"
# ---
query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1"
params = []
try:
if start_node:
query += f" AND {START_COL} LIKE ?"
params.append(start_node)
if relation:
query += f" AND {REL_COL} LIKE ?"
params.append(relation)
if end_node:
query += f" AND {END_COL} LIKE ?"
params.append(end_node)
query += " LIMIT ?"
params.append(limit)
print(f"Executing SQL: {query}")
print(f"With params: {params}")
with get_db_connection() as conn:
df = pd.read_sql_query(query, conn, params=params)
if df.empty:
return pd.DataFrame(), "Query ran successfully but returned no results."
return df, "Query successful!"
except Exception as e:
print(f"Error in run_query: {e}")
traceback.print_exc()
err_msg = f"**Query Failed!**\n\n`{e}`\n\n**Tip:** Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different."
return pd.DataFrame(), err_msg
# --- 3. Build the Gradio UI ---
with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
gr.Markdown("# ConceptNet SQLite Explorer")
gr.Markdown(f"Successfully loaded database from `{DB_PATH}`")
with gr.Tabs():
with gr.TabItem("Schema Explorer"):
gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
schema_btn = gr.Button("Show Database Schema", variant="secondary")
schema_output = gr.Markdown("Schema will appear here...")
with gr.TabItem("Query Edges"):
gr.Markdown(
"**Run a query against the database.**\n"
f"This tab *assumes* the table is named `edges` and columns are `start`, `rel`, and `end`.\n"
"Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)."
)
with gr.Row():
start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog")
rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA")
end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal")
limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
query_btn = gr.Button("Run Query", variant="primary")
status_output = gr.Markdown("Status will appear here...")
results_output = gr.DataFrame(label="Query Results", interactive=False)
# --- 4. Connect UI Elements to Functions ---
schema_btn.click(
fn=get_schema_info,
inputs=None,
outputs=schema_output,
api_name="get_schema"
)
query_btn.click(
fn=run_query,
inputs=[start_input, rel_input, end_input, limit_slider],
outputs=[results_output, status_output],
api_name="run_query"
)
if __name__ == "__main__":
demo.launch()