Spaces:

cstr
/

conceptnet_db

Running

App Files Files Community

conceptnet_db / app.py

cstr

Update app.py

1f46ad2 verified about 2 months ago

raw

history blame

9.38 kB

	import gradio as gr
	import sqlite3
	import pandas as pd
	from huggingface_hub import hf_hub_download
	import os
	import traceback

	# --- 1. Download and Cache the Database ---
	print("Downloading database (if not already cached)...")
	REPO_ID = "ysenarath/conceptnet-sqlite"
	DB_FILENAME = "data/conceptnet-v5.7.0.db"

	# This is the critical caching step for HF Spaces
	DB_PATH = hf_hub_download(
	repo_id=REPO_ID,
	filename=DB_FILENAME,
	repo_type="dataset"
	)
	print(f"Database is available at: {DB_PATH}")

	# --- 2. Database Helper Functions ---

	def get_db_connection():
	"""
	Creates a new read-only connection to the SQLite database.
	This is safer for Gradio's multi-threading.
	"""
	try:
	# Connect in read-only mode
	db_uri = f"file:{DB_PATH}?mode=ro"
	conn = sqlite3.connect(db_uri, uri=True)
	return conn
	except Exception as e:
	print(f"Error connecting to DB: {e}")
	traceback.print_exc()
	return None

	def get_schema_info():
	"""
	Dynamically queries the SQLite database to get its schema.
	"""
	print("Getting schema info...")
	schema_md = "# Database Schema\n\n"
	try:
	with get_db_connection() as conn:
	cursor = conn.cursor()
	cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
	tables = cursor.fetchall()

	if not tables:
	return "Could not find any tables in the database."

	for table in tables:
	table_name = table[0]
	schema_md += f"## Table: `{table_name}`\n\n"
	schema_md += "\| Column Name \| Data Type \| Not Null \| Primary Key \|\n"
	schema_md += "\|:------------\|:----------\|:---------\|:------------\|\n"

	cursor.execute(f"PRAGMA table_info({table_name});")
	columns = cursor.fetchall()
	for col in columns:
	# col structure: (cid, name, type, notnull, dflt_value, pk)
	name, dtype, notnull, pk = col[1], col[2], col[3], col[5]
	schema_md += f"\| `{name}` \| `{dtype}` \| {bool(notnull)} \| {bool(pk)} \|\n"
	schema_md += "\n"

	return schema_md
	except Exception as e:
	print(f"Error in get_schema_info: {e}")
	traceback.print_exc()
	return f"An error occurred while fetching schema: {e}"

	def run_query(start_node, relation, end_node, limit):
	"""
	Runs an efficient query by filtering on text paths first,
	then joining by indexed IDs.

	* FIX *: This now queries the `node.id` column (which contains the full path)
	and the `relation.label` column, based on the raw query results.
	"""
	print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")

	# --- FIX: Select the `id` column (which has the path) ---
	select_clause = """
	SELECT
	e.id AS edge_id,
	s.id AS start_id_path,
	r.label AS relation_label,
	en.id AS end_id_path,
	e.weight,
	e.dataset,
	e.surface_text,
	s.label AS start_label_text,
	en.label AS end_label_text
	FROM
	edge AS e
	JOIN
	relation AS r ON e.rel_id = r.id
	JOIN
	node AS s ON e.start_id = s.id
	JOIN
	node AS en ON e.end_id = en.id
	"""

	where_conditions = []
	params = []

	try:
	# --- FIX: Query `node.id` for nodes, `relation.label` for relations ---
	if start_node:
	# We add a wildcard to the front to match the full http://... ID
	# unless the user has already added their own wildcard.
	param_val = start_node if "%" in start_node else f"%{start_node}"
	where_conditions.append(f"e.start_id IN (SELECT id FROM node WHERE id LIKE ?)")
	params.append(param_val)

	if relation:
	# Relation table uses `label` which is a direct match (e.g., /r/IsA)
	op = "LIKE" if "%" in relation else "="
	where_conditions.append(f"e.rel_id IN (SELECT id FROM relation WHERE label {op} ?)")
	params.append(relation)

	if end_node:
	# We add a wildcard to the front to match the full http://... ID
	param_val = end_node if "%" in end_node else f"%{end_node}"
	where_conditions.append(f"e.end_id IN (SELECT id FROM node WHERE id LIKE ?)")
	params.append(param_val)
	# --- End of Fix ---

	if not where_conditions:
	where_clause = " WHERE 1=1" # Get random edges if no filter
	else:
	where_clause = " WHERE " + " AND ".join(where_conditions)

	query = select_clause + where_clause + " LIMIT ?"
	params.append(limit)

	print(f"Executing SQL: {query}")
	print(f"With params: {params}")

	with get_db_connection() as conn:
	df = pd.read_sql_query(query, conn, params=params)

	if df.empty:
	return pd.DataFrame(), "Query ran successfully but returned no results. (This is common if your query is very specific)."

	return df, "Query successful!"

	except Exception as e:
	print(f"Error in run_query: {e}")
	traceback.print_exc()
	err_msg = f"Query Failed!\n\n`{e}`"
	return pd.DataFrame(), err_msg

	def run_raw_query(sql_query):
	"""
	Executes a raw, read-only SQL query against the database.
	DANGEROUS but useful for debugging.
	"""
	print(f"Running raw query: {sql_query}")

	# Basic safety check for read-only
	if not sql_query.strip().upper().startswith("SELECT"):
	return pd.DataFrame(), "Error: Only `SELECT` statements are allowed for safety."

	try:
	with get_db_connection() as conn:
	df = pd.read_sql_query(sql_query, conn)

	if df.empty:
	return df, "Query ran successfully but returned no results."

	return df, "Raw query successful!"
	except Exception as e:
	print(f"Error in run_raw_query: {e}")
	traceback.print_exc()
	return pd.DataFrame(), f"Query Failed!\n\n`{e}`"


	# --- 3. Build the Gradio UI ---

	with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
	gr.Markdown(f"# ConceptNet SQLite Explorer (DB: `{DB_FILENAME}`)")
	gr.Markdown(f"Note: Initial query might take a few seconds as the database warms up.")

	with gr.Tabs():
	with gr.TabItem("Query Builder"):
	gr.Markdown(
	"Run a query against the database.\n"
	"This query joins the `edge`, `node`, and `relation` tables to find connections.\n"
	"Use the path part of the ID (e.g., `/c/en/dog` or `/r/Is%`)."
	)
	with gr.Row():
	# --- FIX: Updated labels for clarity ---
	start_input = gr.Textbox(label="Start Node Path", placeholder="/c/en/dog")
	rel_input = gr.Textbox(label="Relation Path", placeholder="/r/IsA")
	end_input = gr.Textbox(label="End Node Path", placeholder="/c/en/animal")

	limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
	query_btn = gr.Button("Run Query", variant="primary")

	status_output = gr.Markdown("Status will appear here...")
	results_output = gr.DataFrame(label="Query Results", interactive=False)

	with gr.TabItem("Raw SQL Query"):
	gr.Markdown("Danger Zone: Run a raw `SELECT` query against the database. Use the Schema Explorer tab to see table/column names.")
	# --- FIX: Updated placeholder to a useful, correct query ---
	raw_sql_input = gr.Textbox(
	label="Raw SQL Query",
	placeholder="SELECT id, label, language, path FROM node WHERE id LIKE '%/c/en/dog' LIMIT 5",
	lines=5
	)
	raw_query_btn = gr.Button("Run Raw SQL", variant="secondary")
	raw_status_output = gr.Markdown("Status will appear here...")
	raw_results_output = gr.DataFrame(label="Raw Query Results", interactive=False)

	with gr.TabItem("Schema Explorer"):
	gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
	schema_btn = gr.Button("Show Database Schema", variant="secondary")
	schema_output = gr.Markdown("Schema will appear here...")

	# --- 4. Connect UI Elements to Functions ---
	query_btn.click(
	fn=run_query,
	inputs=[start_input, rel_input, end_input, limit_slider],
	outputs=[results_output, status_output],
	api_name="run_query"
	)

	raw_query_btn.click(
	fn=run_raw_query,
	inputs=[raw_sql_input],
	outputs=[raw_results_output, raw_status_output],
	api_name="run_raw_query"
	)

	schema_btn.click(
	fn=get_schema_info,
	inputs=None,
	outputs=schema_output,
	api_name="get_schema"
	)

	if __name__ == "__main__":
	# Removed experimental ssr_mode=False
	demo.launch(ssr_mode=False)