Spaces:

cstr
/

conceptnet_db

Running

App Files Files Community

conceptnet_db / app.py

cstr

Create app.py

9fc6811 verified about 1 month ago

raw

history blame

6.51 kB

	import gradio as gr
	import sqlite3
	import pandas as pd
	from huggingface_hub import hf_hub_download
	import os
	import traceback

	# --- 1. Download and Cache the Database ---
	# This is the cricital step for HF Spaces.
	# hf_hub_download caches the file. When your app.py restarts,
	# it will find the file in the cache and NOT re-download 19.3 GB.
	print("Downloading database (if not already cached)...")
	REPO_ID = "ysenarath/conceptnet-sqlite"
	DB_FILENAME = "data/conceptnet.db"
	DB_PATH = hf_hub_download(
	repo_id=REPO_ID,
	filename=DB_FILENAME,
	repo_type="dataset"
	)
	print(f"Database is available at: {DB_PATH}")

	# --- 2. Database Helper Functions ---

	def get_db_connection():
	"""
	Creates a new read-only connection to the SQLite database.
	This is safer for Gradio's multi-threading.
	"""
	try:
	# Connect in read-only mode
	db_uri = f"file:{DB_PATH}?mode=ro"
	conn = sqlite3.connect(db_uri, uri=True)
	return conn
	except Exception as e:
	print(f"Error connecting to DB: {e}")
	traceback.print_exc()
	return None

	def get_schema_info():
	"""
	Dynamically queries the SQLite database to get its schema.
	"""
	print("Getting schema info...")
	schema_md = "# Database Schema\n\n"
	try:
	with get_db_connection() as conn:
	cursor = conn.cursor()

	# Get all table names
	cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
	tables = cursor.fetchall()

	if not tables:
	return "Could not find any tables in the database."

	for table in tables:
	table_name = table[0]
	schema_md += f"## Table: `{table_name}`\n\n"
	schema_md += "\| Column Name \| Data Type \| Not Null \| Primary Key \|\n"
	schema_md += "\|:------------\|:----------\|:---------\|:------------\|\n"

	# Get column info for each table
	cursor.execute(f"PRAGMA table_info({table_name});")
	columns = cursor.fetchall()
	for col in columns:
	# col structure: (cid, name, type, notnull, dflt_value, pk)
	name = col[1]
	dtype = col[2]
	notnull = bool(col[3])
	pk = bool(col[5])
	schema_md += f"\| `{name}` \| `{dtype}` \| {notnull} \| {pk} \|\n"
	schema_md += "\n"

	return schema_md
	except Exception as e:
	print(f"Error in get_schema_info: {e}")
	traceback.print_exc()
	return f"An error occurred while fetching schema: {e}"

	def run_query(start_node, relation, end_node, limit):
	"""
	Runs a query against the ConceptNet database.

	* ASSUMPTION *:
	This function assumes the main table is named 'edges' and has
	columns 'start', 'rel', and 'end'.

	Use the 'Schema Explorer' tab to verify this. If the names are
	different (e.g., 'conceptnet_edges'), you must edit this function.
	"""
	print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")

	# --- !! EDIT THESE IF SCHEMA IS DIFFERENT !! ---
	TABLE_NAME = "edges"
	START_COL = "start"
	REL_COL = "rel"
	END_COL = "end"
	# ---

	query = f"SELECT * FROM {TABLE_NAME} WHERE 1=1"
	params = []

	try:
	if start_node:
	query += f" AND {START_COL} LIKE ?"
	params.append(start_node)
	if relation:
	query += f" AND {REL_COL} LIKE ?"
	params.append(relation)
	if end_node:
	query += f" AND {END_COL} LIKE ?"
	params.append(end_node)

	query += " LIMIT ?"
	params.append(limit)

	print(f"Executing SQL: {query}")
	print(f"With params: {params}")

	with get_db_connection() as conn:
	df = pd.read_sql_query(query, conn, params=params)

	if df.empty:
	return pd.DataFrame(), "Query ran successfully but returned no results."

	return df, "Query successful!"

	except Exception as e:
	print(f"Error in run_query: {e}")
	traceback.print_exc()
	err_msg = f"Query Failed!\n\n`{e}`\n\nTip: Did you check the 'Schema Explorer' tab? The table name might not be `{TABLE_NAME}` or the columns might be different."
	return pd.DataFrame(), err_msg

	# --- 3. Build the Gradio UI ---

	with gr.Blocks(title="ConceptNet SQLite Explorer") as demo:
	gr.Markdown("# ConceptNet SQLite Explorer")
	gr.Markdown(f"Successfully loaded database from `{DB_PATH}`")

	with gr.Tabs():
	with gr.TabItem("Schema Explorer"):
	gr.Markdown("Click the button to see all tables and columns in the database. This helps you write correct queries.")
	schema_btn = gr.Button("Show Database Schema", variant="secondary")
	schema_output = gr.Markdown("Schema will appear here...")

	with gr.TabItem("Query Edges"):
	gr.Markdown(
	"Run a query against the database.\n"
	f"This tab assumes the table is named `edges` and columns are `start`, `rel`, and `end`.\n"
	"Use the 'Schema Explorer' to confirm. You can use SQL wildcards like `%` (e.g., `/c/en/dog%`)."
	)
	with gr.Row():
	start_input = gr.Textbox(label="Start Node", placeholder="/c/en/dog")
	rel_input = gr.Textbox(label="Relation", placeholder="/r/IsA")
	end_input = gr.Textbox(label="End Node", placeholder="/c/en/animal")

	limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=10, step=1)
	query_btn = gr.Button("Run Query", variant="primary")

	status_output = gr.Markdown("Status will appear here...")
	results_output = gr.DataFrame(label="Query Results", interactive=False)

	# --- 4. Connect UI Elements to Functions ---
	schema_btn.click(
	fn=get_schema_info,
	inputs=None,
	outputs=schema_output,
	api_name="get_schema"
	)

	query_btn.click(
	fn=run_query,
	inputs=[start_input, rel_input, end_input, limit_slider],
	outputs=[results_output, status_output],
	api_name="run_query"
	)

	if __name__ == "__main__":
	demo.launch()