# conceptnet_db / app.py
# Hugging Face file-viewer header captured together with the source:
# uploader "cstr", last commit "Update app.py" (254cf99, verified), size 15 kB.
import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download, HfApi
import os
import time
import json
# ===== CONFIGURATION =====
# Language codes accepted by the profile lookup; each is used as the
# ``/c/<lang>/`` segment of a ConceptNet node URI.
TARGET_LANGUAGES = [
    'de', 'en', 'es', 'fr', 'it', 'ja',
    'nl', 'pl', 'pt', 'ru', 'zh',
]

# Hugging Face dataset repo holding the pre-indexed database and its
# progress marker file.
INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
PROGRESS_FILENAME = "indexing_progress.json"

# Path returned as the database location when the indexed DB is not downloaded.
LOCAL_DB_PATH = "/tmp/conceptnet-indexed.db"

# Prefix shared by every node URI stored in the database.
CONCEPTNET_BASE = "http://conceptnet.io"

# Upstream (un-indexed) database coordinates; not referenced by the code
# visible in this file.
ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
ORIGINAL_DB_FILENAME = "data/conceptnet-v5.7.0.db"
# =========================

print("🌍 Languages: " + ", ".join(code.upper() for code in TARGET_LANGUAGES))

# The token may be supplied under any of three variable names; the first
# non-empty one wins.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or os.environ.get("HF_API_TOKEN")
)
if HF_TOKEN:
    print("βœ… HF_TOKEN found")
def log_progress(message, level="INFO"):
    """Print *message* to stdout, prefixed with the wall-clock time and a level icon.

    Args:
        message: Text to print.
        level: One of "INFO", "SUCCESS", "ERROR", "WARN" or "DEBUG"; any
            other value produces an empty icon.
    """
    icons = {
        "INFO": "ℹ️ ",
        "SUCCESS": "βœ…",
        "ERROR": "❌",
        "WARN": "⚠️ ",
        "DEBUG": "πŸ”",
    }
    icon = icons[level] if level in icons else ""
    now = time.strftime("%H:%M:%S")
    print("[{}] {} {}".format(now, icon, message))
def check_remote_progress():
    """Read the indexing progress record from the indexed dataset repo.

    Returns:
        dict: The parsed JSON content of ``PROGRESS_FILENAME`` downloaded
        from ``INDEXED_REPO_ID``. When no token is configured, the repo or
        file cannot be fetched, or the file does not contain a JSON object,
        ``{"indexing_complete": False}`` is returned instead.
    """
    if not HF_TOKEN:
        return {"indexing_complete": False}

    try:
        api = HfApi()
        # Result is discarded: the call is made only so that a missing or
        # inaccessible repo raises before the download is attempted.
        api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        progress_path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=PROGRESS_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        with open(progress_path, 'r', encoding="utf-8") as f:
            progress = json.load(f)
    except Exception as exc:
        # Best effort: any failure means "not indexed yet", but say why
        # instead of swallowing it (and let KeyboardInterrupt/SystemExit
        # propagate, which a bare ``except`` would not).
        log_progress(f"Could not read remote progress: {exc}", "WARN")
        return {"indexing_complete": False}

    if not isinstance(progress, dict):
        # Callers use ``.get`` on the result, so anything but an object is unusable.
        log_progress("Remote progress file is not a JSON object", "WARN")
        return {"indexing_complete": False}
    return progress
def create_indexed_database():
    """Return the filesystem path of the SQLite database the app should open.

    Despite its name, this function does not build anything: if the remote
    progress record reports ``indexing_complete``, the indexed database is
    downloaded from ``INDEXED_REPO_ID`` and its local path is returned.
    In every other case (indexing incomplete, or the download fails)
    ``LOCAL_DB_PATH`` is returned.

    Returns:
        str: Path to the database file.
    """
    progress = check_remote_progress()
    if progress.get("indexing_complete", False):
        try:
            indexed_path = hf_hub_download(
                repo_id=INDEXED_REPO_ID,
                filename=INDEXED_DB_FILENAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
        except Exception as exc:
            # Keep the best-effort fallback, but record why the download
            # failed instead of discarding the error silently.
            log_progress(f"Indexed DB download failed, using {LOCAL_DB_PATH}: {exc}", "WARN")
        else:
            log_progress("Downloaded indexed DB", "SUCCESS")
            return indexed_path
    return LOCAL_DB_PATH


# Resolved once at import time; every connection opened later uses this path.
DB_PATH = create_indexed_database()
def get_db_connection():
    """Open a new SQLite connection to ``DB_PATH`` with an enlarged page cache.

    The connection is created with ``check_same_thread=False`` so it may be
    used from a thread other than the one that opened it.

    Returns:
        sqlite3.Connection: A fresh connection; the caller owns it and is
        responsible for closing it.
    """
    connection = sqlite3.connect(DB_PATH, check_same_thread=False)
    # A negative cache_size is a size in KiB rather than a page count
    # (per the SQLite PRAGMA documentation), i.e. roughly 250 MiB here.
    connection.execute("PRAGMA cache_size = -256000")
    return connection
def _debug_sample_edges(cursor):
    """Step 1: print and return up to five edges starting at an English 'dog' node."""
    log_progress("\n1. Finding actual edges for 'dog':", "INFO")
    cursor.execute("""
        SELECT e.id, e.start_id, e.rel_id, e.end_id, e.weight
        FROM edge e
        WHERE e.start_id LIKE 'http://conceptnet.io/c/en/dog%'
        LIMIT 5
    """)
    edges = cursor.fetchall()
    log_progress(f"Found {len(edges)} edges:", "SUCCESS")
    for edge_id, start_id, rel_id, end_id, weight in edges:
        print(f" {edge_id}")
        print(f" start: {start_id}")
        print(f" rel: {rel_id}")
        print(f" end: {end_id}")
        print(f" weight: {weight}")
    return edges


def _debug_list_relations(cursor):
    """Step 2: print the first twenty rows of the relation table."""
    log_progress("\n2. What relations exist?", "INFO")
    cursor.execute("SELECT id, label FROM relation LIMIT 20")
    relations = cursor.fetchall()
    log_progress(f"Found {len(relations)} relations:", "SUCCESS")
    for rel_id, label in relations:
        print(f" {rel_id} -> {label}")


def _debug_relation_lookup(cursor, test_rel_id):
    """Step 3: check that an edge's ``rel_id`` resolves to a row in ``relation``."""
    log_progress("\n3. Testing relation JOIN:", "INFO")
    if not test_rel_id:
        # Nothing to look up when the sampled edge carries no relation id.
        return
    log_progress(f"Looking up relation ID: {test_rel_id}", "DEBUG")
    cursor.execute("SELECT id, label FROM relation WHERE id = ?", (test_rel_id,))
    rel_result = cursor.fetchone()
    if rel_result:
        log_progress(f" βœ… Found: {rel_result[0]} -> {rel_result[1]}", "SUCCESS")
    else:
        log_progress(f" ❌ Relation ID not found in relation table!", "ERROR")


def _debug_full_join(cursor, edge):
    """Step 4: run the edge/node/relation join for one edge; on no rows, probe each table."""
    _edge_id, test_start, test_rel, test_end, _weight = edge
    log_progress(f"\n4. Testing full JOIN on: {test_start}", "INFO")
    query = """
        SELECT
            e.id,
            s.label AS start_label,
            r.label AS relation,
            en.label AS end_label,
            e.weight
        FROM edge e
        JOIN node s ON e.start_id = s.id
        JOIN relation r ON e.rel_id = r.id
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id = ?
        LIMIT 5
    """
    start = time.time()
    cursor.execute(query, (test_start,))
    results = cursor.fetchall()
    elapsed = time.time() - start
    log_progress(
        f"Full JOIN returned {len(results)} in {elapsed:.3f}s",
        "SUCCESS" if results else "ERROR",
    )
    if results:
        for _row_id, s_label, r_label, e_label, weight in results:
            print(f" {s_label} --{r_label}--> {e_label} [{weight:.3f}]")
        return

    log_progress("JOIN returned nothing! Checking each table...", "ERROR")
    # Look up each side of the join on its own to see which one is missing.
    cursor.execute("SELECT id, label FROM node WHERE id = ?", (test_start,))
    start_node = cursor.fetchone()
    log_progress(f" Start node: {start_node}", "DEBUG")
    cursor.execute("SELECT id, label FROM node WHERE id = ?", (test_end,))
    end_node = cursor.fetchone()
    log_progress(f" End node: {end_node}", "DEBUG")
    cursor.execute("SELECT id, label FROM relation WHERE id = ?", (test_rel,))
    rel = cursor.fetchone()
    log_progress(f" Relation: {rel}", "DEBUG")


def _debug_like_join(cursor):
    """Step 5: run the LIKE + relation-label query; on no rows, count each condition separately."""
    log_progress("\n5. Testing LIKE + JOIN (what semantic profile does):", "INFO")
    test_pattern = f"{CONCEPTNET_BASE}/c/en/dog%"
    test_relation = "/r/IsA"
    query = """
        SELECT
            en.label,
            e.weight
        FROM edge e
        JOIN node en ON e.end_id = en.id
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ? AND r.label = ?
        LIMIT 5
    """
    log_progress(f"Pattern: {test_pattern}", "DEBUG")
    log_progress(f"Relation: {test_relation}", "DEBUG")
    start = time.time()
    cursor.execute(query, (test_pattern, test_relation))
    results = cursor.fetchall()
    elapsed = time.time() - start
    log_progress(
        f"Result: {len(results)} rows in {elapsed:.3f}s",
        "SUCCESS" if results else "WARN",
    )
    if results:
        for label, weight in results:
            print(f" dog IsA {label} [{weight:.3f}]")
        return

    log_progress("No results! Let's check why...", "WARN")
    # Check if edges exist with this pattern
    cursor.execute("SELECT COUNT(*) FROM edge WHERE start_id LIKE ?", (test_pattern,))
    edge_count = cursor.fetchone()[0]
    log_progress(f" Edges with pattern: {edge_count}", "DEBUG")
    # Check if any edges have this relation
    cursor.execute(
        "SELECT COUNT(*) FROM edge e JOIN relation r ON e.rel_id = r.id WHERE r.label = ?",
        (test_relation,),
    )
    rel_edge_count = cursor.fetchone()[0]
    log_progress(f" Edges with relation {test_relation}: {rel_edge_count}", "DEBUG")
    # Check if the combination exists
    cursor.execute("""
        SELECT COUNT(*) FROM edge e
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ? AND r.label = ?
    """, (test_pattern, test_relation))
    combo_count = cursor.fetchone()[0]
    log_progress(f" Combination: {combo_count}", "DEBUG")
    if combo_count == 0:
        log_progress(" ❌ NO edges match pattern + relation!", "ERROR")
        log_progress(" Checking what relations DO exist for 'dog':", "INFO")
        cursor.execute("""
            SELECT DISTINCT r.label, COUNT(*) as cnt
            FROM edge e
            JOIN relation r ON e.rel_id = r.id
            WHERE e.start_id LIKE ?
            GROUP BY r.label
            ORDER BY cnt DESC
            LIMIT 10
        """, (test_pattern,))
        actual_rels = cursor.fetchall()
        log_progress(f" Actual relations for 'dog':", "INFO")
        for rel_label, count in actual_rels:
            print(f" {rel_label}: {count} edges")


def deep_debug():
    """Run a five-step diagnostic against the database and print the findings.

    The steps sample edges for the English word 'dog', list relations, check
    that an edge's relation id resolves, run the full edge/node/relation
    join for one edge, and finally run the LIKE + relation-label query.
    If step 1 finds no edges, the session stops there.

    All output goes to stdout. Any exception is logged together with its
    traceback and is not re-raised. Returns ``None``.
    """
    log_progress("="*60, "INFO")
    log_progress("DEEP DEBUGGING SESSION", "INFO")
    log_progress("="*60, "INFO")
    try:
        conn = get_db_connection()
        # sqlite3's own ``with conn:`` only commits or rolls back; it never
        # closes the connection, so close it explicitly.
        try:
            cursor = conn.cursor()

            edges = _debug_sample_edges(cursor)
            if not edges:
                log_progress("NO EDGES FOUND! Database might be corrupted!", "ERROR")
                return

            _debug_list_relations(cursor)

            # ``edges`` is known to be non-empty from here on.
            first_edge = edges[0]
            _debug_relation_lookup(cursor, first_edge[2])
            _debug_full_join(cursor, first_edge)
            _debug_like_join(cursor)
        finally:
            conn.close()

        log_progress("\n" + "="*60, "INFO")
        log_progress("DEBUGGING COMPLETE", "INFO")
        log_progress("="*60 + "\n", "INFO")
    except Exception as e:
        log_progress(f"Debug failed: {e}", "ERROR")
        import traceback
        traceback.print_exc()


# Run deep debugging (executes once, at import time)
deep_debug()
def get_semantic_profile(word, lang='en', progress=gr.Progress()):
    """Build a Markdown report of the relations whose start node matches *word*.

    The word is trimmed, lower-cased and has spaces replaced by underscores,
    then matched as a prefix of the node URI
    ``CONCEPTNET_BASE/c/<lang>/<word>``. The report lists up to three
    matching nodes, then up to twenty relation labels ordered by edge count,
    each with its five highest-weighted end nodes.

    Args:
        word: The term to look up.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress tracker; accepted for the UI binding but
            not used by this function.

    Returns:
        str: Markdown text. This is the report on success, an "Invalid
        input" notice for an empty/blank word or unknown language, a "Not
        found" notice when no node matches, or an error line if the
        database access raises.
    """
    log_progress(f"Profile request: {word} ({lang})", "INFO")

    # Normalise BEFORE validating: a whitespace-only word would otherwise
    # pass the emptiness check and produce the pattern ".../c/<lang>/%",
    # which matches every node of that language.
    word = word.strip().lower().replace(' ', '_') if isinstance(word, str) else ""
    if not word or lang not in TARGET_LANGUAGES:
        return "⚠️ Invalid input"

    # NOTE(review): this is a prefix match, and '_' / '%' inside the word act
    # as LIKE wildcards, so e.g. 'dog' also matches 'dogma' — confirm whether
    # that is intended.
    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"

    parts = [
        f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n",
        "*Check server logs for detailed debug information*\n\n",
    ]

    try:
        conn = get_db_connection()
        # sqlite3's own ``with conn:`` does not close the connection, so
        # release it explicitly on every exit path.
        try:
            cursor = conn.cursor()

            # Find nodes
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            nodes = cursor.fetchall()
            if not nodes:
                return f"# 🧠 '{word}'\n\n⚠️ Not found"

            parts.extend(
                f"**Node:** `{node_id}` β†’ {label}\n" for node_id, label in nodes[:3]
            )
            parts.append("\n## Relations Found\n\n")

            # Get actual relations that exist
            cursor.execute("""
                SELECT DISTINCT r.label, COUNT(*) as cnt
                FROM edge e
                JOIN relation r ON e.rel_id = r.id
                WHERE e.start_id LIKE ?
                GROUP BY r.label
                ORDER BY cnt DESC
            """, (like_path,))
            relations = cursor.fetchall()
            log_progress(f"Found {len(relations)} relation types", "INFO")

            for rel_label, count in relations[:20]:
                parts.append(f"### {rel_label} ({count} edges)\n\n")
                # Get sample edges
                cursor.execute("""
                    SELECT en.label, e.weight
                    FROM edge e
                    JOIN node en ON e.end_id = en.id
                    JOIN relation r ON e.rel_id = r.id
                    WHERE e.start_id LIKE ? AND r.label = ?
                    ORDER BY e.weight DESC
                    LIMIT 5
                """, (like_path, rel_label))
                parts.extend(
                    f"- **{word}** {rel_label} β†’ *{label}* `[{weight:.3f}]`\n"
                    for label, weight in cursor.fetchall()
                )
                parts.append("\n")

            return "".join(parts)
        finally:
            conn.close()
    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        import traceback
        traceback.print_exc()
        return f"**❌ Error:** {e}"
def run_raw_query(sql_query):
    """Execute a user-supplied SELECT statement and return the rows.

    Args:
        sql_query: SQL text. Anything that is not a string, or that does not
            start with ``SELECT`` (ignoring surrounding whitespace and
            letter case), is rejected without touching the database.

    Returns:
        tuple[pandas.DataFrame, str]: The result rows and a status line.
        On rejection or on any error the frame is empty and the status line
        carries the reason.
    """
    # Reject non-string input (e.g. None) here instead of letting
    # ``.strip()`` raise AttributeError.
    if not isinstance(sql_query, str) or not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "❌ Only SELECT"
    try:
        conn = get_db_connection()
        # sqlite3's own ``with conn:`` does not close the connection; close
        # it explicitly so each query does not leak one.
        try:
            start = time.time()
            df = pd.read_sql_query(sql_query, conn)
            elapsed = time.time() - start
        finally:
            conn.close()
        return df, f"βœ… {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        return pd.DataFrame(), f"❌ {e}"
def get_schema_info():
    """Return the fixed Markdown text shown when the Schema tab's button is pressed."""
    heading = "# Schema"
    body = "Check server logs for detailed debugging output."
    return "\n\n".join((heading, body))
# UI
# Three tabs: word profile lookup, free-form SELECT runner, and a schema
# placeholder. Each tab has one button wired to one handler below.
with gr.Blocks(title="ConceptNet Debug", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ” ConceptNet Debugger")
    gr.Markdown("**Check server logs for comprehensive debugging information!**")

    with gr.Tabs():
        with gr.TabItem("πŸ” Profile"):
            with gr.Row():
                word_input = gr.Textbox(label="Word", value="dog")
                lang_input = gr.Dropdown(
                    choices=TARGET_LANGUAGES,
                    value="en",
                    label="Lang",
                )
            profile_btn = gr.Button("Get Profile")
            profile_out = gr.Markdown()

        with gr.TabItem("πŸ’» SQL"):
            sql_input = gr.Textbox(
                label="SQL",
                # Pre-filled example query; the two literals concatenate to a
                # single-line statement.
                value=(
                    "SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id "
                    "WHERE e.start_id LIKE 'http://conceptnet.io/c/en/dog%' LIMIT 10"
                ),
                lines=3,
            )
            sql_btn = gr.Button("Execute")
            sql_status = gr.Markdown()
            sql_results = gr.DataFrame()

        with gr.TabItem("πŸ“Š Schema"):
            schema_btn = gr.Button("Load")
            schema_out = gr.Markdown()

    # Event wiring: handler, its input components, its output components.
    profile_btn.click(
        fn=get_semantic_profile,
        inputs=[word_input, lang_input],
        outputs=profile_out,
    )
    sql_btn.click(
        fn=run_raw_query,
        inputs=sql_input,
        outputs=[sql_results, sql_status],
    )
    schema_btn.click(fn=get_schema_info, inputs=None, outputs=schema_out)

if __name__ == "__main__":
    log_progress("DEBUG MODE READY", "SUCCESS")
    demo.launch(ssr_mode=False)