"""Gradio app for debugging an indexed ConceptNet SQLite database.

On import this module resolves the database path (downloading the indexed
database from the Hugging Face Hub when it is marked complete), runs a
diagnostic session against it (see ``deep_debug``), and builds the UI.
"""
import json
import os
import sqlite3
import time
import traceback
from contextlib import closing

import gradio as gr
import pandas as pd
from huggingface_hub import hf_hub_download, HfApi

# ===== CONFIGURATION =====
TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
PROGRESS_FILENAME = "indexing_progress.json"
LOCAL_DB_PATH = "/tmp/conceptnet-indexed.db"
CONCEPTNET_BASE = "http://conceptnet.io"
# =========================

print(f"🌍 Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")

# The token may be provided under any of these environment variable names.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or os.environ.get("HF_API_TOKEN")
)
if HF_TOKEN:
    print("✅ HF_TOKEN found")

ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
ORIGINAL_DB_FILENAME = "data/conceptnet-v5.7.0.db"

# Icon printed in front of each log line, keyed by level name.
_LOG_PREFIXES = {
    "INFO": "ℹ️ ",
    "SUCCESS": "✅",
    "ERROR": "❌",
    "WARN": "⚠️ ",
    "DEBUG": "🔍",
}


def log_progress(message, level="INFO"):
    """Print ``message`` to stdout with an HH:MM:SS timestamp and level icon.

    Args:
        message: Text to print.
        level: One of the keys of ``_LOG_PREFIXES``; unknown levels get no icon.
    """
    timestamp = time.strftime("%H:%M:%S")
    prefix = _LOG_PREFIXES.get(level, "")
    print(f"[{timestamp}] {prefix} {message}")


def check_remote_progress():
    """Fetch the indexing progress record from the Hub dataset repo.

    Returns:
        The parsed contents of ``PROGRESS_FILENAME`` when it is a JSON object,
        otherwise ``{"indexing_complete": False}`` (no token, repo or file
        unavailable, unreadable or malformed file).
    """
    not_ready = {"indexing_complete": False}
    if not HF_TOKEN:
        return not_ready
    try:
        api = HfApi()
        # Raises if the repo is missing or inaccessible with this token.
        api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        progress_path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=PROGRESS_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        with open(progress_path, 'r', encoding="utf-8") as f:
            progress = json.load(f)
    except Exception as e:
        # Best effort: any failure means "not indexed yet", but say why.
        log_progress(f"Could not read remote progress: {e}", "WARN")
        return not_ready
    # Callers use .get(); guard against a progress file that is not an object.
    if not isinstance(progress, dict):
        log_progress("Remote progress file is not a JSON object", "WARN")
        return not_ready
    return progress


def create_indexed_database():
    """Resolve the path of the SQLite database to use.

    Downloads the indexed database from the Hub when the remote progress
    record reports ``indexing_complete``; otherwise (or when the download
    fails) falls back to ``LOCAL_DB_PATH``.

    Returns:
        Filesystem path of the database file.
    """
    progress = check_remote_progress()
    if progress.get("indexing_complete", False):
        try:
            indexed_path = hf_hub_download(
                repo_id=INDEXED_REPO_ID,
                filename=INDEXED_DB_FILENAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
        except Exception as e:
            log_progress(f"Indexed DB download failed: {e}", "WARN")
        else:
            log_progress("Downloaded indexed DB", "SUCCESS")
            return indexed_path
    if not os.path.exists(LOCAL_DB_PATH):
        # sqlite3.connect() would silently create an empty database here.
        log_progress(f"Local DB not found at {LOCAL_DB_PATH}", "WARN")
    return LOCAL_DB_PATH


DB_PATH = create_indexed_database()


def get_db_connection():
    """Open a new SQLite connection to ``DB_PATH``.

    The caller owns the connection and must close it, e.g. with
    ``contextlib.closing``; using the connection itself as a context manager
    only manages the transaction and does not close it.

    Returns:
        An open ``sqlite3.Connection`` with a negative ``cache_size`` pragma
        (-256000) applied.
    """
    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    conn.execute("PRAGMA cache_size = -256000")
    return conn


def _debug_find_edges(cursor):
    """Step 1: fetch and print up to five edges starting at an English 'dog' node."""
    log_progress("\n1. Finding actual edges for 'dog':", "INFO")
    cursor.execute("""
        SELECT e.id, e.start_id, e.rel_id, e.end_id, e.weight
        FROM edge e
        WHERE e.start_id LIKE 'http://conceptnet.io/c/en/dog%'
        LIMIT 5
    """)
    edges = cursor.fetchall()
    log_progress(f"Found {len(edges)} edges:", "SUCCESS")
    for edge_id, start_id, rel_id, end_id, weight in edges:
        print(f" {edge_id}")
        print(f" start: {start_id}")
        print(f" rel: {rel_id}")
        print(f" end: {end_id}")
        print(f" weight: {weight}")
    return edges


def _debug_list_relations(cursor):
    """Step 2: print up to twenty rows of the relation table."""
    log_progress("\n2. What relations exist?", "INFO")
    cursor.execute("SELECT id, label FROM relation LIMIT 20")
    relations = cursor.fetchall()
    log_progress(f"Found {len(relations)} relations:", "SUCCESS")
    for rel_id, label in relations:
        print(f" {rel_id} -> {label}")


def _debug_relation_lookup(cursor, rel_id):
    """Step 3: check that an edge's rel_id resolves in the relation table."""
    log_progress("\n3. Testing relation JOIN:", "INFO")
    if not rel_id:
        return
    log_progress(f"Looking up relation ID: {rel_id}", "DEBUG")
    cursor.execute("SELECT id, label FROM relation WHERE id = ?", (rel_id,))
    rel_result = cursor.fetchone()
    if rel_result:
        log_progress(f" ✅ Found: {rel_result[0]} -> {rel_result[1]}", "SUCCESS")
    else:
        log_progress(" ❌ Relation ID not found in relation table!", "ERROR")


def _debug_full_join(cursor, edge):
    """Step 4: run the node/relation/node JOIN for one edge's start node."""
    _, test_start, test_rel, test_end, _ = edge
    log_progress(f"\n4. Testing full JOIN on: {test_start}", "INFO")
    query = """
        SELECT e.id, s.label AS start_label, r.label AS relation,
               en.label AS end_label, e.weight
        FROM edge e
        JOIN node s ON e.start_id = s.id
        JOIN relation r ON e.rel_id = r.id
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id = ?
        LIMIT 5
    """
    start = time.perf_counter()
    cursor.execute(query, (test_start,))
    results = cursor.fetchall()
    elapsed = time.perf_counter() - start
    log_progress(
        f"Full JOIN returned {len(results)} in {elapsed:.3f}s",
        "SUCCESS" if results else "ERROR",
    )
    if results:
        for _edge_id, s_label, r_label, e_label, weight in results:
            print(f" {s_label} --{r_label}--> {e_label} [{weight:.3f}]")
        return

    # The JOIN came back empty: look up each side on its own to see which
    # table is missing the referenced row.
    log_progress("JOIN returned nothing! Checking each table...", "ERROR")
    cursor.execute("SELECT id, label FROM node WHERE id = ?", (test_start,))
    log_progress(f" Start node: {cursor.fetchone()}", "DEBUG")
    cursor.execute("SELECT id, label FROM node WHERE id = ?", (test_end,))
    log_progress(f" End node: {cursor.fetchone()}", "DEBUG")
    cursor.execute("SELECT id, label FROM relation WHERE id = ?", (test_rel,))
    log_progress(f" Relation: {cursor.fetchone()}", "DEBUG")


def _debug_like_join(cursor):
    """Step 5: run the LIKE + JOIN query shape used by the semantic profile."""
    log_progress("\n5. Testing LIKE + JOIN (what semantic profile does):", "INFO")
    test_pattern = f"{CONCEPTNET_BASE}/c/en/dog%"
    test_relation = "/r/IsA"
    query = """
        SELECT en.label, e.weight
        FROM edge e
        JOIN node en ON e.end_id = en.id
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ? AND r.label = ?
        LIMIT 5
    """
    log_progress(f"Pattern: {test_pattern}", "DEBUG")
    log_progress(f"Relation: {test_relation}", "DEBUG")
    start = time.perf_counter()
    cursor.execute(query, (test_pattern, test_relation))
    results = cursor.fetchall()
    elapsed = time.perf_counter() - start
    log_progress(
        f"Result: {len(results)} rows in {elapsed:.3f}s",
        "SUCCESS" if results else "WARN",
    )
    if results:
        for label, weight in results:
            print(f" dog IsA {label} [{weight:.3f}]")
        return

    log_progress("No results! Let's check why...", "WARN")

    # Count each condition separately, then together, to see which one
    # eliminates the rows.
    cursor.execute("SELECT COUNT(*) FROM edge WHERE start_id LIKE ?", (test_pattern,))
    edge_count = cursor.fetchone()[0]
    log_progress(f" Edges with pattern: {edge_count}", "DEBUG")

    cursor.execute(
        "SELECT COUNT(*) FROM edge e JOIN relation r ON e.rel_id = r.id WHERE r.label = ?",
        (test_relation,),
    )
    rel_edge_count = cursor.fetchone()[0]
    log_progress(f" Edges with relation {test_relation}: {rel_edge_count}", "DEBUG")

    cursor.execute("""
        SELECT COUNT(*)
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ? AND r.label = ?
    """, (test_pattern, test_relation))
    combo_count = cursor.fetchone()[0]
    log_progress(f" Combination: {combo_count}", "DEBUG")

    if combo_count == 0:
        log_progress(" ❌ NO edges match pattern + relation!", "ERROR")
        log_progress(" Checking what relations DO exist for 'dog':", "INFO")
        cursor.execute("""
            SELECT DISTINCT r.label, COUNT(*) as cnt
            FROM edge e
            JOIN relation r ON e.rel_id = r.id
            WHERE e.start_id LIKE ?
            GROUP BY r.label
            ORDER BY cnt DESC
            LIMIT 10
        """, (test_pattern,))
        actual_rels = cursor.fetchall()
        log_progress(" Actual relations for 'dog':", "INFO")
        for rel_label, count in actual_rels:
            print(f" {rel_label}: {count} edges")


def deep_debug():
    """Run a five-step diagnostic session against the database and log it.

    Steps: sample 'dog' edges, list relations, resolve one relation id,
    run the full node/relation JOIN for one edge, and run the LIKE + JOIN
    query shape used by ``get_semantic_profile``. Stops after step 1 when no
    edges are found. Any exception is logged with a traceback instead of
    propagating, so a broken database does not prevent the module from loading.
    """
    log_progress("=" * 60, "INFO")
    log_progress("DEEP DEBUGGING SESSION", "INFO")
    log_progress("=" * 60, "INFO")

    try:
        # closing(): a sqlite3 connection used directly in `with` is not closed.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            edges = _debug_find_edges(cursor)
            if not edges:
                log_progress("NO EDGES FOUND! Database might be corrupted!", "ERROR")
                return

            _debug_list_relations(cursor)
            _debug_relation_lookup(cursor, edges[0][2])
            _debug_full_join(cursor, edges[0])
            _debug_like_join(cursor)

            log_progress("\n" + "=" * 60, "INFO")
            log_progress("DEBUGGING COMPLETE", "INFO")
            log_progress("=" * 60 + "\n", "INFO")
    except Exception as e:
        log_progress(f"Debug failed: {e}", "ERROR")
        traceback.print_exc()


# Run deep debugging
deep_debug()


def get_semantic_profile(word, lang='en', progress=gr.Progress()):
    """Build a Markdown report of the relations found for a word.

    Looks up nodes whose id starts with the ConceptNet URI for ``word`` in
    ``lang``, then lists up to 20 relation types (most frequent first) with up
    to five highest-weight target labels each.

    Args:
        word: Word or phrase; it is trimmed, lower-cased and spaces become
            underscores before lookup.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress tracker; accepted for the UI callback
            signature and not used by this function.

    Returns:
        A Markdown string: the profile, a "not found" notice, an
        "invalid input" notice, or an error message.
    """
    log_progress(f"Profile request: {word} ({lang})", "INFO")

    # Normalise before validating: a whitespace-only word would otherwise
    # become the pattern ".../c/<lang>/%" and match every node of the language.
    word = word.strip().lower().replace(' ', '_') if word else ""
    if not word or lang not in TARGET_LANGUAGES:
        return "⚠️ Invalid input"

    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"

    parts = [
        f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n",
        "*Check server logs for detailed debug information*\n\n",
    ]

    try:
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            # Find nodes
            cursor.execute(
                "SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,)
            )
            nodes = cursor.fetchall()
            if not nodes:
                return f"# 🧠 '{word}'\n\n⚠️ Not found"

            for node_id, label in nodes[:3]:
                parts.append(f"**Node:** `{node_id}` → {label}\n")
            parts.append("\n## Relations Found\n\n")

            # Get actual relations that exist
            cursor.execute("""
                SELECT DISTINCT r.label, COUNT(*) as cnt
                FROM edge e
                JOIN relation r ON e.rel_id = r.id
                WHERE e.start_id LIKE ?
                GROUP BY r.label
                ORDER BY cnt DESC
            """, (like_path,))
            relations = cursor.fetchall()
            log_progress(f"Found {len(relations)} relation types", "INFO")

            for rel_label, count in relations[:20]:
                parts.append(f"### {rel_label} ({count} edges)\n\n")

                # Get sample edges
                cursor.execute("""
                    SELECT en.label, e.weight
                    FROM edge e
                    JOIN node en ON e.end_id = en.id
                    JOIN relation r ON e.rel_id = r.id
                    WHERE e.start_id LIKE ? AND r.label = ?
                    ORDER BY e.weight DESC
                    LIMIT 5
                """, (like_path, rel_label))
                for label, weight in cursor.fetchall():
                    parts.append(
                        f"- **{word}** {rel_label} → *{label}* `[{weight:.3f}]`\n"
                    )
                parts.append("\n")

        return "".join(parts)
    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        traceback.print_exc()
        return f"**❌ Error:** {e}"


def run_raw_query(sql_query):
    """Execute a user-supplied SELECT statement and return the rows.

    Args:
        sql_query: SQL text; it is rejected unless it starts with ``SELECT``
            (ignoring surrounding whitespace and case).

    Returns:
        A ``(DataFrame, status_message)`` tuple. On rejection or failure the
        DataFrame is empty and the message describes the problem.
    """
    if not sql_query or not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "❌ Only SELECT"
    try:
        with closing(get_db_connection()) as conn:
            start = time.perf_counter()
            df = pd.read_sql_query(sql_query, conn)
            elapsed = time.perf_counter() - start
        return df, f"✅ {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        return pd.DataFrame(), f"❌ {e}"


def get_schema_info():
    """Return the placeholder Markdown shown in the Schema tab."""
    return "# Schema\n\nCheck server logs for detailed debugging output."


# UI
with gr.Blocks(title="ConceptNet Debug", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 ConceptNet Debugger")
    gr.Markdown("**Check server logs for comprehensive debugging information!**")

    with gr.Tabs():
        with gr.TabItem("🔍 Profile"):
            with gr.Row():
                word_input = gr.Textbox(label="Word", value="dog")
                lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Lang")
            profile_btn = gr.Button("Get Profile")
            profile_out = gr.Markdown()

        with gr.TabItem("💻 SQL"):
            sql_input = gr.Textbox(
                label="SQL",
                value="SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id LIKE 'http://conceptnet.io/c/en/dog%' LIMIT 10",
                lines=3
            )
            sql_btn = gr.Button("Execute")
            sql_status = gr.Markdown()
            sql_results = gr.DataFrame()

        with gr.TabItem("📊 Schema"):
            schema_btn = gr.Button("Load")
            schema_out = gr.Markdown()

    # Event listeners are registered while the Blocks context is still open.
    profile_btn.click(get_semantic_profile, [word_input, lang_input], profile_out)
    sql_btn.click(run_raw_query, sql_input, [sql_results, sql_status])
    schema_btn.click(get_schema_info, None, schema_out)

if __name__ == "__main__":
    log_progress("DEBUG MODE READY", "SUCCESS")
    demo.launch(ssr_mode=False)