Spaces:
Running
Running
| import gradio as gr | |
| import sqlite3 | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download, HfApi | |
| import os | |
| import time | |
| import json | |
# ===== CONFIGURATION =====
# Language codes accepted by get_semantic_profile() and offered in the UI dropdown.
TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
# Hugging Face dataset repo that holds the indexed database and its progress file.
INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
PROGRESS_FILENAME = "indexing_progress.json"
# Path returned by create_indexed_database() when no remote indexed DB is available.
LOCAL_DB_PATH = "/tmp/conceptnet-indexed.db"
# URI prefix used to build node-id LIKE patterns.
CONCEPTNET_BASE = "http://conceptnet.io"
# =========================

print(f"π Languages: {', '.join(code.upper() for code in TARGET_LANGUAGES)}")

# First non-empty token among the supported environment variable names.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or os.environ.get("HF_API_TOKEN")
)
if HF_TOKEN:
    print("β HF_TOKEN found")

# Source (un-indexed) database location; not referenced by the code visible here.
ORIGINAL_REPO_ID = "ysenarath/conceptnet-sqlite"
ORIGINAL_DB_FILENAME = "data/conceptnet-v5.7.0.db"
# Log level -> glyph printed between the timestamp and the message.
# NOTE(review): these glyphs look like mis-decoded emoji (mojibake); they are
# reproduced exactly as found - confirm the file's encoding before changing them.
_LOG_PREFIXES = {
    "INFO": "βΉοΈ ",
    "SUCCESS": "β ",
    "ERROR": "β",
    "WARN": "β οΈ ",
    "DEBUG": "π",
}


def log_progress(message, level="INFO"):
    """Print *message* to stdout with an ``[HH:MM:SS]`` timestamp and a level glyph.

    Args:
        message: Text to print (anything f-string formattable).
        level: One of ``INFO``, ``SUCCESS``, ``ERROR``, ``WARN``, ``DEBUG``.
            Matching is case-insensitive; any other value prints no glyph.
    """
    timestamp = time.strftime("%H:%M:%S")
    # str() keeps non-string levels (e.g. None) working: they simply get no glyph.
    prefix = _LOG_PREFIXES.get(str(level).upper(), "")
    print(f"[{timestamp}] {prefix} {message}")
def check_remote_progress():
    """Fetch the indexing progress record from the Hugging Face dataset repo.

    Best-effort: every failure is logged and reported as "not indexed" instead
    of being raised, so a missing repo or token never stops start-up.

    Returns:
        dict: The parsed contents of ``PROGRESS_FILENAME``, or
        ``{"indexing_complete": False}`` when no token is configured, the
        repo/file cannot be fetched, or the file does not contain a JSON object.
    """
    if not HF_TOKEN:
        return {"indexing_complete": False}

    try:
        api = HfApi()
        # Result is discarded; presumably an up-front reachability/access check.
        api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        progress_path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=PROGRESS_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        with open(progress_path, 'r', encoding="utf-8") as f:
            progress = json.load(f)
    except Exception as e:
        # `except Exception` (not a bare except) lets Ctrl-C / SystemExit through.
        log_progress(f"Could not read remote indexing progress: {e}", "WARN")
        return {"indexing_complete": False}

    if not isinstance(progress, dict):
        # Callers use .get() on the result, so anything but an object is unusable.
        log_progress(f"Ignoring malformed {PROGRESS_FILENAME}: expected a JSON object", "WARN")
        return {"indexing_complete": False}
    return progress
def create_indexed_database():
    """Return the filesystem path of the SQLite database to query.

    Despite the name, nothing is built here: when the remote progress record
    says indexing is complete, the indexed database is downloaded from the
    Hugging Face dataset repo; otherwise (or if the download fails)
    ``LOCAL_DB_PATH`` is returned.

    Returns:
        str: Path to the downloaded database, or ``LOCAL_DB_PATH``.
    """
    progress = check_remote_progress()
    if progress.get("indexing_complete", False):
        try:
            indexed_path = hf_hub_download(
                repo_id=INDEXED_REPO_ID,
                filename=INDEXED_DB_FILENAME,
                repo_type="dataset",
                token=HF_TOKEN,
            )
        except Exception as e:
            # Keep the best-effort fallback, but say why it happened.
            log_progress(f"Indexed DB download failed, falling back to {LOCAL_DB_PATH}: {e}", "WARN")
        else:
            log_progress("Downloaded indexed DB", "SUCCESS")
            return indexed_path

    if not os.path.exists(LOCAL_DB_PATH):
        # sqlite3.connect() would silently create an empty database at this path.
        log_progress(f"No database file at {LOCAL_DB_PATH}; queries will fail until one exists", "WARN")
    return LOCAL_DB_PATH
# Resolved once at import time; get_db_connection() opens this path on every call.
DB_PATH = create_indexed_database()
def get_db_connection():
    """Open and return a new SQLite connection to ``DB_PATH``.

    The connection is created with ``check_same_thread=False`` and an enlarged
    page cache. The caller owns it: using a ``sqlite3`` connection in a
    ``with`` statement only manages the transaction, it does not close it.
    """
    connection = sqlite3.connect(DB_PATH, check_same_thread=False)
    # SQLite reads a negative cache_size as a size in KiB rather than in pages.
    connection.execute("PRAGMA cache_size = -256000")
    return connection
def _debug_sample_edges(cursor):
    """Step 1: print and return up to five edges whose start id begins with the English 'dog' URI."""
    log_progress("\n1. Finding actual edges for 'dog':", "INFO")
    cursor.execute("""
        SELECT e.id, e.start_id, e.rel_id, e.end_id, e.weight
        FROM edge e
        WHERE e.start_id LIKE 'http://conceptnet.io/c/en/dog%'
        LIMIT 5
    """)
    edges = cursor.fetchall()
    log_progress(f"Found {len(edges)} edges:", "SUCCESS")
    for edge_id, start_id, rel_id, end_id, weight in edges:
        print(f" {edge_id}")
        print(f" start: {start_id}")
        print(f" rel: {rel_id}")
        print(f" end: {end_id}")
        print(f" weight: {weight}")
    return edges


def _debug_list_relations(cursor):
    """Step 2: print the first twenty rows of the relation table."""
    log_progress("\n2. What relations exist?", "INFO")
    cursor.execute("SELECT id, label FROM relation LIMIT 20")
    relations = cursor.fetchall()
    log_progress(f"Found {len(relations)} relations:", "SUCCESS")
    for rel_id, label in relations:
        print(f" {rel_id} -> {label}")


def _debug_relation_lookup(cursor, rel_id):
    """Step 3: check that an edge's rel_id resolves to a row in the relation table."""
    log_progress("\n3. Testing relation JOIN:", "INFO")
    if not rel_id:
        return
    log_progress(f"Looking up relation ID: {rel_id}", "DEBUG")
    cursor.execute("SELECT id, label FROM relation WHERE id = ?", (rel_id,))
    rel_result = cursor.fetchone()
    if rel_result:
        log_progress(f" β Found: {rel_result[0]} -> {rel_result[1]}", "SUCCESS")
    else:
        log_progress(" β Relation ID not found in relation table!", "ERROR")


def _debug_full_join(cursor, edge):
    """Step 4: run the edge/node/relation JOIN for one edge; on no rows, probe each table separately."""
    _edge_id, start_id, rel_id, end_id, _weight = edge
    log_progress(f"\n4. Testing full JOIN on: {start_id}", "INFO")
    query = """
        SELECT
            e.id,
            s.label AS start_label,
            r.label AS relation,
            en.label AS end_label,
            e.weight
        FROM edge e
        JOIN node s ON e.start_id = s.id
        JOIN relation r ON e.rel_id = r.id
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id = ?
        LIMIT 5
    """
    started = time.perf_counter()
    cursor.execute(query, (start_id,))
    results = cursor.fetchall()
    elapsed = time.perf_counter() - started
    log_progress(f"Full JOIN returned {len(results)} in {elapsed:.3f}s", "SUCCESS" if results else "ERROR")

    if results:
        for _id, s_label, r_label, e_label, weight in results:
            print(f" {s_label} --{r_label}--> {e_label} [{weight:.3f}]")
        return

    log_progress("JOIN returned nothing! Checking each table...", "ERROR")
    # Look up each side of the join on its own to see which one is missing.
    cursor.execute("SELECT id, label FROM node WHERE id = ?", (start_id,))
    log_progress(f" Start node: {cursor.fetchone()}", "DEBUG")
    cursor.execute("SELECT id, label FROM node WHERE id = ?", (end_id,))
    log_progress(f" End node: {cursor.fetchone()}", "DEBUG")
    cursor.execute("SELECT id, label FROM relation WHERE id = ?", (rel_id,))
    log_progress(f" Relation: {cursor.fetchone()}", "DEBUG")


def _debug_like_join(cursor):
    """Step 5: run the LIKE + relation-label query used by the profile view and explain an empty result."""
    log_progress("\n5. Testing LIKE + JOIN (what semantic profile does):", "INFO")
    test_pattern = f"{CONCEPTNET_BASE}/c/en/dog%"
    test_relation = "/r/IsA"
    query = """
        SELECT
            en.label,
            e.weight
        FROM edge e
        JOIN node en ON e.end_id = en.id
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ? AND r.label = ?
        LIMIT 5
    """
    log_progress(f"Pattern: {test_pattern}", "DEBUG")
    log_progress(f"Relation: {test_relation}", "DEBUG")
    started = time.perf_counter()
    cursor.execute(query, (test_pattern, test_relation))
    results = cursor.fetchall()
    elapsed = time.perf_counter() - started
    log_progress(f"Result: {len(results)} rows in {elapsed:.3f}s", "SUCCESS" if results else "WARN")

    if results:
        for label, weight in results:
            print(f" dog IsA {label} [{weight:.3f}]")
        return

    log_progress("No results! Let's check why...", "WARN")
    # Check if edges exist with this pattern
    cursor.execute("SELECT COUNT(*) FROM edge WHERE start_id LIKE ?", (test_pattern,))
    edge_count = cursor.fetchone()[0]
    log_progress(f" Edges with pattern: {edge_count}", "DEBUG")
    # Check if any edges have this relation
    cursor.execute(
        "SELECT COUNT(*) FROM edge e JOIN relation r ON e.rel_id = r.id WHERE r.label = ?",
        (test_relation,),
    )
    rel_edge_count = cursor.fetchone()[0]
    log_progress(f" Edges with relation {test_relation}: {rel_edge_count}", "DEBUG")
    # Check if the combination exists
    cursor.execute("""
        SELECT COUNT(*) FROM edge e
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ? AND r.label = ?
    """, (test_pattern, test_relation))
    combo_count = cursor.fetchone()[0]
    log_progress(f" Combination: {combo_count}", "DEBUG")
    if combo_count != 0:
        return

    log_progress(" β NO edges match pattern + relation!", "ERROR")
    log_progress(" Checking what relations DO exist for 'dog':", "INFO")
    cursor.execute("""
        SELECT DISTINCT r.label, COUNT(*) as cnt
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        WHERE e.start_id LIKE ?
        GROUP BY r.label
        ORDER BY cnt DESC
        LIMIT 10
    """, (test_pattern,))
    actual_rels = cursor.fetchall()
    log_progress(" Actual relations for 'dog':", "INFO")
    for rel_label, count in actual_rels:
        print(f" {rel_label}: {count} edges")


def deep_debug():
    """Run a five-step diagnostic of the database and print the findings.

    Steps: sample 'dog' edges, list relations, resolve one relation id, run the
    full edge/node/relation JOIN for one edge, and run the LIKE + relation
    query used by the profile view. Stops after step 1 if no edges are found.
    Any exception is logged with a traceback instead of being raised.
    """
    from contextlib import closing
    import traceback

    banner = "=" * 60
    log_progress(banner, "INFO")
    log_progress("DEEP DEBUGGING SESSION", "INFO")
    log_progress(banner, "INFO")
    try:
        # closing() is needed: `with connection:` alone only ends the transaction.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            edges = _debug_sample_edges(cursor)
            if not edges:
                log_progress("NO EDGES FOUND! Database might be corrupted!", "ERROR")
                return

            _debug_list_relations(cursor)
            first_edge = edges[0]
            _debug_relation_lookup(cursor, first_edge[2])
            _debug_full_join(cursor, first_edge)
            _debug_like_join(cursor)

        log_progress("\n" + banner, "INFO")
        log_progress("DEBUGGING COMPLETE", "INFO")
        log_progress(banner + "\n", "INFO")
    except Exception as e:
        log_progress(f"Debug failed: {e}", "ERROR")
        traceback.print_exc()
# Run deep debugging
# NOTE: this executes at import time, before the UI below is constructed, so
# its queries run and its output is printed on every start of the module.
deep_debug()
def get_semantic_profile(word, lang='en', progress=gr.Progress()):
    """Semantic profile - build a Markdown report of the relations found for *word*.

    Args:
        word: Term to look up. It is stripped, lower-cased, spaces become
            underscores and ``%`` characters are removed before matching.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress tracker (not used by this function's body).

    Returns:
        str: Markdown listing up to three matching nodes and, for up to twenty
        relation types, the five highest-weighted outgoing edges; or a short
        warning/error message when the input is invalid, nothing matches, or
        the query fails.
    """
    from contextlib import closing
    import traceback

    log_progress(f"Profile request: {word} ({lang})", "INFO")

    # Normalise BEFORE validating: a blank (or '%'-only) word would otherwise
    # produce the pattern ".../c/<lang>/%", which matches every node of the language.
    cleaned = word.strip().lower().replace(' ', '_').replace('%', '') if isinstance(word, str) else ""
    if not cleaned or lang not in TARGET_LANGUAGES:
        return "β οΈ Invalid input"
    word = cleaned

    # NOTE(review): this is a prefix match, so "dog" also matches ids such as
    # ".../c/en/dogma", and "_" acts as a single-character wildcard in LIKE.
    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"

    parts = [
        f"# π§ Semantic Profile: '{word}' ({lang.upper()})\n\n",
        "*Check server logs for detailed debug information*\n\n",
    ]

    try:
        # closing() is needed: `with connection:` alone only ends the transaction.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            # Find nodes
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            nodes = cursor.fetchall()
            if not nodes:
                return f"# π§ '{word}'\n\nβ οΈ Not found"

            parts.extend(f"**Node:** `{node_id}` β {label}\n" for node_id, label in nodes[:3])
            parts.append("\n## Relations Found\n\n")

            # Get actual relations that exist
            cursor.execute("""
                SELECT DISTINCT r.label, COUNT(*) as cnt
                FROM edge e
                JOIN relation r ON e.rel_id = r.id
                WHERE e.start_id LIKE ?
                GROUP BY r.label
                ORDER BY cnt DESC
            """, (like_path,))
            relations = cursor.fetchall()
            log_progress(f"Found {len(relations)} relation types", "INFO")

            for rel_label, count in relations[:20]:
                parts.append(f"### {rel_label} ({count} edges)\n\n")
                # Get sample edges
                cursor.execute("""
                    SELECT en.label, e.weight
                    FROM edge e
                    JOIN node en ON e.end_id = en.id
                    JOIN relation r ON e.rel_id = r.id
                    WHERE e.start_id LIKE ? AND r.label = ?
                    ORDER BY e.weight DESC
                    LIMIT 5
                """, (like_path, rel_label))
                parts.extend(
                    f"- **{word}** {rel_label} β *{label}* `[{weight:.3f}]`\n"
                    for label, weight in cursor.fetchall()
                )
                parts.append("\n")

            return "".join(parts)
    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        traceback.print_exc()
        return f"**β Error:** {e}"
def run_raw_query(sql_query):
    """Execute a user-supplied SELECT statement and return its rows.

    Args:
        sql_query: SQL text from the UI. Anything that is not a string
            starting with ``SELECT`` (case-insensitive, after stripping) is
            rejected without touching the database.

    Returns:
        tuple[pandas.DataFrame, str]: The result rows and a status line; on
        rejection or error the frame is empty and the status describes why.
    """
    from contextlib import closing

    if not isinstance(sql_query, str) or not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "β Only SELECT"

    try:
        # closing() is needed: `with connection:` alone only ends the transaction.
        with closing(get_db_connection()) as conn:
            # Defence in depth for untrusted SQL: the prefix check above is weak,
            # so also make this connection refuse any write to the database.
            conn.execute("PRAGMA query_only = ON")
            started = time.perf_counter()
            df = pd.read_sql_query(sql_query, conn)
            elapsed = time.perf_counter() - started
        return df, f"β {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        return pd.DataFrame(), f"β {e}"
def get_schema_info():
    """Return the static Markdown text shown when the Schema tab's button is clicked."""
    # Plain literal: the original f-string had no placeholders to interpolate.
    return "# Schema\n\nCheck server logs for detailed debugging output."
# UI
# Three tabs: a word profile lookup, a raw SQL runner and a schema placeholder.
# NOTE(review): the indentation of this section was flattened in the source this
# was reconstructed from; the nesting of the layout blocks below (in particular
# whether the "Get Profile" button sits inside the Row) should be confirmed.
with gr.Blocks(title="ConceptNet Debug", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# π ConceptNet Debugger")
    gr.Markdown("**Check server logs for comprehensive debugging information!**")
    with gr.Tabs():
        # Profile tab: word + language inputs feeding get_semantic_profile().
        with gr.TabItem("π Profile"):
            with gr.Row():
                word_input = gr.Textbox(label="Word", value="dog")
                lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Lang")
            profile_btn = gr.Button("Get Profile")
            profile_out = gr.Markdown()
        # SQL tab: free-form query box feeding run_raw_query().
        with gr.TabItem("π» SQL"):
            sql_input = gr.Textbox(
                label="SQL",
                value="SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id LIKE 'http://conceptnet.io/c/en/dog%' LIMIT 10",
                lines=3
            )
            sql_btn = gr.Button("Execute")
            sql_status = gr.Markdown()
            sql_results = gr.DataFrame()
        # Schema tab: button that shows the static text from get_schema_info().
        with gr.TabItem("π Schema"):
            schema_btn = gr.Button("Load")
            schema_out = gr.Markdown()
    # Event wiring: (handler, inputs, outputs). run_raw_query returns
    # (DataFrame, status), matching the [sql_results, sql_status] output order.
    profile_btn.click(get_semantic_profile, [word_input, lang_input], profile_out)
    sql_btn.click(run_raw_query, sql_input, [sql_results, sql_status])
    schema_btn.click(get_schema_info, None, schema_out)
# Script entry point: start the Gradio app only when run directly.
if __name__ == "__main__":
    log_progress("DEBUG MODE READY", "SUCCESS")
    # NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
    # rendering - confirm against the installed Gradio version's launch() docs.
    demo.launch(ssr_mode=False)