Spaces:

cstr
/

conceptnet_db

Running

File size: 26,874 Bytes

9fc6811
 
 
7ff8eef
9fc6811
 
7ff8eef
9fc6811
7ff8eef
 
9fc6811
b945468
7ff8eef
 
b945468
7ff8eef
9fc6811
 
 
 
 
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
 
 
 
 
7ff8eef
9fc6811
 
 
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
 
 
 
 
 
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
 
7ff8eef
9fc6811
 
7ff8eef
 
9fc6811
 
 
7ff8eef
 
 
 
 
 
9fc6811
 
 
 
 
 
 
 
7ff8eef
 
 
 
 
 
 
 
 
 
9fc6811
 
 
 
 
 
bb71fb2
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b945468
 
9fc6811
 
 
 
 
 
 
7ff8eef
9fc6811
 
 
7ff8eef
 
78578c2
 
1f46ad2
78578c2
1f46ad2
78578c2
 
bb71fb2
 
 
7ff8eef
 
 
 
78578c2
 
bb71fb2
9fc6811
 
 
7ff8eef
9fc6811
7ff8eef
 
 
 
 
 
 
bb71fb2
9fc6811
7ff8eef
 
 
 
 
 
 
bb71fb2
9fc6811
7ff8eef
 
 
 
 
 
bb71fb2
7ff8eef
 
9fc6811
7ff8eef
 
9fc6811
 
7ff8eef
9fc6811
 
7ff8eef
 
 
 
 
 
 
 
 
 
 
9fc6811
 
 
7ff8eef
9fc6811
7ff8eef
9fc6811
 
 
 
7ff8eef
9fc6811
bb71fb2
 
 
7ff8eef
bb71fb2
7ff8eef
bb71fb2
 
 
 
 
 
7ff8eef
 
 
 
 
 
 
 
 
bb71fb2
 
 
 
 
7ff8eef
bb71fb2
 
 
7ff8eef
bb71fb2
74c5fd4
 
7ff8eef
 
74c5fd4
 
7ff8eef
74c5fd4
 
7ff8eef
74c5fd4
 
7ff8eef
74c5fd4
 
 
7ff8eef
74c5fd4
 
7ff8eef
 
74c5fd4
 
 
7ff8eef
 
 
 
74c5fd4
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
b9ef820
 
7ff8eef
 
74c5fd4
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
b9ef820
 
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74c5fd4
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74c5fd4
7ff8eef
74c5fd4
7ff8eef
 
 
 
74c5fd4
 
 
 
 
 
7ff8eef
9fc6811
 
 
7ff8eef
 
 
 
 
 
 
 
 
 
9fc6811
 
7ff8eef
74c5fd4
7ff8eef
 
74c5fd4
7ff8eef
b9ef820
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
74c5fd4
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
7ff8eef
 
9fc6811
7ff8eef
9fc6811
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
7ff8eef
 
 
 
 
 
 
 
 
 
9fc6811
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb71fb2
7ff8eef
 
 
 
 
 
bb71fb2
7ff8eef
 
 
 
bb71fb2
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
7ff8eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bb71fb2
7ff8eef
74c5fd4
 
b9ef820
74c5fd4
 
 
 
9fc6811
 
 
 
 
 
bb71fb2
 
 
 
 
 
 
 
 
 
 
 
 
 
9fc6811
 
7ff8eef
 
 
bb71fb2

import gradio as gr
import sqlite3
import pandas as pd
from huggingface_hub import hf_hub_download, snapshot_download
import os
import traceback
from pathlib import Path

# --- 1. Download and Cache the Database with Indices ---
print("Downloading ConceptNet database and indices...")
REPO_ID = "ysenarath/conceptnet-sqlite"
DB_FILENAME = "data/conceptnet-v5.7.0.db"
INDEX_FOLDER = "data/conceptnet-v5.7.0-index"
VOCAB_DB = "data/conceptnet-v5.7.0-vocab.db"

# Download the main database
DB_PATH = hf_hub_download(
    repo_id=REPO_ID, 
    filename=DB_FILENAME, 
    repo_type="dataset"
)
print(f"Main database downloaded to: {DB_PATH}")

# Download the vocabulary database (optional but helpful)
try:
    VOCAB_PATH = hf_hub_download(
        repo_id=REPO_ID,
        filename=VOCAB_DB,
        repo_type="dataset"
    )
    print(f"Vocabulary database downloaded to: {VOCAB_PATH}")
except Exception as e:
    print(f"Could not download vocabulary DB: {e}")
    VOCAB_PATH = None

# Download the entire index folder for better performance
try:
    # Use snapshot_download to get the entire data directory with indices
    CACHE_DIR = snapshot_download(
        repo_id=REPO_ID,
        repo_type="dataset",
        allow_patterns=["data/conceptnet-v5.7.0-index/*"]
    )
    INDEX_PATH = os.path.join(CACHE_DIR, INDEX_FOLDER)
    print(f"Index files downloaded to: {INDEX_PATH}")
    
    # Count index files
    if os.path.exists(INDEX_PATH):
        index_files = list(Path(INDEX_PATH).glob("*.ldb"))
        print(f"Found {len(index_files)} index files (.ldb)")
except Exception as e:
    print(f"Could not download index files: {e}")
    INDEX_PATH = None

# --- 2. Database Helper Functions ---

def get_db_connection():
    """
    Creates a new read-only connection to the SQLite database with optimizations.
    """
    try:
        db_uri = f"file:{DB_PATH}?mode=ro"
        conn = sqlite3.connect(db_uri, uri=True, check_same_thread=False)
        
        # Apply PRAGMA optimizations for read performance
        pragmas = [
            "PRAGMA query_only = ON",           # Read-only mode
            "PRAGMA temp_store = MEMORY",        # Use memory for temp tables
            "PRAGMA cache_size = -128000",       # 128MB cache (negative = KB)
            "PRAGMA page_size = 8192",           # Larger page size for better I/O
            "PRAGMA mmap_size = 2147483648",     # 2GB memory-mapped I/O
            "PRAGMA synchronous = OFF",          # Safe for read-only
            "PRAGMA journal_mode = OFF",         # No journal needed for read-only
            "PRAGMA locking_mode = NORMAL",      # Allow multiple readers
            "PRAGMA threads = 4",                # Use multiple threads
        ]
        
        for pragma in pragmas:
            try:
                conn.execute(pragma)
            except sqlite3.OperationalError as e:
                print(f"Warning: Could not apply {pragma}: {e}")
        
        return conn
    except Exception as e:
        print(f"Error connecting to DB: {e}")
        traceback.print_exc()
        return None

def verify_indices():
    """
    Check and report on database indices and their usage.
    """
    print("\n=== Database Index Analysis ===")
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # Check all tables
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            tables = cursor.fetchall()
            
            total_indices = 0
            for table in tables:
                table_name = table[0]
                
                # Get indices for this table
                cursor.execute(f"PRAGMA index_list({table_name})")
                indices = cursor.fetchall()
                
                if indices:
                    print(f"\n📊 Table: {table_name}")
                    for idx in indices:
                        idx_name, unique, origin = idx[1], idx[2], idx[3]
                        
                        # Get index details
                        cursor.execute(f"PRAGMA index_info({idx_name})")
                        idx_cols = cursor.fetchall()
                        
                        cols = [col[2] for col in idx_cols]
                        unique_str = "UNIQUE" if unique else "NON-UNIQUE"
                        print(f"  ├─ {idx_name} ({unique_str}) on columns: {', '.join(cols)}")
                        total_indices += 1
            
            print(f"\n✅ Total indices found: {total_indices}")
            
            # Check if FTS (Full Text Search) is available
            cursor.execute("SELECT * FROM pragma_compile_options WHERE compile_options LIKE '%FTS%'")
            fts = cursor.fetchall()
            if fts:
                print(f"✅ Full-Text Search enabled: {[f[0] for f in fts]}")
            
            # Check database page size and cache
            cursor.execute("PRAGMA page_size")
            page_size = cursor.fetchone()[0]
            cursor.execute("PRAGMA cache_size")
            cache_size = cursor.fetchone()[0]
            print(f"\n📈 Page size: {page_size} bytes")
            print(f"📈 Cache size: {abs(cache_size)} KB" if cache_size < 0 else f"📈 Cache size: {cache_size} pages")
            
            # Get database size
            cursor.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
            db_size = cursor.fetchone()[0]
            print(f"📦 Database size: {db_size / 1024 / 1024 / 1024:.2f} GB")
            
    except Exception as e:
        print(f"Error in verify_indices: {e}")
        traceback.print_exc()

def get_schema_info():
    """
    Dynamically queries the SQLite database to get its schema with index information.
    """
    print("Getting schema info...")
    schema_md = "# 📚 Database Schema\n\n"
    
    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()
            
            # Get database stats
            cursor.execute("SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()")
            db_size = cursor.fetchone()[0]
            schema_md += f"**Database Size:** {db_size / 1024 / 1024 / 1024:.2f} GB\n\n"
            
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
            tables = cursor.fetchall()
            
            if not tables:
                return "Could not find any tables in the database."

            for table in tables:
                table_name = table[0]
                
                # Get row count
                try:
                    cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
                    row_count = cursor.fetchone()[0]
                    schema_md += f"## Table: `{table_name}` ({row_count:,} rows)\n\n"
                except:
                    schema_md += f"## Table: `{table_name}`\n\n"
                
                schema_md += "### Columns\n\n"
                schema_md += "| Column Name | Data Type | Not Null | Primary Key |\n"
                schema_md += "|:------------|:----------|:---------|:------------|\n"
                
                cursor.execute(f"PRAGMA table_info({table_name});")
                columns = cursor.fetchall()
                for col in columns:
                    name, dtype, notnull, pk = col[1], col[2], col[3], col[5]
                    schema_md += f"| `{name}` | `{dtype}` | {'✓' if notnull else '✗'} | {'✓' if pk else '✗'} |\n"
                
                # Show indices with details
                cursor.execute(f"PRAGMA index_list({table_name});")
                indices = cursor.fetchall()
                if indices:
                    schema_md += f"\n### Indices ({len(indices)})\n\n"
                    for idx in indices:
                        idx_name, unique, origin = idx[1], idx[2], idx[3]
                        
                        # Get indexed columns
                        cursor.execute(f"PRAGMA index_info({idx_name});")
                        idx_cols = cursor.fetchall()
                        cols = [col[2] for col in idx_cols if col[2]]
                        
                        unique_badge = "🔒 UNIQUE" if unique else "📑 INDEX"
                        schema_md += f"- **{idx_name}** {unique_badge}\n"
                        schema_md += f"  - Columns: `{', '.join(cols) if cols else 'N/A'}`\n"
                        schema_md += f"  - Origin: {origin}\n"
                
                schema_md += "\n---\n\n"
            
            return schema_md
    except Exception as e:
        print(f"Error in get_schema_info: {e}")
        traceback.print_exc()
        return f"An error occurred while fetching schema: {e}"

def run_query(start_node, relation, end_node, limit):
    """
    OPTIMIZED: Uses direct JOINs with indexed columns for maximum performance.
    """
    print(f"Running query: start='{start_node}', rel='{relation}', end='{end_node}'")
    
    # Build query using indexed JOIN approach
    query = """
        SELECT
            e.id AS edge_id,
            s.id AS start_id_path,
            r.label AS relation_label,
            en.id AS end_id_path,
            e.weight,
            e.dataset,
            e.surface_text,
            s.label AS start_label_text,
            en.label AS end_label_text
        FROM edge AS e
        INNER JOIN relation AS r ON e.rel_id = r.id
        INNER JOIN node AS s ON e.start_id = s.id
        INNER JOIN node AS en ON e.end_id = en.id
    """
    
    where_conditions = []
    params = []
    
    try:
        # Build WHERE conditions leveraging indices
        if start_node:
            if "%" in start_node:
                where_conditions.append("s.id LIKE ?")
                params.append(start_node)
            else:
                # Exact match or prefix match
                where_conditions.append("s.id LIKE ?")
                params.append(f"%{start_node}%")
        
        if relation:
            if "%" in relation:
                where_conditions.append("r.label LIKE ?")
                params.append(relation)
            else:
                # Exact match is faster
                where_conditions.append("r.label = ?")
                params.append(relation)

        if end_node:
            if "%" in end_node:
                where_conditions.append("en.id LIKE ?")
                params.append(end_node)
            else:
                where_conditions.append("en.id LIKE ?")
                params.append(f"%{end_node}%")

        if where_conditions:
            query += " WHERE " + " AND ".join(where_conditions)
            
        # Order by weight to get most relevant results first
        query += " ORDER BY e.weight DESC LIMIT ?"
        params.append(limit)
        
        print(f"Executing SQL with {len(params)} parameters")

        with get_db_connection() as conn:
            # Use EXPLAIN QUERY PLAN to verify index usage (for debugging)
            explain_query = "EXPLAIN QUERY PLAN " + query
            try:
                explain_result = conn.execute(explain_query, params).fetchall()
                print("Query Plan:")
                for row in explain_result:
                    print(f"  {row}")
            except:
                pass
            
            # Execute actual query
            df = pd.read_sql_query(query, conn, params=params)
        
        if df.empty:
            return pd.DataFrame(), "Query ran successfully but returned no results. Try broader search terms or check spelling."
            
        return df, f"✅ Query successful! Found {len(df)} results (ordered by relevance)."

    except Exception as e:
        print(f"Error in run_query: {e}")
        traceback.print_exc()
        err_msg = f"**❌ Query Failed!**\n\n```\n{e}\n```"
        return pd.DataFrame(), err_msg

def run_raw_query(sql_query):
    """
    Executes a raw, read-only SQL query with query plan analysis.
    """
    print(f"Running raw query: {sql_query[:100]}...")
    
    if not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "**Error:** Only `SELECT` statements are allowed for safety."
        
    try:
        with get_db_connection() as conn:
            # Show query plan
            try:
                explain_result = conn.execute("EXPLAIN QUERY PLAN " + sql_query).fetchall()
                print("Query Plan:")
                for row in explain_result:
                    print(f"  {row}")
            except:
                pass
            
            df = pd.read_sql_query(sql_query, conn)
        
        if df.empty:
            return df, "Query ran successfully but returned no results."
        
        return df, f"✅ Query successful! Returned {len(df)} rows."
    except Exception as e:
        print(f"Error in run_raw_query: {e}")
        traceback.print_exc()
        return pd.DataFrame(), f"**❌ Query Failed!**\n\n```\n{e}\n```"

def get_semantic_profile(word, lang='en'):
    """
    HIGHLY OPTIMIZED: Single query with UNION ALL for all relations at once.
    Uses indexed columns for maximum speed.
    """
    if not word:
        return "⚠️ Please enter a word."
    
    word = word.strip().lower().replace(' ', '_')
    like_path = f"/c/{lang}/{word}%"
    print(f"Getting semantic profile for: {like_path}")

    # Most important relations for semantic understanding
    relations_to_check = [
        "/r/IsA", "/r/PartOf", "/r/HasA", "/r/UsedFor", "/r/CapableOf",
        "/r/Causes", "/r/HasProperty", "/r/Synonym", "/r/Antonym", 
        "/r/AtLocation", "/r/RelatedTo", "/r/DerivedFrom"
    ]
    
    output_md = f"# 🧠 Semantic Profile: '{word}'\n"
    output_md += f"**Language:** {lang.upper()} | **Search Pattern:** `{like_path}`\n\n"
    
    try:
        with get_db_connection() as conn:
            # MEGA-OPTIMIZED: Single UNION ALL query for all relations
            union_parts = []
            union_params = []
            
            for rel in relations_to_check:
                # Outgoing edges (word as subject)
                union_parts.append("""
                    SELECT 
                        ? as rel_label,
                        'out' as dir,
                        en.id as target_id,
                        en.label as target_label,
                        e.weight as weight
                    FROM edge e
                    INDEXED BY (SELECT name FROM pragma_index_list('edge') LIMIT 1)
                    INNER JOIN node s ON e.start_id = s.id
                    INNER JOIN node en ON e.end_id = en.id
                    INNER JOIN relation r ON e.rel_id = r.id
                    WHERE s.id LIKE ? AND r.label = ?
                    ORDER BY e.weight DESC
                    LIMIT 7
                """)
                union_params.extend([rel, like_path, rel])
                
                # Incoming edges (word as object)
                union_parts.append("""
                    SELECT 
                        ? as rel_label,
                        'in' as dir,
                        s.id as target_id,
                        s.label as target_label,
                        e.weight as weight
                    FROM edge e
                    INNER JOIN node s ON e.start_id = s.id
                    INNER JOIN node en ON e.end_id = en.id
                    INNER JOIN relation r ON e.rel_id = r.id
                    WHERE en.id LIKE ? AND r.label = ?
                    ORDER BY e.weight DESC
                    LIMIT 7
                """)
                union_params.extend([rel, like_path, rel])
            
            # Execute the mega-query
            full_query = " UNION ALL ".join(union_parts)
            
            print(f"Executing optimized semantic profile query...")
            cursor = conn.execute(full_query, union_params)
            results = cursor.fetchall()
            
            if not results:
                return f"""# 🧠 Semantic Profile: '{word}'

⚠️ **No results found**

This could mean:
1. The word isn't in ConceptNet for language `{lang}`
2. Try checking spelling: `{word}`
3. Try language code 'en' for English
4. Try a more common/simpler word

**Tip:** Use the Query Builder to search manually."""
            
            # Group and format results
            current_rel = None
            rel_results = []
            total_relations = 0
            
            for rel_label, direction, target_id, target_label, weight in results:
                if rel_label != current_rel:
                    if current_rel is not None:
                        # Write previous relation
                        output_md += f"## {current_rel}\n\n"
                        if rel_results:
                            for line in rel_results:
                                output_md += line
                            total_relations += len(rel_results)
                        else:
                            output_md += "*No results*\n"
                        output_md += "\n"
                    
                    current_rel = rel_label
                    rel_results = []
                
                # Format output
                weight_str = f"{weight:.3f}"
                if direction == 'out':
                    rel_results.append(
                        f"- **{word}** {rel_label} → *{target_label}* "
                        f"`[{weight_str}]`\n"
                    )
                else:
                    rel_results.append(
                        f"- *{target_label}* {rel_label} → **{word}** "
                        f"`[{weight_str}]`\n"
                    )
            
            # Write last relation
            if current_rel is not None:
                output_md += f"## {current_rel}\n\n"
                if rel_results:
                    for line in rel_results:
                        output_md += line
                    total_relations += len(rel_results)
                else:
                    output_md += "*No results*\n"
                output_md += "\n"
            
            output_md += "---\n"
            output_md += f"**Total relations found:** {total_relations}\n"
            output_md += f"*Weight indicates strength of association (higher = stronger)*\n"
        
        return output_md

    except Exception as e:
        print(f"Error in get_semantic_profile: {e}")
        traceback.print_exc()
        return f"**❌ An error occurred:**\n\n```\n{e}\n```"

# --- 3. Build the Gradio UI ---

# Verify indices on startup
verify_indices()

with gr.Blocks(title="ConceptNet SQLite Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 ConceptNet SQLite Explorer")
    gr.Markdown(
        f"**Database:** `{os.path.basename(DB_PATH)}` ({os.path.getsize(DB_PATH) / 1024 / 1024 / 1024:.2f} GB) | "
        f"**Status:** {'✅ Indices Loaded' if INDEX_PATH and os.path.exists(INDEX_PATH) else '⚠️ No Index Cache'}"
    )
    gr.Markdown("*Explore semantic relationships in ConceptNet with optimized indexed queries*")
    
    with gr.Tabs():
        with gr.TabItem("🔍 Semantic Profile"):
            gr.Markdown(
                "**Get a comprehensive semantic profile for any word.**\n\n"
                "Queries common relations: IsA, HasA, UsedFor, CapableOf, Causes, HasProperty, and more.\n"
            )
            
            with gr.Row():
                with gr.Column(scale=2):
                    word_input = gr.Textbox(
                        label="Word", 
                        placeholder="dog",
                        info="Single word or phrase (use underscores for phrases)"
                    )
                with gr.Column(scale=1):
                    lang_input = gr.Textbox(
                        label="Language", 
                        value="en", 
                        placeholder="en",
                        info="ISO code (en, de, es, fr, ja, zh, etc.)"
                    )
            
            with gr.Row():
                semantic_btn = gr.Button("🔍 Get Semantic Profile", variant="primary", size="lg")
                
            with gr.Accordion("📚 Example Words", open=False):
                gr.Markdown(
                    "**English (en):** dog, cat, computer, love, happiness, run\n\n"
                    "**German (de):** hund, katze, liebe, glück\n\n"
                    "**Spanish (es):** perro, gato, amor, felicidad\n\n"
                    "**French (fr):** chien, chat, amour, bonheur\n\n"
                    "**Japanese (ja):** 犬, 猫, 愛, 幸せ"
                )
            
            semantic_output = gr.Markdown("*Click 'Get Semantic Profile' to start...*")
            
        with gr.TabItem("⚡ Query Builder"):
            gr.Markdown(
                "**Build custom queries using ConceptNet's graph structure.**\n\n"
                "Find edges connecting concepts through specific relations. Leverages database indices for fast lookups."
            )
            
            with gr.Row():
                start_input = gr.Textbox(
                    label="Start Node", 
                    placeholder="dog (or /c/en/dog)",
                    info="Leave empty for any"
                )
                rel_input = gr.Textbox(
                    label="Relation", 
                    placeholder="IsA (or /r/IsA)",
                    info="Leave empty for any"
                )
                end_input = gr.Textbox(
                    label="End Node", 
                    placeholder="animal (or /c/en/animal)",
                    info="Leave empty for any"
                )
            
            limit_slider = gr.Slider(
                label="Results Limit", 
                minimum=1, 
                maximum=500, 
                value=50, 
                step=1,
                info="Higher limits may be slower"
            )
            
            query_btn = gr.Button("▶️ Run Query", variant="primary", size="lg")
            
            with gr.Accordion("💡 Query Tips & Examples", open=False):
                gr.Markdown(
                    "**Tips:**\n"
                    "- Omit `/c/en/` prefix - it's added automatically\n"
                    "- Use `%` as wildcard: `%dog%` matches 'hotdog', 'doghouse'\n"
                    "- More specific = faster queries\n\n"
                    "**Examples:**\n"
                    "1. What can dogs do? → Start: `dog`, Relation: `CapableOf`, End: *empty*\n"
                    "2. What is a dog? → Start: `dog`, Relation: `IsA`, End: *empty*\n"
                    "3. Things at home → Start: *empty*, Relation: `AtLocation`, End: `home`\n"
                    "4. Synonyms of happy → Start: `happy`, Relation: `Synonym`, End: *empty*"
                )
            
            status_output = gr.Markdown("*Ready to query...*")
            results_output = gr.DataFrame(label="Query Results", interactive=False, wrap=True)
        
        with gr.TabItem("💻 Raw SQL"):
            gr.Markdown(
                "**Advanced:** Execute custom SQL queries.\n\n"
                "⚠️ Only `SELECT` statements allowed. Check Schema tab for table structure."
            )
            
            raw_sql_input = gr.Textbox(
                label="SQL Query", 
                placeholder="SELECT s.label, r.label, e.label FROM edge e JOIN node s ON e.start_id = s.id JOIN relation r ON e.rel_id = r.id JOIN node e ON e.end_id = e.id WHERE s.label = 'dog' LIMIT 10",
                lines=6,
                info="Write SELECT query"
            )
            
            with gr.Accordion("📋 Useful Queries", open=False):
                gr.Markdown(
                    "**Count edges by relation:**\n"
                    "```sql\n"
                    "SELECT r.label, COUNT(*) as count \n"
                    "FROM edge e \n"
                    "JOIN relation r ON e.rel_id = r.id \n"
                    "GROUP BY r.label \n"
                    "ORDER BY count DESC\n"
                    "```\n\n"
                    "**Find strongest connections:**\n"
                    "```sql\n"
                    "SELECT s.label, r.label, e.label, edge.weight\n"
                    "FROM edge \n"
                    "JOIN node s ON edge.start_id = s.id\n"
                    "JOIN relation r ON edge.rel_id = r.id\n"
                    "JOIN node e ON edge.end_id = e.id\n"
                    "ORDER BY weight DESC LIMIT 20\n"
                    "```\n\n"
                    "**Check index usage:**\n"
                    "```sql\n"
                    "EXPLAIN QUERY PLAN\n"
                    "SELECT * FROM edge WHERE start_id = '/c/en/dog'\n"
                    "```"
                )
            
            raw_query_btn = gr.Button("▶️ Execute SQL", variant="secondary", size="lg")
            raw_status_output = gr.Markdown("*Ready...*")
            raw_results_output = gr.DataFrame(label="Results", interactive=False, wrap=True)

        with gr.TabItem("📊 Schema & Indices"):
            gr.Markdown(
                "**Database structure, indices, and optimization info.**\n\n"
                "View tables, columns, and index configuration."
            )
            
            schema_btn = gr.Button("📊 Load Schema", variant="secondary", size="lg")
            schema_output = gr.Markdown("*Click button to load schema...*")
            
    gr.Markdown("---")
    gr.Markdown(
        "💡 **Performance:** Queries use database indices for fast lookups. "
        "Exact matches are faster than wildcards. "
        f"{'✅ Index files loaded from HuggingFace cache.' if INDEX_PATH else '⚠️ Running without index cache - queries may be slower.'}"
    )
            
    # Connect UI to functions
    semantic_btn.click(
        fn=get_semantic_profile,
        inputs=[word_input, lang_input],
        outputs=[semantic_output],
        api_name="get_semantic_profile"
    )
    
    query_btn.click(
        fn=run_query,
        inputs=[start_input, rel_input, end_input, limit_slider],
        outputs=[results_output, status_output],
        api_name="run_query"
    )
    
    raw_query_btn.click(
        fn=run_raw_query,
        inputs=[raw_sql_input],
        outputs=[raw_results_output, raw_status_output],
        api_name="run_raw_query"
    )

    schema_btn.click(
        fn=get_schema_info,
        inputs=None,
        outputs=schema_output,
        api_name="get_schema"
    )

if __name__ == "__main__":
    print("\n" + "="*50)
    print("🚀 Starting ConceptNet SQLite Explorer")
    print("="*50 + "\n")
    demo.launch(ssr_mode=False)