Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ import json
|
|
| 13 |
from typing import Optional, Dict, List, Any
|
| 14 |
import uvicorn
|
| 15 |
import traceback
|
| 16 |
-
from pathlib import Path # <-- FIX 1:
|
| 17 |
|
| 18 |
# ============================================================================
|
| 19 |
# 2. SHARED GLOBALS & CONFIGURATION
|
|
@@ -33,51 +33,48 @@ DB_PATH: Optional[str] = None # Will be set by setup_database()
|
|
| 33 |
# --- HF Token ---
|
| 34 |
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
|
| 35 |
|
| 36 |
-
# ---
|
| 37 |
-
# We use this to populate dropdowns and guide the semantic profile
|
| 38 |
CONCEPTNET_RELATIONS: Dict[str, str] = {
|
| 39 |
-
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
-
"
|
| 43 |
-
"
|
| 44 |
-
"
|
| 45 |
-
"
|
| 46 |
-
"
|
| 47 |
-
"
|
| 48 |
-
"
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
-
"
|
| 55 |
-
"
|
| 56 |
-
"
|
| 57 |
-
"
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"
|
| 61 |
-
"
|
| 62 |
-
"
|
| 63 |
-
"
|
| 64 |
-
"
|
| 65 |
-
"
|
| 66 |
-
"
|
| 67 |
-
"
|
| 68 |
-
"
|
| 69 |
-
"
|
| 70 |
-
"
|
| 71 |
-
"
|
| 72 |
-
"
|
| 73 |
-
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
-
"NotCapableOf": "/r/NotCapableOf",
|
| 77 |
-
"NotHasProperty": "/r/NotHasProperty",
|
| 78 |
}
|
| 79 |
|
| 80 |
-
# Sorted list of (Label,
|
| 81 |
RELATION_CHOICES = sorted(CONCEPTNET_RELATIONS.items())
|
| 82 |
|
| 83 |
# ============================================================================
|
|
@@ -130,7 +127,7 @@ def setup_database():
|
|
| 130 |
log_progress("Attempting to load indexed database...", "INFO")
|
| 131 |
|
| 132 |
# Check if we already have it locally
|
| 133 |
-
local_path = Path(INDEXED_DB_FILENAME)
|
| 134 |
if local_path.exists() and local_path.stat().st_size > 1000000:
|
| 135 |
log_progress(f"Found existing local DB: {local_path.resolve()}", "SUCCESS")
|
| 136 |
DB_PATH = str(local_path.resolve())
|
|
@@ -164,8 +161,6 @@ def get_db_connection() -> sqlite3.Connection:
|
|
| 164 |
if DB_PATH is None:
|
| 165 |
raise ConnectionError("Database path is not set. Call setup_database() first.")
|
| 166 |
|
| 167 |
-
# check_same_thread=False is generally safe for read-only operations
|
| 168 |
-
# in a multi-threaded server like FastAPI/uvicorn.
|
| 169 |
conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, check_same_thread=False)
|
| 170 |
|
| 171 |
# Performance tuning for read-only connections
|
|
@@ -179,10 +174,8 @@ def get_db_connection() -> sqlite3.Connection:
|
|
| 179 |
|
| 180 |
# ============================================================================
|
| 181 |
# 4. API (FASTAPI) ENDPOINTS
|
| 182 |
-
# (These functions return JSON data and are called by the API)
|
| 183 |
# ============================================================================
|
| 184 |
|
| 185 |
-
# --- Create FastAPI app FIRST ---
|
| 186 |
app = FastAPI(
|
| 187 |
title="ConceptNet Explorer API",
|
| 188 |
version="1.0",
|
|
@@ -203,7 +196,7 @@ def api_docs():
|
|
| 203 |
},
|
| 204 |
"examples": {
|
| 205 |
"profile": "/api/profile/dog?lang=en&limit=10",
|
| 206 |
-
"query": "/api/query?start_node=dog&relation_uri
|
| 207 |
},
|
| 208 |
"note": "Visit the root path (/) for the Gradio UI."
|
| 209 |
}
|
|
@@ -212,7 +205,6 @@ def api_docs():
|
|
| 212 |
def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
|
| 213 |
"""
|
| 214 |
API Endpoint: Get a full semantic profile for a word as JSON.
|
| 215 |
-
Queries all 34 relation types.
|
| 216 |
"""
|
| 217 |
if not word or lang not in TARGET_LANGUAGES:
|
| 218 |
return JSONResponse(
|
|
@@ -245,7 +237,7 @@ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
|
|
| 245 |
)
|
| 246 |
|
| 247 |
# 2. Query all relations
|
| 248 |
-
for rel_name, rel_uri in RELATION_CHOICES:
|
| 249 |
outgoing = []
|
| 250 |
incoming = []
|
| 251 |
|
|
@@ -257,7 +249,7 @@ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
|
|
| 257 |
WHERE e.start_id LIKE ? AND e.rel_id = ?
|
| 258 |
ORDER BY e.weight DESC
|
| 259 |
LIMIT ?
|
| 260 |
-
""", (like_path, rel_uri, limit))
|
| 261 |
|
| 262 |
outgoing = [{"target_label": label, "weight": weight, "target_id": eid}
|
| 263 |
for label, weight, eid in cursor.fetchall()]
|
|
@@ -270,7 +262,7 @@ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
|
|
| 270 |
WHERE e.end_id LIKE ? AND e.rel_id = ?
|
| 271 |
ORDER BY e.weight DESC
|
| 272 |
LIMIT ?
|
| 273 |
-
""", (like_path, rel_uri, limit))
|
| 274 |
|
| 275 |
incoming = [{"source_label": label, "weight": weight, "source_id": sid}
|
| 276 |
for label, weight, sid in cursor.fetchall()]
|
|
@@ -301,18 +293,11 @@ def query_edges_json(
|
|
| 301 |
):
|
| 302 |
"""
|
| 303 |
API Endpoint: Query edges with flexible filters.
|
| 304 |
-
Nodes (start/end) can be partial words (e.g., 'dog') or full URIs.
|
| 305 |
"""
|
| 306 |
query = """
|
| 307 |
SELECT
|
| 308 |
-
e.id as edge_id,
|
| 309 |
-
s.
|
| 310 |
-
r.id as relation_id,
|
| 311 |
-
en.id as end_id,
|
| 312 |
-
e.weight,
|
| 313 |
-
s.label as start_label,
|
| 314 |
-
r.label as relation_label,
|
| 315 |
-
en.label as end_label
|
| 316 |
FROM edge e
|
| 317 |
JOIN relation r ON e.rel_id = r.id
|
| 318 |
JOIN node s ON e.start_id = s.id
|
|
@@ -325,7 +310,6 @@ def query_edges_json(
|
|
| 325 |
def build_node_pattern(node_str: str) -> str:
|
| 326 |
if node_str.startswith(f'{CONCEPTNET_BASE_URI}/c/'):
|
| 327 |
return f"{node_str}%"
|
| 328 |
-
# Assume it's a plain word
|
| 329 |
return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
|
| 330 |
|
| 331 |
with get_db_connection() as conn:
|
|
@@ -334,8 +318,8 @@ def query_edges_json(
|
|
| 334 |
params.append(build_node_pattern(start_node))
|
| 335 |
|
| 336 |
if relation_uri:
|
| 337 |
-
#
|
| 338 |
-
query += " AND r.id = ?"
|
| 339 |
params.append(relation_uri)
|
| 340 |
|
| 341 |
if end_node:
|
|
@@ -370,7 +354,6 @@ def api_languages():
|
|
| 370 |
|
| 371 |
# ============================================================================
|
| 372 |
# 5. GRADIO UI HELPER FUNCTIONS
|
| 373 |
-
# (These functions are called by Gradio button clicks and return UI components)
|
| 374 |
# ============================================================================
|
| 375 |
|
| 376 |
def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
|
|
@@ -394,7 +377,6 @@ def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
|
|
| 394 |
cursor = conn.cursor()
|
| 395 |
|
| 396 |
progress(0.05, desc="📍 Finding nodes...")
|
| 397 |
-
|
| 398 |
cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
|
| 399 |
nodes = cursor.fetchall()
|
| 400 |
|
|
@@ -412,27 +394,23 @@ def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
|
|
| 412 |
num_relations = len(RELATION_CHOICES)
|
| 413 |
|
| 414 |
# Use the FULL list of relations
|
| 415 |
-
for i, (rel_name, rel_uri) in enumerate(RELATION_CHOICES):
|
| 416 |
progress((i + 0.1) / num_relations, desc=f"🔎 {rel_name}...")
|
| 417 |
|
| 418 |
cursor.execute("""
|
| 419 |
SELECT en.label, e.weight
|
| 420 |
-
FROM edge e
|
| 421 |
-
JOIN node en ON e.end_id = en.id
|
| 422 |
WHERE e.start_id LIKE ? AND e.rel_id = ?
|
| 423 |
-
ORDER BY e.weight DESC
|
| 424 |
-
|
| 425 |
-
""", (like_path, rel_uri))
|
| 426 |
outgoing = cursor.fetchall()
|
| 427 |
|
| 428 |
cursor.execute("""
|
| 429 |
SELECT s.label, e.weight
|
| 430 |
-
FROM edge e
|
| 431 |
-
JOIN node s ON e.start_id = s.id
|
| 432 |
WHERE e.end_id LIKE ? AND e.rel_id = ?
|
| 433 |
-
ORDER BY e.weight DESC
|
| 434 |
-
|
| 435 |
-
""", (like_path, rel_uri))
|
| 436 |
incoming = cursor.fetchall()
|
| 437 |
|
| 438 |
if outgoing or incoming:
|
|
@@ -450,7 +428,6 @@ def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
|
|
| 450 |
progress((i + 1) / num_relations, desc=f"✓ {rel_name}")
|
| 451 |
|
| 452 |
progress(1.0, desc="✅ Complete!")
|
| 453 |
-
|
| 454 |
output_md += f"---\n**Total relations found:** {total_found}\n"
|
| 455 |
log_progress(f"Complete: {total_found} relations", "SUCCESS")
|
| 456 |
|
|
@@ -471,14 +448,8 @@ def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, l
|
|
| 471 |
|
| 472 |
query = """
|
| 473 |
SELECT
|
| 474 |
-
s.label as start_label,
|
| 475 |
-
r.
|
| 476 |
-
en.label as end_label,
|
| 477 |
-
e.weight,
|
| 478 |
-
s.id as start_id,
|
| 479 |
-
r.id as relation_id,
|
| 480 |
-
en.id as end_id,
|
| 481 |
-
e.id as edge_id
|
| 482 |
FROM edge e
|
| 483 |
JOIN relation r ON e.rel_id = r.id
|
| 484 |
JOIN node s ON e.start_id = s.id
|
|
@@ -491,7 +462,6 @@ def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, l
|
|
| 491 |
def build_node_pattern(node_str: str) -> str:
|
| 492 |
if node_str.strip().startswith(f'{CONCEPTNET_BASE_URI}/c/'):
|
| 493 |
return f"{node_str.strip()}%"
|
| 494 |
-
# Assume it's a plain word
|
| 495 |
return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
|
| 496 |
|
| 497 |
with get_db_connection() as conn:
|
|
@@ -502,6 +472,7 @@ def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, l
|
|
| 502 |
params.append(build_node_pattern(start_node))
|
| 503 |
|
| 504 |
if relation_uri and relation_uri.strip():
|
|
|
|
| 505 |
query += " AND r.id = ?"
|
| 506 |
params.append(relation_uri)
|
| 507 |
|
|
@@ -525,7 +496,6 @@ def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, l
|
|
| 525 |
if df.empty:
|
| 526 |
return pd.DataFrame(), f"⚠️ No results found ({elapsed:.2f}s)"
|
| 527 |
|
| 528 |
-
# Reorder columns for better display
|
| 529 |
cols_to_show = [
|
| 530 |
'start_label', 'relation_label', 'end_label', 'weight',
|
| 531 |
'start_id', 'relation_id', 'end_id'
|
|
@@ -565,6 +535,7 @@ def get_schema_info_ui():
|
|
| 565 |
md += f"**Database File:** `{DB_PATH}`\n\n"
|
| 566 |
else:
|
| 567 |
md += "**Database File:** `NOT LOADED`\n\n"
|
|
|
|
| 568 |
return md
|
| 569 |
|
| 570 |
try:
|
|
@@ -581,7 +552,7 @@ def get_schema_info_ui():
|
|
| 581 |
md += f"- **{table}:** {count:,} rows\n"
|
| 582 |
|
| 583 |
md += "\n## Configured Relations\n\n"
|
| 584 |
-
md += "This list
|
| 585 |
for name, uri in RELATION_CHOICES:
|
| 586 |
md += f"- **{name}:** `{uri}`\n"
|
| 587 |
|
|
@@ -597,7 +568,8 @@ def get_schema_info_ui():
|
|
| 597 |
def create_gradio_ui():
|
| 598 |
"""Builds the consolidated Gradio interface."""
|
| 599 |
|
| 600 |
-
|
|
|
|
| 601 |
gr.Markdown(
|
| 602 |
"# 🧠 ConceptNet Explorer\n"
|
| 603 |
"An interface for querying the ConceptNet semantic network."
|
|
@@ -613,10 +585,15 @@ def create_gradio_ui():
|
|
| 613 |
gr.Markdown("Explore all semantic relations for a single word. This queries all 34 relation types.")
|
| 614 |
|
| 615 |
with gr.Row():
|
| 616 |
-
profile_word_input = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
profile_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
|
| 618 |
|
| 619 |
-
profile_btn = gr.Button("🔍 Get Semantic Profile", variant="primary")
|
| 620 |
profile_output = gr.Markdown(label="Profile Results")
|
| 621 |
|
| 622 |
gr.Examples(
|
|
@@ -637,148 +614,8 @@ def create_gradio_ui():
|
|
| 637 |
placeholder="e.g., dog (word) or /c/en/dog (URI)"
|
| 638 |
)
|
| 639 |
|
| 640 |
-
# --- THIS IS THE KEY UI IMPROVEMENT ---
|
| 641 |
query_rel_input = gr.Dropdown(
|
| 642 |
label="Relation",
|
| 643 |
-
choices=RELATION_CHOICES, #
|
| 644 |
-
value="/r/IsA" # Default
|
| 645 |
-
)
|
| 646 |
-
|
| 647 |
-
query_end_input = gr.Textbox(
|
| 648 |
-
label="End Node",
|
| 649 |
-
placeholder="e.g., animal (word) or /c/en/animal (URI)"
|
| 650 |
-
)
|
| 651 |
-
|
| 652 |
-
query_btn = gr.Button("▶️ Run Query", variant="primary")
|
| 653 |
-
|
| 654 |
-
query_status_output = gr.Markdown()
|
| 655 |
-
query_results_output = gr.DataFrame(
|
| 656 |
-
label="Query Results",
|
| 657 |
-
wrap=True,
|
| 658 |
-
interactive=False
|
| 659 |
-
)
|
| 660 |
-
|
| 661 |
-
with gr.TabItem("💻 Raw SQL"):
|
| 662 |
-
gr.Markdown(
|
| 663 |
-
"**Warning:** Directly query the SQLite database. Only `SELECT` statements are allowed. "
|
| 664 |
-
"Use the 'Schema' tab to see table names."
|
| 665 |
-
)
|
| 666 |
-
raw_sql_input = gr.Textbox(
|
| 667 |
-
label="SQL Query",
|
| 668 |
-
value=f"SELECT s.label, r.label, en.label, e.weight\nFROM edge e\nJOIN relation r ON e.rel_id = r.id\nJOIN node s ON e.start_id = s.id\nJOIN node en ON e.end_id = en.id\nWHERE s.id LIKE '{CONCEPTNET_BASE_URI}/c/en/dog%'\n AND r.id = '/r/IsA'\nORDER BY e.weight DESC\nLIMIT 10",
|
| 669 |
-
lines=5,
|
| 670 |
-
max_lines=20
|
| 671 |
-
)
|
| 672 |
-
raw_btn = gr.Button("▶️ Execute SQL")
|
| 673 |
-
raw_status = gr.Markdown()
|
| 674 |
-
raw_results = gr.DataFrame(label="SQL Results", interactive=False)
|
| 675 |
-
|
| 676 |
-
with gr.TabItem("📊 Schema") as schema_tab: # <-- FIX 2: Added variable 'schema_tab'
|
| 677 |
-
gr.Markdown("View the database schema and table counts.")
|
| 678 |
-
schema_btn = gr.Button("📊 Load Schema Info")
|
| 679 |
-
schema_output = gr.Markdown()
|
| 680 |
-
|
| 681 |
-
with gr.TabItem("🔌 API Docs"):
|
| 682 |
-
gr.Markdown(
|
| 683 |
-
"## JSON API Endpoints\n"
|
| 684 |
-
"This Gradio app is mounted on a FastAPI server. You can use the following JSON API endpoints directly."
|
| 685 |
-
)
|
| 686 |
-
gr.JSON({
|
| 687 |
-
"docs": "/api",
|
| 688 |
-
"profile": "/api/profile/{word}?lang=en&limit=10",
|
| 689 |
-
"query": "/api/query?start_node=dog&relation_uri=/r/IsA&lang=en&limit=50",
|
| 690 |
-
"relations": "/api/relations",
|
| 691 |
-
"languages": "/api/languages"
|
| 692 |
-
}, label="API Endpoints")
|
| 693 |
-
gr.Markdown(
|
| 694 |
-
"### Example (cURL)\n"
|
| 695 |
-
"```bash\n# (Assumes app is running at localhost:7860)\ncurl http://localhost:7860/api/profile/dog?lang=en\n```\n"
|
| 696 |
-
"```bash\ncurl 'http://localhost:7860/api/query?start_node=dog&relation_uri=/r/IsA&limit=10'\n```"
|
| 697 |
-
)
|
| 698 |
-
|
| 699 |
-
# --- Link UI components to functions ---
|
| 700 |
-
profile_btn.click(
|
| 701 |
-
fn=get_semantic_profile_ui,
|
| 702 |
-
inputs=[profile_word_input, profile_lang_input],
|
| 703 |
-
outputs=[profile_output],
|
| 704 |
-
api_name="get_semantic_profile" # Gradio-native API
|
| 705 |
-
)
|
| 706 |
-
|
| 707 |
-
query_btn.click(
|
| 708 |
-
fn=run_query_ui,
|
| 709 |
-
inputs=[query_start_input, query_rel_input, query_end_input, query_lang_input, query_limit_slider],
|
| 710 |
-
outputs=[query_results_output, query_status_output],
|
| 711 |
-
api_name="run_query" # Gradio-native API
|
| 712 |
-
)
|
| 713 |
-
|
| 714 |
-
raw_btn.click(
|
| 715 |
-
fn=run_raw_query_ui,
|
| 716 |
-
inputs=[raw_sql_input],
|
| 717 |
-
outputs=[raw_results, raw_status]
|
| 718 |
-
)
|
| 719 |
-
|
| 720 |
-
# --- FIX 2: Use schema_tab.select() to load data when the tab is clicked ---
|
| 721 |
-
schema_tab.select(
|
| 722 |
-
fn=get_schema_info_ui,
|
| 723 |
-
inputs=None,
|
| 724 |
-
outputs=[schema_output]
|
| 725 |
-
)
|
| 726 |
-
# We can also keep the button for a manual refresh
|
| 727 |
-
schema_btn.click(
|
| 728 |
-
fn=get_schema_info_ui,
|
| 729 |
-
inputs=None,
|
| 730 |
-
outputs=[schema_output]
|
| 731 |
-
)
|
| 732 |
-
|
| 733 |
-
return demo
|
| 734 |
-
|
| 735 |
-
# ============================================================================
|
| 736 |
-
# 7. APP MOUNTING & LAUNCH
|
| 737 |
-
# ============================================================================
|
| 738 |
-
|
| 739 |
-
# --- 1. Setup the Database (Download from HF Hub) ---
|
| 740 |
-
# This runs *before* the UI is created
|
| 741 |
-
try:
|
| 742 |
-
if not setup_database():
|
| 743 |
-
print("\n" + "="*70)
|
| 744 |
-
print("❌ CRITICAL ERROR: Could not set up the database.")
|
| 745 |
-
print(f" Please check your connection or manually download '{INDEXED_DB_FILENAME}'")
|
| 746 |
-
print(f" from '{INDEXED_REPO_ID}' and place it in this directory.")
|
| 747 |
-
print("="*70 + "\n")
|
| 748 |
-
# We don't exit, Gradio will just show errors
|
| 749 |
-
else:
|
| 750 |
-
print(f"✅ Database is ready at: {DB_PATH}")
|
| 751 |
-
except Exception as e:
|
| 752 |
-
print(f"❌ CRITICAL ERROR during database setup: {e}")
|
| 753 |
-
traceback.print_exc()
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
# --- 2. Create the Gradio UI ---
|
| 757 |
-
log_progress("Creating Gradio UI...", "INFO")
|
| 758 |
-
demo = create_gradio_ui()
|
| 759 |
-
|
| 760 |
-
# --- 3. Mount Gradio onto the FastAPI app ---
|
| 761 |
-
# This combines FastAPI (at /api/*) and Gradio (at /)
|
| 762 |
-
log_progress("Mounting Gradio UI onto FastAPI app...", "INFO")
|
| 763 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
# ============================================================================
|
| 767 |
-
# 8. MAIN EXECUTION BLOCK
|
| 768 |
-
# ============================================================================
|
| 769 |
-
|
| 770 |
-
if __name__ == "__main__":
|
| 771 |
-
log_progress("="*60, "SUCCESS")
|
| 772 |
-
log_progress("🚀 CONCEPTNET EXPLORER APP READY!", "SUCCESS")
|
| 773 |
-
log_progress("="*60, "SUCCESS")
|
| 774 |
-
log_progress("UI: http://localhost:7860/", "INFO")
|
| 775 |
-
log_progress("API: http://localhost:7860/api", "INFO")
|
| 776 |
-
log_progress(" http://localhost:7860/api/profile/dog", "INFO")
|
| 777 |
-
log_progress(" http://localhost:7860/api/query?start_node=dog&relation_uri=/r/IsA", "INFO")
|
| 778 |
-
log_progress("="*60, "SUCCESS")
|
| 779 |
-
|
| 780 |
-
uvicorn.run(
|
| 781 |
-
app,
|
| 782 |
-
host="0.0.0.0",
|
| 783 |
-
port=7860
|
| 784 |
-
)
|
|
|
|
| 13 |
from typing import Optional, Dict, List, Any
|
| 14 |
import uvicorn
|
| 15 |
import traceback
|
| 16 |
+
from pathlib import Path # <-- FIX 1: Imported Path
|
| 17 |
|
| 18 |
# ============================================================================
|
| 19 |
# 2. SHARED GLOBALS & CONFIGURATION
|
|
|
|
| 33 |
# --- HF Token ---
|
| 34 |
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
|
| 35 |
|
| 36 |
+
# --- LOGIC FIX: Every relation value must be a full URL (CONCEPTNET_BASE_URI + "/r/<Name>"), matching the relation IDs used in the DB ---
|
|
|
|
| 37 |
CONCEPTNET_RELATIONS: Dict[str, str] = {
|
| 38 |
+
"RelatedTo": f"{CONCEPTNET_BASE_URI}/r/RelatedTo",
|
| 39 |
+
"IsA": f"{CONCEPTNET_BASE_URI}/r/IsA",
|
| 40 |
+
"PartOf": f"{CONCEPTNET_BASE_URI}/r/PartOf",
|
| 41 |
+
"HasA": f"{CONCEPTNET_BASE_URI}/r/HasA",
|
| 42 |
+
"UsedFor": f"{CONCEPTNET_BASE_URI}/r/UsedFor",
|
| 43 |
+
"CapableOf": f"{CONCEPTNET_BASE_URI}/r/CapableOf",
|
| 44 |
+
"AtLocation": f"{CONCEPTNET_BASE_URI}/r/AtLocation",
|
| 45 |
+
"Causes": f"{CONCEPTNET_BASE_URI}/r/Causes",
|
| 46 |
+
"HasSubevent": f"{CONCEPTNET_BASE_URI}/r/HasSubevent",
|
| 47 |
+
"HasFirstSubevent": f"{CONCEPTNET_BASE_URI}/r/HasFirstSubevent",
|
| 48 |
+
"HasLastSubevent": f"{CONCEPTNET_BASE_URI}/r/HasLastSubevent",
|
| 49 |
+
"HasPrerequisite": f"{CONCEPTNET_BASE_URI}/r/HasPrerequisite",
|
| 50 |
+
"HasProperty": f"{CONCEPTNET_BASE_URI}/r/HasProperty",
|
| 51 |
+
"MotivatedByGoal": f"{CONCEPTNET_BASE_URI}/r/MotivatedByGoal",
|
| 52 |
+
"ObstructedBy": f"{CONCEPTNET_BASE_URI}/r/ObstructedBy",
|
| 53 |
+
"Desires": f"{CONCEPTNET_BASE_URI}/r/Desires",
|
| 54 |
+
"CreatedBy": f"{CONCEPTNET_BASE_URI}/r/CreatedBy",
|
| 55 |
+
"Synonym": f"{CONCEPTNET_BASE_URI}/r/Synonym",
|
| 56 |
+
"Antonym": f"{CONCEPTNET_BASE_URI}/r/Antonym",
|
| 57 |
+
"DistinctFrom": f"{CONCEPTNET_BASE_URI}/r/DistinctFrom",
|
| 58 |
+
"DerivedFrom": f"{CONCEPTNET_BASE_URI}/r/DerivedFrom",
|
| 59 |
+
"SymbolOf": f"{CONCEPTNET_BASE_URI}/r/SymbolOf",
|
| 60 |
+
"DefinedAs": f"{CONCEPTNET_BASE_URI}/r/DefinedAs",
|
| 61 |
+
"MannerOf": f"{CONCEPTNET_BASE_URI}/r/MannerOf",
|
| 62 |
+
"LocatedNear": f"{CONCEPTNET_BASE_URI}/r/LocatedNear",
|
| 63 |
+
"HasContext": f"{CONCEPTNET_BASE_URI}/r/HasContext",
|
| 64 |
+
"SimilarTo": f"{CONCEPTNET_BASE_URI}/r/SimilarTo",
|
| 65 |
+
"EtymologicallyRelatedTo": f"{CONCEPTNET_BASE_URI}/r/EtymologicallyRelatedTo",
|
| 66 |
+
"EtymologicallyDerivedFrom": f"{CONCEPTNET_BASE_URI}/r/EtymologicallyDerivedFrom",
|
| 67 |
+
"CausesDesire": f"{CONCEPTNET_BASE_URI}/r/CausesDesire",
|
| 68 |
+
"MadeOf": f"{CONCEPTNET_BASE_URI}/r/MadeOf",
|
| 69 |
+
"ReceivesAction": f"{CONCEPTNET_BASE_URI}/r/ReceivesAction",
|
| 70 |
+
"ExternalURL": f"{CONCEPTNET_BASE_URI}/r/ExternalURL",
|
| 71 |
+
"NotDesires": f"{CONCEPTNET_BASE_URI}/r/NotDesires",
|
| 72 |
+
"NotUsedFor": f"{CONCEPTNET_BASE_URI}/r/NotUsedFor",
|
| 73 |
+
"NotCapableOf": f"{CONCEPTNET_BASE_URI}/r/NotCapableOf",
|
| 74 |
+
"NotHasProperty": f"{CONCEPTNET_BASE_URI}/r/NotHasProperty",
|
|
|
|
|
|
|
| 75 |
}
|
| 76 |
|
| 77 |
+
# Sorted list of (Label, Full_URL) tuples for Gradio dropdowns
|
| 78 |
RELATION_CHOICES = sorted(CONCEPTNET_RELATIONS.items())
|
| 79 |
|
| 80 |
# ============================================================================
|
|
|
|
| 127 |
log_progress("Attempting to load indexed database...", "INFO")
|
| 128 |
|
| 129 |
# Check if we already have it locally
|
| 130 |
+
local_path = Path(INDEXED_DB_FILENAME) # <-- FIX 1: 'Path' is now defined
|
| 131 |
if local_path.exists() and local_path.stat().st_size > 1000000:
|
| 132 |
log_progress(f"Found existing local DB: {local_path.resolve()}", "SUCCESS")
|
| 133 |
DB_PATH = str(local_path.resolve())
|
|
|
|
| 161 |
if DB_PATH is None:
|
| 162 |
raise ConnectionError("Database path is not set. Call setup_database() first.")
|
| 163 |
|
|
|
|
|
|
|
| 164 |
conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, check_same_thread=False)
|
| 165 |
|
| 166 |
# Performance tuning for read-only connections
|
|
|
|
| 174 |
|
| 175 |
# ============================================================================
|
| 176 |
# 4. API (FASTAPI) ENDPOINTS
|
|
|
|
| 177 |
# ============================================================================
|
| 178 |
|
|
|
|
| 179 |
app = FastAPI(
|
| 180 |
title="ConceptNet Explorer API",
|
| 181 |
version="1.0",
|
|
|
|
| 196 |
},
|
| 197 |
"examples": {
|
| 198 |
"profile": "/api/profile/dog?lang=en&limit=10",
|
| 199 |
+
"query": f"/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&limit=20"
|
| 200 |
},
|
| 201 |
"note": "Visit the root path (/) for the Gradio UI."
|
| 202 |
}
|
|
|
|
| 205 |
def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
|
| 206 |
"""
|
| 207 |
API Endpoint: Get a full semantic profile for a word as JSON.
|
|
|
|
| 208 |
"""
|
| 209 |
if not word or lang not in TARGET_LANGUAGES:
|
| 210 |
return JSONResponse(
|
|
|
|
| 237 |
)
|
| 238 |
|
| 239 |
# 2. Query all relations
|
| 240 |
+
for rel_name, rel_uri in RELATION_CHOICES: # rel_uri is now a FULL URL
|
| 241 |
outgoing = []
|
| 242 |
incoming = []
|
| 243 |
|
|
|
|
| 249 |
WHERE e.start_id LIKE ? AND e.rel_id = ?
|
| 250 |
ORDER BY e.weight DESC
|
| 251 |
LIMIT ?
|
| 252 |
+
""", (like_path, rel_uri, limit)) # <-- LOGIC FIX: rel_uri is now correct
|
| 253 |
|
| 254 |
outgoing = [{"target_label": label, "weight": weight, "target_id": eid}
|
| 255 |
for label, weight, eid in cursor.fetchall()]
|
|
|
|
| 262 |
WHERE e.end_id LIKE ? AND e.rel_id = ?
|
| 263 |
ORDER BY e.weight DESC
|
| 264 |
LIMIT ?
|
| 265 |
+
""", (like_path, rel_uri, limit)) # <-- LOGIC FIX: rel_uri is now correct
|
| 266 |
|
| 267 |
incoming = [{"source_label": label, "weight": weight, "source_id": sid}
|
| 268 |
for label, weight, sid in cursor.fetchall()]
|
|
|
|
| 293 |
):
|
| 294 |
"""
|
| 295 |
API Endpoint: Query edges with flexible filters.
|
|
|
|
| 296 |
"""
|
| 297 |
query = """
|
| 298 |
SELECT
|
| 299 |
+
e.id as edge_id, s.id as start_id, r.id as relation_id, en.id as end_id,
|
| 300 |
+
e.weight, s.label as start_label, r.label as relation_label, en.label as end_label
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
FROM edge e
|
| 302 |
JOIN relation r ON e.rel_id = r.id
|
| 303 |
JOIN node s ON e.start_id = s.id
|
|
|
|
| 310 |
def build_node_pattern(node_str: str) -> str:
|
| 311 |
if node_str.startswith(f'{CONCEPTNET_BASE_URI}/c/'):
|
| 312 |
return f"{node_str}%"
|
|
|
|
| 313 |
return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
|
| 314 |
|
| 315 |
with get_db_connection() as conn:
|
|
|
|
| 318 |
params.append(build_node_pattern(start_node))
|
| 319 |
|
| 320 |
if relation_uri:
|
| 321 |
+
# LOGIC FIX: Pass the full URL directly
|
| 322 |
+
query += " AND r.id = ?"
|
| 323 |
params.append(relation_uri)
|
| 324 |
|
| 325 |
if end_node:
|
|
|
|
| 354 |
|
| 355 |
# ============================================================================
|
| 356 |
# 5. GRADIO UI HELPER FUNCTIONS
|
|
|
|
| 357 |
# ============================================================================
|
| 358 |
|
| 359 |
def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
|
|
|
|
| 377 |
cursor = conn.cursor()
|
| 378 |
|
| 379 |
progress(0.05, desc="📍 Finding nodes...")
|
|
|
|
| 380 |
cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
|
| 381 |
nodes = cursor.fetchall()
|
| 382 |
|
|
|
|
| 394 |
num_relations = len(RELATION_CHOICES)
|
| 395 |
|
| 396 |
# Use the FULL list of relations
|
| 397 |
+
for i, (rel_name, rel_uri) in enumerate(RELATION_CHOICES): # rel_uri is now a FULL URL
|
| 398 |
progress((i + 0.1) / num_relations, desc=f"🔎 {rel_name}...")
|
| 399 |
|
| 400 |
cursor.execute("""
|
| 401 |
SELECT en.label, e.weight
|
| 402 |
+
FROM edge e JOIN node en ON e.end_id = en.id
|
|
|
|
| 403 |
WHERE e.start_id LIKE ? AND e.rel_id = ?
|
| 404 |
+
ORDER BY e.weight DESC LIMIT 10
|
| 405 |
+
""", (like_path, rel_uri)) # <-- LOGIC FIX: rel_uri is now correct
|
|
|
|
| 406 |
outgoing = cursor.fetchall()
|
| 407 |
|
| 408 |
cursor.execute("""
|
| 409 |
SELECT s.label, e.weight
|
| 410 |
+
FROM edge e JOIN node s ON e.start_id = s.id
|
|
|
|
| 411 |
WHERE e.end_id LIKE ? AND e.rel_id = ?
|
| 412 |
+
ORDER BY e.weight DESC LIMIT 10
|
| 413 |
+
""", (like_path, rel_uri)) # <-- LOGIC FIX: rel_uri is now correct
|
|
|
|
| 414 |
incoming = cursor.fetchall()
|
| 415 |
|
| 416 |
if outgoing or incoming:
|
|
|
|
| 428 |
progress((i + 1) / num_relations, desc=f"✓ {rel_name}")
|
| 429 |
|
| 430 |
progress(1.0, desc="✅ Complete!")
|
|
|
|
| 431 |
output_md += f"---\n**Total relations found:** {total_found}\n"
|
| 432 |
log_progress(f"Complete: {total_found} relations", "SUCCESS")
|
| 433 |
|
|
|
|
| 448 |
|
| 449 |
query = """
|
| 450 |
SELECT
|
| 451 |
+
s.label as start_label, r.label as relation_label, en.label as end_label,
|
| 452 |
+
e.weight, s.id as start_id, r.id as relation_id, en.id as end_id, e.id as edge_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
FROM edge e
|
| 454 |
JOIN relation r ON e.rel_id = r.id
|
| 455 |
JOIN node s ON e.start_id = s.id
|
|
|
|
| 462 |
def build_node_pattern(node_str: str) -> str:
|
| 463 |
if node_str.strip().startswith(f'{CONCEPTNET_BASE_URI}/c/'):
|
| 464 |
return f"{node_str.strip()}%"
|
|
|
|
| 465 |
return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
|
| 466 |
|
| 467 |
with get_db_connection() as conn:
|
|
|
|
| 472 |
params.append(build_node_pattern(start_node))
|
| 473 |
|
| 474 |
if relation_uri and relation_uri.strip():
|
| 475 |
+
# LOGIC FIX: relation_uri is now the full URL from the dropdown
|
| 476 |
query += " AND r.id = ?"
|
| 477 |
params.append(relation_uri)
|
| 478 |
|
|
|
|
| 496 |
if df.empty:
|
| 497 |
return pd.DataFrame(), f"⚠️ No results found ({elapsed:.2f}s)"
|
| 498 |
|
|
|
|
| 499 |
cols_to_show = [
|
| 500 |
'start_label', 'relation_label', 'end_label', 'weight',
|
| 501 |
'start_id', 'relation_id', 'end_id'
|
|
|
|
| 535 |
md += f"**Database File:** `{DB_PATH}`\n\n"
|
| 536 |
else:
|
| 537 |
md += "**Database File:** `NOT LOADED`\n\n"
|
| 538 |
+
md += "⚠️ **Database not loaded.** Schema info may be incomplete. Check logs.\n"
|
| 539 |
return md
|
| 540 |
|
| 541 |
try:
|
|
|
|
| 552 |
md += f"- **{table}:** {count:,} rows\n"
|
| 553 |
|
| 554 |
md += "\n## Configured Relations\n\n"
|
| 555 |
+
md += "This list populates the 'Query Builder' dropdown. The values are the full URLs used in the DB.\n\n"
|
| 556 |
for name, uri in RELATION_CHOICES:
|
| 557 |
md += f"- **{name}:** `{uri}`\n"
|
| 558 |
|
|
|
|
| 568 |
def create_gradio_ui():
|
| 569 |
"""Builds the consolidated Gradio interface."""
|
| 570 |
|
| 571 |
+
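# --- UI FIX: Pass ssr=False, intended to turn off server-side rendering and avoid UI rendering glitches. NOTE(review): Gradio documents this switch as ssr_mode on launch()/mount_gradio_app() -- confirm gr.Blocks honors `ssr` ---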
|
| 572 |
+
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft(primary_hue="blue"), ssr=False) as demo:
|
| 573 |
gr.Markdown(
|
| 574 |
"# 🧠 ConceptNet Explorer\n"
|
| 575 |
"An interface for querying the ConceptNet semantic network."
|
|
|
|
| 585 |
gr.Markdown("Explore all semantic relations for a single word. This queries all 34 relation types.")
|
| 586 |
|
| 587 |
with gr.Row():
|
| 588 |
+
profile_word_input = gr.Textbox(
|
| 589 |
+
label="Word",
|
| 590 |
+
placeholder="e.g., dog",
|
| 591 |
+
value="dog",
|
| 592 |
+
info="Enter a word" # <-- UI POLISH: Added from your example
|
| 593 |
+
)
|
| 594 |
profile_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
|
| 595 |
|
| 596 |
+
profile_btn = gr.Button("🔍 Get Semantic Profile", variant="primary", size="lg")
|
| 597 |
profile_output = gr.Markdown(label="Profile Results")
|
| 598 |
|
| 599 |
gr.Examples(
|
|
|
|
| 614 |
placeholder="e.g., dog (word) or /c/en/dog (URI)"
|
| 615 |
)
|
| 616 |
|
|
|
|
| 617 |
query_rel_input = gr.Dropdown(
|
| 618 |
label="Relation",
|
| 619 |
+
choices=RELATION_CHOICES, # Uses (Label, Full_URL)
|
| 620 |
+
value=f"{CONCEPTNET_BASE_URI}/r/IsA" # <-- LOGIC FIX: Default value is full URL
|
| 621 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|