cstr committed on
Commit
63d785d
·
verified ·
1 Parent(s): 857ea4d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +559 -0
app.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import pandas as pd
4
+ from huggingface_hub import hf_hub_download
5
+ import os
6
+ import time
7
+ import json
8
+ from typing import Dict, List, Optional
9
+ from collections import defaultdict
10
+
11
# ===== CONFIGURATION =====
# 1. Point to the NEW normalized database
TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
NORMALIZED_REPO_ID = "cstr/conceptnet-normalized-multi"
NORMALIZED_DB_FILE = "conceptnet_normalized.db"

CONCEPTNET_BASE = "http://conceptnet.io"
# =========================

# Short relation names, listed in the order they are offered in the UI.
_RELATION_NAMES = (
    "RelatedTo",
    "IsA",
    "PartOf",
    "HasA",
    "UsedFor",
    "CapableOf",
    "AtLocation",
    "Causes",
    "HasSubevent",
    "HasFirstSubevent",
    "HasLastSubevent",
    "HasPrerequisite",
    "HasProperty",
    "MotivatedByGoal",
    "ObstructedBy",
    "Desires",
    "CreatedBy",
    "Synonym",
    "Antonym",
    "DistinctFrom",
    "DerivedFrom",
    "SymbolOf",
    "DefinedAs",
    "MannerOf",
    "LocatedNear",
    "HasContext",
    "SimilarTo",
    "EtymologicallyRelatedTo",
    "EtymologicallyDerivedFrom",
    "CausesDesire",
    "MadeOf",
    "ReceivesAction",
    "ExternalURL",
    "NotDesires",
    "NotUsedFor",
    "NotCapableOf",
    "NotHasProperty",
)

# --- All relations MUST be full URLs ---
# Maps each short relation name (e.g. "IsA") to its full relation URL
# ("<CONCEPTNET_BASE>/r/IsA"); this is the primary name -> relation lookup.
CONCEPTNET_RELATIONS: Dict[str, str] = {
    relation_name: f"{CONCEPTNET_BASE}/r/{relation_name}"
    for relation_name in _RELATION_NAMES
}
# =========================

_language_banner = ', '.join(code.upper() for code in TARGET_LANGUAGES)
print(f"🌍 Languages: {_language_banner}")
print(f"📚 Relations: {len(CONCEPTNET_RELATIONS)} relations loaded")
65
+
66
def log_progress(message, level="INFO"):
    """Print *message* to stdout, tagged with the current time and a level emoji.

    ``level`` picks the emoji (INFO, SUCCESS, ERROR, WARN or DEBUG); any other
    value prints the message with no emoji.
    """
    level_icons = {
        "INFO": "ℹ️ ",
        "SUCCESS": "✅",
        "ERROR": "❌",
        "WARN": "⚠️ ",
        "DEBUG": "🔍",
    }
    icon = level_icons.get(level, "")
    now = time.strftime("%H:%M:%S")
    print(f"[{now}] {icon} {message}")
71
+
72
def download_normalized_database():
    """Fetch the normalized database file from the Hugging Face Hub.

    Returns whatever ``hf_hub_download`` returns for the dataset file on
    success. Any exception is logged and turned into a ``None`` return so
    the caller can decide how to proceed without a database.
    """
    log_progress(f"Downloading/Verifying {NORMALIZED_DB_FILE}...", "INFO")
    try:
        # This will download or use cache
        local_path = hf_hub_download(
            repo_id=NORMALIZED_REPO_ID,
            filename=NORMALIZED_DB_FILE,
            repo_type="dataset",
        )
    except Exception as e:
        log_progress(f"Failed to download DB: {e}", "ERROR")
        return None
    return local_path
85
+
86
# Resolve the database once at import time; every query function checks
# DB_PATH before touching SQLite.
DB_PATH = download_normalized_database()

if DB_PATH:
    log_progress(f"Database loaded from: {DB_PATH}", "SUCCESS")
else:
    log_progress("DATABASE NOT FOUND. App will not function.", "ERROR")
92
+
93
def get_db_connection():
    """Open a new read-only connection to the SQLite database at ``DB_PATH``.

    The connection is created with ``check_same_thread=False``, which only
    disables sqlite3's same-thread check so the object may be used from a
    different thread than the one that created it; it adds no locking.

    The caller owns the returned connection and must close it. Note that
    using a sqlite3 connection as a context manager (``with conn:``) only
    manages the transaction and does not close the connection.

    Returns:
        sqlite3.Connection: a connection opened with ``mode=ro``.

    Raises:
        RuntimeError: if ``DB_PATH`` is not set.
        sqlite3.Error: if the file cannot be opened or configured.
    """
    from pathlib import Path

    if not DB_PATH:
        raise RuntimeError("Database path is not set. Cannot create connection.")

    # Path.as_uri() percent-encodes characters such as '?', '#', '%' and
    # spaces. Interpolating the raw path would let them be parsed as URI
    # syntax and open the wrong file or drop the mode=ro parameter.
    db_uri = f"{Path(DB_PATH).absolute().as_uri()}?mode=ro"
    conn = sqlite3.connect(db_uri, uri=True, check_same_thread=False)
    try:
        conn.execute("PRAGMA cache_size = -256000")  # 256MB cache
        conn.execute("PRAGMA temp_store = MEMORY")
    except sqlite3.Error:
        # Do not leak the handle if configuration fails.
        conn.close()
        raise
    return conn
103
+
104
def node_url_to_label(url: str) -> str:
    """Turn a concept URL such as '.../c/en/dog_house' into 'dog house'.

    For URLs containing a ``/c/<lang>/<term>`` path, the ``<term>`` segment
    is used even when further segments follow it (e.g. '.../c/en/dog/n'
    yields 'dog', not 'n'). Any other string falls back to its last
    '/'-separated segment. Underscores are replaced by spaces.

    Non-string input (e.g. ``None``) is returned unchanged.
    """
    if not isinstance(url, str):
        return url

    segments = url.split('/')
    try:
        # Layout is .../c/<lang>/<term>[/<extra>...]; the term sits two
        # segments after the 'c' marker.
        term = segments[segments.index('c') + 2]
    except (ValueError, IndexError):
        term = segments[-1]
    return term.replace('_', ' ')
110
+
111
def _fetch_top_edges(cursor, own_col, other_col, node_pks, rel_pks, per_relation):
    """Return the heaviest edges per relation touching the given nodes.

    ``own_col`` / ``other_col`` are the ``edge_norm`` foreign-key columns for
    the queried side and the neighbour side (``start_fk``/``end_fk`` or the
    reverse); they are fixed identifiers chosen by the caller, never user
    input. Rows are ``(rel_fk, neighbour_url, weight)``, at most
    ``per_relation`` rows for each relation, ordered by descending weight.
    """
    if not node_pks or not rel_pks:
        return []

    node_marks = ','.join('?' * len(node_pks))
    rel_marks = ','.join('?' * len(rel_pks))
    # Rank inside each relation so a relation with very many edges cannot
    # crowd the others out of the result (a single global LIMIT would).
    # Window functions need SQLite >= 3.25.
    sql = f"""
        SELECT rel_fk, node_url, weight
        FROM (
            SELECT
                e.rel_fk AS rel_fk,
                n.node_url AS node_url,
                e.weight AS weight,
                ROW_NUMBER() OVER (
                    PARTITION BY e.rel_fk ORDER BY e.weight DESC
                ) AS rn
            FROM edge_norm e
            JOIN node_norm n ON e.{other_col} = n.node_pk
            WHERE e.{own_col} IN ({node_marks})
              AND e.rel_fk IN ({rel_marks})
        )
        WHERE rn <= ?
        ORDER BY weight DESC
    """
    cursor.execute(sql, (*node_pks, *rel_pks, per_relation))
    return cursor.fetchall()


def get_semantic_profile(word: str, lang: str = 'en', selected_relations: Optional[List[str]] = None, progress=gr.Progress()):
    """
    Stream a Markdown "semantic profile" for a word from the normalized DB.

    This is a generator: it yields the Markdown document repeatedly as it
    grows (header, matched nodes, then one section per relation), so the UI
    can update progressively. Error conditions are yielded as Markdown
    messages rather than raised.

    Args:
        word: The term to look up; it is stripped, lower-cased and spaces
            are replaced by underscores before building the node URL.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        selected_relations: Short relation names (keys of
            ``CONCEPTNET_RELATIONS``). A default set is used when empty.
        progress: Gradio progress tracker.

    Yields:
        str: the Markdown accumulated so far, or an error/warning message.
    """
    from contextlib import closing

    log_progress(f"Profile: {word} ({lang})", "INFO")

    # Normalize before validating so whitespace-only input is rejected
    # instead of turning into a match-everything prefix search.
    word = (word or "").strip().lower().replace(' ', '_')
    if not word or lang not in TARGET_LANGUAGES:
        yield "⚠️ Invalid input"
        return

    if not DB_PATH:
        yield "❌ **Error:** Database file not found."
        return

    # Set default relations if none are selected
    if not selected_relations:
        selected_relations = [
            "IsA", "RelatedTo", "PartOf", "HasA", "UsedFor",
            "CapableOf", "Synonym", "Antonym"
        ]

    max_per_relation = 7
    exact_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}"

    output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"

    try:
        # closing() guarantees the connection is released; a bare
        # ``with conn:`` would only end the transaction.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()
            progress(0, desc="Starting...")
            yield output_md

            # === STEP 1: Find Node PKs ===
            progress(0.05, desc="Finding nodes...")

            cursor.execute("SELECT node_pk, node_url FROM node_norm WHERE node_url = ?", (exact_path,))
            exact_node = cursor.fetchone()

            if exact_node:
                log_progress(f"Found exact node: {exact_node[1]}", "SUCCESS")
                nodes = [exact_node]
            else:
                log_progress(f"No exact node, falling back to LIKE...", "WARN")
                like_path = f"{exact_path}%"
                cursor.execute("SELECT node_pk, node_url FROM node_norm WHERE node_url LIKE ? LIMIT 5", (like_path,))
                nodes = cursor.fetchall()
                if not nodes:
                    yield f"# 🧠 '{word}'\n\n⚠️ Not found"
                    return

            node_pks = [pk for pk, _ in nodes]
            nodes_found = [(url, node_url_to_label(url)) for _, url in nodes]

            for node_url, label in nodes_found[:3]:
                output_md += f"**Node:** `{node_url}` → **{label}**\n"
            output_md += "\n"
            yield output_md

            # === STEP 2: Find Relation PKs ===
            progress(0.15, desc="Finding relations...")

            rel_urls_to_query = tuple(
                CONCEPTNET_RELATIONS[name]
                for name in selected_relations
                if name in CONCEPTNET_RELATIONS
            )
            if not rel_urls_to_query:
                output_md += "⚠️ No valid relations selected."
                yield output_md
                return

            rel_placeholders = ','.join(['?'] * len(rel_urls_to_query))
            cursor.execute(f"SELECT rel_pk, rel_url FROM rel_norm WHERE rel_url IN ({rel_placeholders})", rel_urls_to_query)

            # Map DB primary keys back to short names (e.g. 'IsA').
            url_to_name = {url: name for name, url in CONCEPTNET_RELATIONS.items()}
            rel_pk_to_name = {
                pk: url_to_name[url]
                for pk, url in cursor.fetchall()
                if url in url_to_name
            }
            relations_in_db = set(rel_pk_to_name.values())
            rel_pks_to_query = tuple(rel_pk_to_name)

            # Buckets for results
            outgoing_results = defaultdict(list)
            incoming_results = defaultdict(list)

            # === STEP 3: Run ONE query for ALL outgoing edges ===
            progress(0.4, desc="Querying outgoing edges...")
            for rel_pk, node_url, weight in _fetch_top_edges(
                cursor, "start_fk", "end_fk", node_pks, rel_pks_to_query, max_per_relation
            ):
                rel_name = rel_pk_to_name.get(rel_pk)
                if rel_name and len(outgoing_results[rel_name]) < max_per_relation:
                    outgoing_results[rel_name].append((node_url_to_label(node_url), weight))

            # === STEP 4: Run ONE query for ALL incoming edges ===
            progress(0.7, desc="Querying incoming edges...")
            for rel_pk, node_url, weight in _fetch_top_edges(
                cursor, "end_fk", "start_fk", node_pks, rel_pks_to_query, max_per_relation
            ):
                rel_name = rel_pk_to_name.get(rel_pk)
                if rel_name and len(incoming_results[rel_name]) < max_per_relation:
                    incoming_results[rel_name].append((node_url_to_label(node_url), weight))

            # === STEP 5: Format results as Markdown ===
            progress(0.9, desc="Formatting results...")
            total = 0
            for rel_name in selected_relations:
                if rel_name not in relations_in_db:
                    continue  # Skip if this relation wasn't in the DB

                output_md += f"## {rel_name}\n\n"
                found = False

                for label, weight in outgoing_results.get(rel_name, []):
                    output_md += f"- **{word}** {rel_name} → *{label}* `[{weight:.3f}]`\n"
                    found = True
                    total += 1

                for label, weight in incoming_results.get(rel_name, []):
                    output_md += f"- *{label}* {rel_name} → **{word}** `[{weight:.3f}]`\n"
                    found = True
                    total += 1

                if not found:
                    output_md += "*No results*\n"

                output_md += "\n"
                yield output_md  # Yield after each relation is formatted

            output_md += f"---\n**Total relations:** {total}\n"
            log_progress(f"Profile complete: {total} relations", "SUCCESS")
            progress(1.0, desc="✅ Complete!")
            yield output_md

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the event handler, but keep the traceback in the logs.
        log_progress(f"Error: {e}", "ERROR")
        import traceback
        traceback.print_exc()
        yield f"**❌ Error:** {e}"
285
+
286
def _node_like_pattern(raw_value):
    """Return the LIKE prefix pattern for a node filter, or None if blank.

    A value starting with 'http://' is used as a URL prefix as-is; anything
    else is treated as an English word and mapped to its '/c/en/<word>'
    URL prefix (lower-cased, spaces replaced by underscores).
    """
    text = raw_value.strip() if raw_value else ""
    if not text:
        return None
    if text.startswith('http://'):
        return f"{text}%"
    return f"{CONCEPTNET_BASE}/c/en/{text.lower().replace(' ', '_')}%"


def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
    """
    Run a filtered edge query against the normalized DB.

    Every filter is optional; blank filters are simply omitted. Results are
    ordered by descending edge weight.

    Args:
        start_node: Word or 'http://' URL prefix for the edge's start node.
        relation: Short relation name (key of ``CONCEPTNET_RELATIONS``);
            unknown or blank names apply no relation filter.
        end_node: Word or 'http://' URL prefix for the edge's end node.
        limit: Maximum number of rows; coerced to ``int``.
        progress: Gradio progress tracker.

    Returns:
        tuple: ``(DataFrame, status_markdown)``. On error or no results the
        DataFrame is empty and the status string explains why.
    """
    from contextlib import closing

    log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
    progress(0, desc="Building...")

    if not DB_PATH:
        return pd.DataFrame(), "❌ **Error:** Database file not found."

    # This is the new, fast query
    query = """
        SELECT
            n_start.node_url AS start_url,
            r.rel_url AS relation_url,
            n_end.node_url AS end_url,
            e.weight
        FROM edge_norm e
        JOIN node_norm n_start ON e.start_fk = n_start.node_pk
        JOIN node_norm n_end ON e.end_fk = n_end.node_pk
        JOIN rel_norm r ON e.rel_fk = r.rel_pk
    """

    params = []
    where_clauses = []

    try:
        progress(0.3, desc="Adding filters...")

        # Start node
        start_pattern = _node_like_pattern(start_node)
        if start_pattern is not None:
            where_clauses.append("n_start.node_url LIKE ?")
            params.append(start_pattern)

        # Relation
        if relation and relation.strip():
            rel_value = CONCEPTNET_RELATIONS.get(relation.strip())
            if rel_value:
                where_clauses.append("r.rel_url = ?")
                params.append(rel_value)

        # End node
        end_pattern = _node_like_pattern(end_node)
        if end_pattern is not None:
            where_clauses.append("n_end.node_url LIKE ?")
            params.append(end_pattern)

        if where_clauses:
            query += " WHERE " + " AND ".join(where_clauses)

        query += " ORDER BY e.weight DESC LIMIT ?"
        # The UI slider may deliver a float; LIMIT should be an integer.
        params.append(int(limit))

        progress(0.6, desc="Executing...")

        # closing() releases the connection; ``with conn:`` alone would
        # only end the transaction and leak the handle.
        with closing(get_db_connection()) as conn:
            start_time = time.time()
            df = pd.read_sql_query(query, conn, params=params)
            elapsed = time.time() - start_time

        log_progress(f"Query done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")
        progress(1.0, desc="Done!")

        if df.empty:
            return pd.DataFrame(), f"⚠️ No results ({elapsed:.2f}s)"

        # Add user-friendly labels from the URLs
        df['start_label'] = df['start_url'].apply(node_url_to_label)
        df['end_label'] = df['end_url'].apply(node_url_to_label)
        df['relation'] = df['relation_url'].apply(lambda x: x.split('/')[-1])

        # Reorder columns
        df = df[['start_label', 'relation', 'end_label', 'weight', 'start_url', 'end_url', 'relation_url']]

        return df, f"✅ {len(df)} results in {elapsed:.2f}s"

    except Exception as e:
        # UI boundary: surface the error in the status area and log it.
        log_progress(f"Error: {e}", "ERROR")
        import traceback
        traceback.print_exc()
        return pd.DataFrame(), f"❌ {e}"
375
+
376
def run_raw_query(sql_query):
    """Execute a raw SELECT SQL query against the normalized DB.

    Only statements whose text (after stripping whitespace) starts with
    ``SELECT`` are accepted; empty or ``None`` input is rejected the same
    way. The query runs on the connection from ``get_db_connection``.

    Args:
        sql_query: SQL text entered by the user.

    Returns:
        tuple: ``(DataFrame, status_markdown)``. The DataFrame is empty when
        the query is rejected or fails; the status carries the reason.
    """
    from contextlib import closing

    if not sql_query or not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "❌ Only SELECT queries are allowed."

    if not DB_PATH:
        return pd.DataFrame(), "❌ **Error:** Database file not found."

    try:
        # closing() releases the connection; ``with conn:`` alone would
        # only end the transaction and leak the handle.
        with closing(get_db_connection()) as conn:
            start = time.time()
            df = pd.read_sql_query(sql_query, conn)
            elapsed = time.time() - start
        return df, f"✅ {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        # UI boundary: show SQL errors to the user instead of raising.
        return pd.DataFrame(), f"❌ {e}"
392
+
393
def get_schema_info():
    """
    Build a Markdown summary of the normalized database.

    The summary lists row counts for ``node_norm``, ``rel_norm`` and
    ``edge_norm``, the indices recorded in ``sqlite_master``, and the first
    20 relation URLs from ``rel_norm``.

    Returns:
        str: Markdown text. If the database is unavailable an error message
        is returned; if a query fails, the error is appended to whatever
        was built so far.
    """
    from contextlib import closing

    if not DB_PATH:
        return "❌ **Error:** Database file not found."

    md = f"# 📚 Schema (Normalized)\n\n"
    md += f"**Repo:** [{NORMALIZED_REPO_ID}](https://huggingface.co/datasets/{NORMALIZED_REPO_ID})\n\n"
    md += "**Schema:** Text URLs (`node_norm`, `rel_norm`) are stored once. The `edge_norm` table uses fast integer keys (`_fk`) for joins.\n\n"

    try:
        # closing() releases the connection; ``with conn:`` alone would
        # only end the transaction and leak the handle.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            md += "## Tables & Row Counts\n\n"
            # Table names come from this fixed list, so interpolating them
            # into the SQL text is safe.
            for table in ["node_norm", "rel_norm", "edge_norm"]:
                cursor.execute(f"SELECT COUNT(*) FROM {table}")
                md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n"

            md += "\n## Indices\n\n"
            cursor.execute("SELECT name, sql FROM sqlite_master WHERE type='index' AND sql IS NOT NULL")
            for name, sql in cursor.fetchall():
                md += f"- **{name}:** `{sql}`\n"

            md += "\n## Common Relations (from `rel_norm`)\n\n"
            cursor.execute("SELECT rel_url FROM rel_norm ORDER BY rel_url LIMIT 20")
            for (rel_url,) in cursor.fetchall():
                label = rel_url.split('/')[-1]
                md += f"- **{label}:** `{rel_url}`\n"

    except Exception as e:
        # UI boundary: keep the partial report and append the error.
        md += f"\n**❌ Error:** {e}\n"

    return md
431
+
432
# ===== Build Gradio UI (Mostly Unchanged) =====
# When the database is missing only an error notice is rendered. All tabs and
# their event handlers live in the ``else`` branch, because the handlers
# reference components that exist only when the database is available.
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 ConceptNet Explorer (Normalized v2)")
    gr.Markdown(f"**Repo:** `{NORMALIZED_REPO_ID}` | **Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])}")

    if not DB_PATH:
        gr.Markdown("## ❌ ERROR: DATABASE FILE NOT FOUND")
        gr.Markdown(f"This app cannot start because `{NORMALIZED_DB_FILE}` could not be downloaded from `{NORMALIZED_REPO_ID}`. Please check the logs.")

    else:
        with gr.Tabs():
            with gr.TabItem("🔍 Semantic Profile"):
                gr.Markdown("**Explore semantic relations for any word. Runs on the fast normalized DB.**")

                with gr.Row():
                    word_input = gr.Textbox(label="Word", placeholder="e.g., dog, hund, perro", value="dog", scale=3)
                    lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language", scale=1)

                with gr.Accordion("Select Relations (fewer = faster)", open=False):
                    relation_input = gr.CheckboxGroup(
                        choices=list(CONCEPTNET_RELATIONS.keys()),
                        label="Relations to Query",
                        value=["IsA", "RelatedTo", "PartOf", "HasA", "UsedFor", "CapableOf", "Synonym", "Antonym", "AtLocation", "HasProperty"]
                    )

                semantic_btn = gr.Button("🔍 Get Semantic Profile", variant="primary", size="lg")
                semantic_output = gr.Markdown(value="Click the button to get the semantic profile.")

                gr.Examples(
                    examples=[["dog", "en"], ["hund", "de"], ["perro", "es"], ["chat", "fr"], ["knowledge", "en"]],
                    inputs=[word_input, lang_input],
                    label="Examples"
                )

            with gr.TabItem("⚡ Query Builder"):
                gr.Markdown("**Build custom relationship queries (now using fast integer joins).**")

                with gr.Row():
                    start_input = gr.Textbox(label="Start Node (word)", placeholder="dog (optional)")
                    rel_input = gr.Dropdown(
                        choices=[""] + list(CONCEPTNET_RELATIONS.keys()),
                        label="Relation (name)",
                        value="IsA",
                        info="Leave blank to query all relations"
                    )
                    end_input = gr.Textbox(label="End Node (word)", placeholder="(optional)")

                limit_slider = gr.Slider(label="Limit", minimum=1, maximum=500, value=50, step=1)
                query_btn = gr.Button("▶️ Run Query", variant="primary", size="lg")

                status_output = gr.Markdown()
                results_output = gr.DataFrame(wrap=True)  # Height bug is still fixed

            with gr.TabItem("💻 Raw SQL"):
                gr.Markdown("**Execute custom `SELECT` SQL queries against the *new normalized schema*.**")

                # --- UPDATED Example Query ---
                # Pre-filled example shown in the SQL textbox (13 lines,
                # matching ``lines=13`` below).
                new_example_sql = "\n".join([
                    "SELECT",
                    "n_start.node_url,",
                    "r.rel_url,",
                    "n_end.node_url,",
                    "e.weight",
                    "FROM edge_norm e",
                    "JOIN node_norm n_start ON e.start_fk = n_start.node_pk",
                    "JOIN node_norm n_end ON e.end_fk = n_end.node_pk",
                    "JOIN rel_norm r ON e.rel_fk = r.rel_pk",
                    f"WHERE n_start.node_url = '{CONCEPTNET_BASE}/c/en/dog'",
                    f"AND r.rel_url = '{CONCEPTNET_BASE}/r/IsA'",
                    "ORDER BY e.weight DESC",
                    "LIMIT 10",
                    "",
                ])
                raw_sql_input = gr.Textbox(
                    label="SQL Query",
                    value=new_example_sql,
                    lines=13,
                    elem_classes=["font-mono"]
                )

                raw_btn = gr.Button("▶️ Execute")
                raw_status = gr.Markdown()
                raw_results = gr.DataFrame()  # Height bug is still fixed

            with gr.TabItem("📊 Schema"):
                gr.Markdown("**View database schema, tables, and indices for the *new normalized DB*.**")
                schema_btn = gr.Button("📊 Load Schema Info")
                schema_output = gr.Markdown()

        # --- Button Click Handlers (All API names preserved) ---
        semantic_btn.click(
            get_semantic_profile,
            inputs=[word_input, lang_input, relation_input],
            outputs=semantic_output,
            api_name="get_semantic_profile"
        )

        query_btn.click(
            run_query,
            inputs=[start_input, rel_input, end_input, limit_slider],
            outputs=[results_output, status_output],
            api_name="run_query"
        )

        raw_btn.click(
            run_raw_query,
            inputs=raw_sql_input,
            outputs=[raw_results, raw_status],
            api_name="run_raw_query"
        )

        # Populate the schema tab once on page load, and again on demand.
        demo.load(
            get_schema_info,
            None,
            schema_output,
            api_name="get_schema"
        )
        # NOTE(review): same api_name as the demo.load event above — confirm
        # how Gradio resolves duplicate endpoint names.
        schema_btn.click(
            get_schema_info,
            None,
            schema_output,
            api_name="get_schema"
        )
553
+
554
# Script entry point: log the startup state, then start the Gradio server.
if __name__ == "__main__":
    if not DB_PATH:
        log_progress("APP LAUNCHING WITH ERRORS (DB NOT FOUND)", "ERROR")
    else:
        log_progress("APP READY! (Normalized DB)", "SUCCESS")
    demo.launch(ssr_mode=False)