cstr committed on
Commit
55c07f0
·
verified ·
1 Parent(s): 2c4c1d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +562 -326
app.py CHANGED
@@ -1,107 +1,254 @@
 
 
 
1
  import gradio as gr
2
  import sqlite3
3
  import pandas as pd
4
  from huggingface_hub import hf_hub_download, HfApi
5
- from fastapi import FastAPI
6
  from fastapi.responses import JSONResponse
7
  import os
8
  import time
9
  import json
10
- from typing import Optional
 
 
11
 
12
- # ===== CONFIGURATION =====
 
 
 
 
13
  TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
 
 
 
14
  INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
15
  INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
16
  PROGRESS_FILENAME = "indexing_progress.json"
17
- CONCEPTNET_BASE = "http://conceptnet.io"
18
- # =========================
19
-
20
- print(f"🌍 Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
21
 
 
22
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def log_progress(message, level="INFO"):
 
25
  timestamp = time.strftime("%H:%M:%S")
26
  prefix = {"INFO": "ℹ️ ", "SUCCESS": "βœ…", "ERROR": "❌", "WARN": "⚠️ ", "DEBUG": "πŸ”"}.get(level, "")
27
  print(f"[{timestamp}] {prefix} {message}")
28
 
29
  def check_remote_progress():
 
30
  if not HF_TOKEN:
31
- return {"indexing_complete": False}
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  try:
33
  api = HfApi()
34
  api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
35
- progress_path = hf_hub_download(repo_id=INDEXED_REPO_ID, filename=PROGRESS_FILENAME, repo_type="dataset", token=HF_TOKEN)
 
 
 
 
 
36
  with open(progress_path, 'r') as f:
37
  return json.load(f)
38
- except:
 
39
  return {"indexing_complete": False}
40
 
41
- def create_indexed_database():
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  progress = check_remote_progress()
43
  if progress.get("indexing_complete"):
44
  try:
45
- path = hf_hub_download(repo_id=INDEXED_REPO_ID, filename=INDEXED_DB_FILENAME, repo_type="dataset", token=HF_TOKEN)
46
- log_progress("Indexed DB loaded", "SUCCESS")
 
 
 
 
 
 
 
47
  return path
48
- except:
49
- pass
50
- return None
51
-
52
- DB_PATH = create_indexed_database()
 
53
 
54
- def get_db_connection():
55
- conn = sqlite3.connect(DB_PATH, check_same_thread=False)
56
- conn.execute("PRAGMA cache_size = -256000")
57
- conn.execute("PRAGMA mmap_size = 4294967296")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  return conn
59
 
60
- RELATIONS = [
61
- ("IsA", f"{CONCEPTNET_BASE}/r/IsA"),
62
- ("PartOf", f"{CONCEPTNET_BASE}/r/PartOf"),
63
- ("HasA", f"{CONCEPTNET_BASE}/r/HasA"),
64
- ("UsedFor", f"{CONCEPTNET_BASE}/r/UsedFor"),
65
- ("CapableOf", f"{CONCEPTNET_BASE}/r/CapableOf"),
66
- ("Causes", f"{CONCEPTNET_BASE}/r/Causes"),
67
- ("HasProperty", f"{CONCEPTNET_BASE}/r/HasProperty"),
68
- ("Synonym", f"{CONCEPTNET_BASE}/r/Synonym"),
69
- ("Antonym", f"{CONCEPTNET_BASE}/r/Antonym"),
70
- ("AtLocation", f"{CONCEPTNET_BASE}/r/AtLocation"),
71
- ("RelatedTo", f"{CONCEPTNET_BASE}/r/RelatedTo"),
72
- ("DerivedFrom", f"{CONCEPTNET_BASE}/r/DerivedFrom"),
73
- ]
74
-
75
- def get_semantic_profile_json(word: str, lang: str = 'en', max_per_relation: int = 10):
76
- """Get semantic profile as JSON"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if not word or lang not in TARGET_LANGUAGES:
78
- return {"error": "Invalid input"}
 
 
 
79
 
80
- word = word.strip().lower().replace(' ', '_')
81
- like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
82
 
83
  result = {
84
- "word": word,
85
- "language": lang,
86
- "nodes": [],
87
  "relations": {},
88
- "total_edges": 0
89
  }
90
 
91
  try:
92
  with get_db_connection() as conn:
93
  cursor = conn.cursor()
94
 
 
95
  cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
96
- result["nodes"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]
97
 
98
- if not result["nodes"]:
99
- return {"error": "Word not found"}
 
 
 
100
 
101
- for rel_name, rel_url in RELATIONS:
 
102
  outgoing = []
103
  incoming = []
104
 
 
105
  cursor.execute("""
106
  SELECT en.label, e.weight, en.id
107
  FROM edge e
@@ -109,11 +256,12 @@ def get_semantic_profile_json(word: str, lang: str = 'en', max_per_relation: int
109
  WHERE e.start_id LIKE ? AND e.rel_id = ?
110
  ORDER BY e.weight DESC
111
  LIMIT ?
112
- """, (like_path, rel_url, max_per_relation))
113
 
114
- outgoing = [{"target": label, "weight": weight, "target_id": eid}
115
  for label, weight, eid in cursor.fetchall()]
116
 
 
117
  cursor.execute("""
118
  SELECT s.label, e.weight, s.id
119
  FROM edge e
@@ -121,35 +269,122 @@ def get_semantic_profile_json(word: str, lang: str = 'en', max_per_relation: int
121
  WHERE e.end_id LIKE ? AND e.rel_id = ?
122
  ORDER BY e.weight DESC
123
  LIMIT ?
124
- """, (like_path, rel_url, max_per_relation))
125
 
126
- incoming = [{"source": label, "weight": weight, "source_id": sid}
127
  for label, weight, sid in cursor.fetchall()]
128
 
129
  if outgoing or incoming:
130
  result["relations"][rel_name] = {
 
131
  "outgoing": outgoing,
132
  "incoming": incoming,
133
  "count": len(outgoing) + len(incoming)
134
  }
135
- result["total_edges"] += len(outgoing) + len(incoming)
136
 
137
- return result
138
 
139
  except Exception as e:
140
- return {"error": str(e)}
 
 
141
 
142
- def get_semantic_profile(word, lang='en', progress=gr.Progress()):
143
- """Get semantic profile with progress"""
144
- log_progress(f"Profile: {word} ({lang})", "INFO")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  if not word or lang not in TARGET_LANGUAGES:
147
- return "⚠️ Invalid input"
148
 
149
  progress(0, desc="πŸ” Starting...")
150
 
151
- word = word.strip().lower().replace(' ', '_')
152
- like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
153
 
154
  output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"
155
 
@@ -163,24 +398,22 @@ def get_semantic_profile(word, lang='en', progress=gr.Progress()):
163
  nodes = cursor.fetchall()
164
 
165
  if not nodes:
166
- return f"# 🧠 '{word}'\n\n⚠️ Not found"
167
 
168
  log_progress(f"Found {len(nodes)} nodes", "SUCCESS")
169
 
 
170
  for node_id, label in nodes[:3]:
171
- output_md += f"**Node:** `{node_id}` β†’ **{label}**\n"
172
- output_md += "\n"
173
 
174
- total = 0
175
- num_relations = len(RELATIONS)
176
 
177
- for i, (rel_name, rel_url) in enumerate(RELATIONS):
 
178
  progress((i + 0.1) / num_relations, desc=f"πŸ”Ž {rel_name}...")
179
 
180
- output_md += f"## {rel_name}\n\n"
181
- found = False
182
-
183
- start_time = time.time()
184
  cursor.execute("""
185
  SELECT en.label, e.weight
186
  FROM edge e
@@ -188,17 +421,8 @@ def get_semantic_profile(word, lang='en', progress=gr.Progress()):
188
  WHERE e.start_id LIKE ? AND e.rel_id = ?
189
  ORDER BY e.weight DESC
190
  LIMIT 10
191
- """, (like_path, rel_url))
192
-
193
- results = cursor.fetchall()
194
- elapsed = time.time() - start_time
195
-
196
- log_progress(f" {rel_name} out: {len(results)} in {elapsed:.3f}s", "DEBUG")
197
-
198
- for label, weight in results:
199
- output_md += f"- **{word}** {rel_name} β†’ *{label}* `[{weight:.3f}]`\n"
200
- found = True
201
- total += 1
202
 
203
  cursor.execute("""
204
  SELECT s.label, e.weight
@@ -207,149 +431,87 @@ def get_semantic_profile(word, lang='en', progress=gr.Progress()):
207
  WHERE e.end_id LIKE ? AND e.rel_id = ?
208
  ORDER BY e.weight DESC
209
  LIMIT 10
210
- """, (like_path, rel_url))
211
-
212
- results = cursor.fetchall()
213
-
214
- for label, weight in results:
215
- output_md += f"- *{label}* {rel_name} β†’ **{word}** `[{weight:.3f}]`\n"
216
- found = True
217
- total += 1
218
-
219
- if not found:
220
- output_md += "*No results*\n"
 
 
 
221
 
222
- output_md += "\n"
223
  progress((i + 1) / num_relations, desc=f"βœ“ {rel_name}")
224
 
225
  progress(1.0, desc="βœ… Complete!")
226
 
227
- output_md += f"---\n**Total relations:** {total}\n"
228
- log_progress(f"Complete: {total} relations", "SUCCESS")
229
 
230
  return output_md
231
 
232
  except Exception as e:
233
- log_progress(f"Error: {e}", "ERROR")
234
- import traceback
235
  traceback.print_exc()
236
- return f"**❌ Error:** {e}"
237
 
238
- def query_edges_json(start_node: Optional[str] = None,
239
- relation: Optional[str] = None,
240
- end_node: Optional[str] = None,
241
- limit: int = 50):
242
- """Query edges JSON"""
243
- query = """
244
- SELECT
245
- e.id, s.id, r.label, en.id, e.weight, s.label, en.label
246
- FROM edge e
247
- JOIN relation r ON e.rel_id = r.id
248
- JOIN node s ON e.start_id = s.id
249
- JOIN node en ON e.end_id = en.id
250
- WHERE 1=1
251
  """
 
 
 
252
 
253
- params = []
254
-
255
- try:
256
- with get_db_connection() as conn:
257
- if start_node:
258
- if start_node.startswith('http://'):
259
- pattern = f"{start_node}%"
260
- else:
261
- pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
262
- query += " AND s.id LIKE ?"
263
- params.append(pattern)
264
-
265
- if relation:
266
- if relation.startswith('http://'):
267
- rel_value = relation
268
- elif relation.startswith('/r/'):
269
- rel_value = f"{CONCEPTNET_BASE}{relation}"
270
- else:
271
- rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
272
- query += " AND r.id = ?"
273
- params.append(rel_value)
274
-
275
- if end_node:
276
- if end_node.startswith('http://'):
277
- pattern = f"{end_node}%"
278
- else:
279
- pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
280
- query += " AND en.id LIKE ?"
281
- params.append(pattern)
282
-
283
- query += " ORDER BY e.weight DESC LIMIT ?"
284
- params.append(limit)
285
-
286
- df = pd.read_sql_query(query, conn, params=params)
287
-
288
- if df.empty:
289
- return {"results": [], "count": 0}
290
-
291
- df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
292
-
293
- return {
294
- "results": df.to_dict(orient='records'),
295
- "count": len(df)
296
- }
297
-
298
- except Exception as e:
299
- return {"error": str(e)}
300
-
301
- def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
302
- """Query builder"""
303
- log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
304
-
305
- progress(0, desc="πŸ” Building...")
306
 
307
  query = """
308
  SELECT
309
- e.id, s.id, r.label, en.id, e.weight, s.label, en.label
 
 
 
 
 
 
 
310
  FROM edge e
311
  JOIN relation r ON e.rel_id = r.id
312
  JOIN node s ON e.start_id = s.id
313
  JOIN node en ON e.end_id = en.id
314
  WHERE 1=1
315
  """
316
-
317
- params = []
318
 
319
  try:
 
 
 
 
 
 
320
  with get_db_connection() as conn:
321
- progress(0.3, desc="πŸ“ Filters...")
322
 
323
  if start_node and start_node.strip():
324
- if start_node.startswith('http://'):
325
- pattern = f"{start_node}%"
326
- else:
327
- pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
328
  query += " AND s.id LIKE ?"
329
- params.append(pattern)
330
-
331
- if relation and relation.strip():
332
- if relation.startswith('http://'):
333
- rel_value = relation
334
- elif relation.startswith('/r/'):
335
- rel_value = f"{CONCEPTNET_BASE}{relation}"
336
- else:
337
- rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
338
  query += " AND r.id = ?"
339
- params.append(rel_value)
340
 
341
  if end_node and end_node.strip():
342
- if end_node.startswith('http://'):
343
- pattern = f"{end_node}%"
344
- else:
345
- pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
346
  query += " AND en.id LIKE ?"
347
- params.append(pattern)
348
 
349
  query += " ORDER BY e.weight DESC LIMIT ?"
350
  params.append(limit)
351
 
352
- progress(0.6, desc="⚑ Running...")
353
 
354
  start_time = time.time()
355
  df = pd.read_sql_query(query, conn, params=params)
@@ -357,23 +519,31 @@ def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
357
 
358
  progress(1.0, desc="βœ… Done!")
359
 
360
- log_progress(f"Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")
361
 
362
  if df.empty:
363
- return pd.DataFrame(), f"⚠️ No results ({elapsed:.2f}s)"
 
 
 
 
 
 
 
364
 
365
- df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
366
  return df, f"βœ… {len(df)} results in {elapsed:.2f}s"
367
 
368
  except Exception as e:
369
- log_progress(f"Error: {e}", "ERROR")
370
- import traceback
371
  traceback.print_exc()
372
- return pd.DataFrame(), f"❌ {e}"
373
 
374
- def run_raw_query(sql_query):
 
 
 
375
  if not sql_query.strip().upper().startswith("SELECT"):
376
- return pd.DataFrame(), "❌ Only SELECT"
377
  try:
378
  with get_db_connection() as conn:
379
  start = time.time()
@@ -381,161 +551,227 @@ def run_raw_query(sql_query):
381
  elapsed = time.time() - start
382
  return df, f"βœ… {len(df)} rows in {elapsed:.3f}s"
383
  except Exception as e:
384
- return pd.DataFrame(), f"❌ {e}"
385
 
386
- def get_schema_info():
387
- md = f"# πŸ“š Schema\n\n"
388
- md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n"
 
 
 
 
389
 
390
  try:
391
  with get_db_connection() as conn:
392
  cursor = conn.cursor()
393
 
394
- md += "## Relations\n\n"
395
- cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
396
- for rel_id, label in cursor.fetchall():
397
- md += f"- **{label}:** `{rel_id}`\n"
398
-
399
- md += "\n## Tables\n\n"
400
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
401
- for table, in cursor.fetchall():
 
 
402
  cursor.execute(f"SELECT COUNT(*) FROM {table}")
403
- md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n"
 
 
 
 
 
 
404
 
405
  except Exception as e:
406
- md += f"\nError: {e}\n"
407
 
408
  return md
409
 
410
- # ===== FASTAPI - Create app FIRST =====
411
- app = FastAPI(title="ConceptNet API", version="1.0")
412
-
413
- @app.get("/api")
414
- def api_docs():
415
- """API documentation - accessible at /api"""
416
- return {
417
- "name": "ConceptNet API",
418
- "version": "1.0",
419
- "endpoints": {
420
- "/api/profile/{word}": "Semantic profile (params: lang, limit)",
421
- "/api/query": "Query edges (params: start, relation, end, limit)",
422
- "/api/relations": "List relations",
423
- "/api/languages": "List languages"
424
- },
425
- "examples": {
426
- "profile": "/api/profile/dog?lang=en&limit=10",
427
- "query": "/api/query?start=dog&relation=IsA&limit=20"
428
- },
429
- "note": "Visit root (/) for the Gradio UI"
430
- }
431
-
432
- @app.get("/api/profile/{word}")
433
- def api_profile(word: str, lang: str = "en", limit: int = 10):
434
- """Get semantic profile"""
435
- return JSONResponse(get_semantic_profile_json(word, lang, limit))
436
-
437
- @app.get("/api/query")
438
- def api_query(start: Optional[str] = None,
439
- relation: Optional[str] = None,
440
- end: Optional[str] = None,
441
- limit: int = 50):
442
- """Query edges"""
443
- return JSONResponse(query_edges_json(start, relation, end, limit))
444
 
445
- @app.get("/api/relations")
446
- def api_relations():
447
- """List relations"""
448
- return JSONResponse({"relations": [{"name": name, "url": url} for name, url in RELATIONS]})
449
-
450
- @app.get("/api/languages")
451
- def api_languages():
452
- """List languages"""
453
- return JSONResponse({"languages": TARGET_LANGUAGES})
454
-
455
- # ===== GRADIO UI =====
456
- with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
457
- gr.Markdown("# 🧠 ConceptNet Explorer")
458
- gr.Markdown(
459
- f"**Multi-language semantic network** | "
460
- f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | "
461
- f"**API:** `/api/profile/{{word}}` `/api/query`"
462
- )
463
 
464
- with gr.Tabs():
465
- with gr.TabItem("πŸ” Semantic Profile"):
466
- gr.Markdown("**Explore semantic relations for any word**")
467
-
468
- with gr.Row():
469
- word_input = gr.Textbox(label="Word", placeholder="dog", value="dog")
470
- lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
471
-
472
- semantic_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary", size="lg")
473
- semantic_output = gr.Markdown()
474
-
475
- gr.Examples(
476
- examples=[["dog", "en"], ["hund", "de"], ["perro", "es"], ["chien", "fr"]],
477
- inputs=[word_input, lang_input]
478
- )
479
 
480
- with gr.TabItem("⚑ Query Builder"):
481
- with gr.Row():
482
- start_input = gr.Textbox(label="Start", placeholder="dog")
483
- rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA")
484
- end_input = gr.Textbox(label="End", placeholder="")
485
-
486
- limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50)
487
- query_btn = gr.Button("▢️ Run Query", variant="primary", size="lg")
 
 
 
 
 
 
 
488
 
489
- status_output = gr.Markdown()
490
- results_output = gr.DataFrame(wrap=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
 
492
- with gr.TabItem("πŸ’» Raw SQL"):
493
- raw_sql_input = gr.Textbox(
494
- label="SQL",
495
- value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10",
496
- lines=3
497
- )
498
- raw_btn = gr.Button("▢️ Execute")
499
- raw_status = gr.Markdown()
500
- raw_results = gr.DataFrame()
501
 
502
- with gr.TabItem("πŸ“Š Schema"):
503
- schema_btn = gr.Button("πŸ“Š Load Schema")
504
- schema_output = gr.Markdown()
 
 
505
 
506
- with gr.TabItem("πŸ”Œ API"):
507
- gr.Markdown("## JSON API Endpoints\n")
508
- gr.Markdown("### API Documentation\n```\nGET /api\n```")
509
- gr.Markdown("### Get Semantic Profile\n```\nGET /api/profile/{word}?lang=en&limit=10\n```")
510
- gr.Markdown("### Query Edges\n```\nGET /api/query?start=dog&relation=IsA&limit=50\n```")
511
- gr.Markdown("### List Relations\n```\nGET /api/relations\n```")
512
- gr.Markdown("### Examples\n")
513
- gr.Markdown("```\ncurl https://your-space.hf.space/api/profile/dog?lang=en\n```")
514
- gr.Markdown("```\ncurl 'https://your-space.hf.space/api/query?start=dog&relation=IsA&limit=10'\n```")
515
-
516
- gr.Markdown(
517
- "---\n"
518
- "**Performance:** Exact match on rel_id for fast queries | "
519
- "**API:** Full REST API at `/api/*` endpoints"
520
- )
521
-
522
- semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output)
523
- query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output])
524
- raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status])
525
- schema_btn.click(get_schema_info, None, schema_output)
526
 
527
- # ===== MOUNT GRADIO TO FASTAPI - Gradio at root, API at /api/* =====
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  app = gr.mount_gradio_app(app, demo, path="/")
529
 
 
 
 
 
 
530
  if __name__ == "__main__":
531
  log_progress("="*60, "SUCCESS")
532
- log_progress("πŸš€ APP READY!", "SUCCESS")
533
  log_progress("="*60, "SUCCESS")
534
  log_progress("UI: http://localhost:7860/", "INFO")
535
  log_progress("API: http://localhost:7860/api", "INFO")
536
  log_progress(" http://localhost:7860/api/profile/dog", "INFO")
537
- log_progress(" http://localhost:7860/api/query?start=dog&relation=IsA", "INFO")
538
  log_progress("="*60, "SUCCESS")
539
 
540
- import uvicorn
541
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
1
+ # ============================================================================
2
+ # 1. CONSOLIDATED IMPORTS
3
+ # ============================================================================
4
  import gradio as gr
5
  import sqlite3
6
  import pandas as pd
7
  from huggingface_hub import hf_hub_download, HfApi
8
+ from fastapi import FastAPI, Response
9
  from fastapi.responses import JSONResponse
10
  import os
11
  import time
12
  import json
13
+ from typing import Optional, Dict, List, Any
14
+ import uvicorn
15
+ import traceback
16
 
17
+ # ============================================================================
18
+ # 2. SHARED GLOBALS & CONFIGURATION
19
+ # ============================================================================
20
+
21
+ # --- Languages ---
22
  TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
23
+ print(f"🌍 Target Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
24
+
25
+ # --- Hugging Face & DB Config ---
26
  INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
27
  INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
28
  PROGRESS_FILENAME = "indexing_progress.json"
29
+ CONCEPTNET_BASE_URI = "http://conceptnet.io"
30
+ DB_PATH: Optional[str] = None # Will be set by setup_database()
 
 
31
 
32
+ # --- HF Token ---
33
  HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
34
 
35
+ # --- Full list of 37 ConceptNet Relations: 33 standard + 4 negated (as requested) ---
36
+ # We use this to populate dropdowns and guide the semantic profile
37
+ CONCEPTNET_RELATIONS: Dict[str, str] = {
38
+ # Main Relations
39
+ "RelatedTo": "/r/RelatedTo",
40
+ "IsA": "/r/IsA",
41
+ "PartOf": "/r/PartOf",
42
+ "HasA": "/r/HasA",
43
+ "UsedFor": "/r/UsedFor",
44
+ "CapableOf": "/r/CapableOf",
45
+ "AtLocation": "/r/AtLocation",
46
+ "Causes": "/r/Causes",
47
+ "HasSubevent": "/r/HasSubevent",
48
+ "HasFirstSubevent": "/r/HasFirstSubevent",
49
+ "HasLastSubevent": "/r/HasLastSubevent",
50
+ "HasPrerequisite": "/r/HasPrerequisite",
51
+ "HasProperty": "/r/HasProperty",
52
+ "MotivatedByGoal": "/r/MotivatedByGoal",
53
+ "ObstructedBy": "/r/ObstructedBy",
54
+ "Desires": "/r/Desires",
55
+ "CreatedBy": "/r/CreatedBy",
56
+ "Synonym": "/r/Synonym",
57
+ "Antonym": "/r/Antonym",
58
+ "DistinctFrom": "/r/DistinctFrom",
59
+ "DerivedFrom": "/r/DerivedFrom",
60
+ "SymbolOf": "/r/SymbolOf",
61
+ "DefinedAs": "/r/DefinedAs",
62
+ "MannerOf": "/r/MannerOf",
63
+ "LocatedNear": "/r/LocatedNear",
64
+ "HasContext": "/r/HasContext",
65
+ "SimilarTo": "/r/SimilarTo",
66
+ "EtymologicallyRelatedTo": "/r/EtymologicallyRelatedTo",
67
+ "EtymologicallyDerivedFrom": "/r/EtymologicallyDerivedFrom",
68
+ "CausesDesire": "/r/CausesDesire",
69
+ "MadeOf": "/r/MadeOf",
70
+ "ReceivesAction": "/r/ReceivesAction",
71
+ "ExternalURL": "/r/ExternalURL",
72
+ # Negative Relations (from deprecated list, but useful)
73
+ "NotDesires": "/r/NotDesires",
74
+ "NotUsedFor": "/r/NotUsedFor",
75
+ "NotCapableOf": "/r/NotCapableOf",
76
+ "NotHasProperty": "/r/NotHasProperty",
77
+ }
78
+
79
+ # Sorted list of (Label, URI) tuples for Gradio dropdowns
80
+ RELATION_CHOICES = sorted(CONCEPTNET_RELATIONS.items())
81
+
82
+ # ============================================================================
83
+ # 3. DATABASE SETUP & HELPERS
84
+ # ============================================================================
85
+
86
  def log_progress(message, level="INFO"):
87
+ """Helper for logging with emoji prefixes."""
88
  timestamp = time.strftime("%H:%M:%S")
89
  prefix = {"INFO": "ℹ️ ", "SUCCESS": "βœ…", "ERROR": "❌", "WARN": "⚠️ ", "DEBUG": "πŸ”"}.get(level, "")
90
  print(f"[{timestamp}] {prefix} {message}")
91
 
92
  def check_remote_progress():
93
+ """Check HF Hub for the indexing progress file."""
94
  if not HF_TOKEN:
95
+ log_progress("No HF_TOKEN. Assuming local DB or public repo.", "WARN")
96
+ # Try public download first
97
+ try:
98
+ progress_path = hf_hub_download(
99
+ repo_id=INDEXED_REPO_ID,
100
+ filename=PROGRESS_FILENAME,
101
+ repo_type="dataset"
102
+ )
103
+ with open(progress_path, 'r') as f:
104
+ return json.load(f)
105
+ except Exception:
106
+ return {"indexing_complete": False} # Fallback
107
+
108
+ # Try with token if available
109
  try:
110
  api = HfApi()
111
  api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
112
+ progress_path = hf_hub_download(
113
+ repo_id=INDEXED_REPO_ID,
114
+ filename=PROGRESS_FILENAME,
115
+ repo_type="dataset",
116
+ token=HF_TOKEN
117
+ )
118
  with open(progress_path, 'r') as f:
119
  return json.load(f)
120
+ except Exception as e:
121
+ log_progress(f"Could not check remote progress: {e}", "WARN")
122
  return {"indexing_complete": False}
123
 
124
+ def setup_database():
125
+ """
126
+ Downloads the pre-indexed database from Hugging Face Hub.
127
+ """
128
+ global DB_PATH
129
+ log_progress("Attempting to load indexed database...", "INFO")
130
+
131
+ # Check if we already have it locally
132
+ local_path = Path(INDEXED_DB_FILENAME)
133
+ if local_path.exists() and local_path.stat().st_size > 1000000:
134
+ log_progress(f"Found existing local DB: {local_path.resolve()}", "SUCCESS")
135
+ DB_PATH = str(local_path.resolve())
136
+ return DB_PATH
137
+
138
  progress = check_remote_progress()
139
  if progress.get("indexing_complete"):
140
  try:
141
+ log_progress(f"Downloading {INDEXED_DB_FILENAME} from {INDEXED_REPO_ID}...", "INFO")
142
+ path = hf_hub_download(
143
+ repo_id=INDEXED_REPO_ID,
144
+ filename=INDEXED_DB_FILENAME,
145
+ repo_type="dataset",
146
+ token=HF_TOKEN # Will be None if not set, which is fine for public repos
147
+ )
148
+ DB_PATH = path
149
+ log_progress(f"Indexed DB loaded successfully from Hub: {path}", "SUCCESS")
150
  return path
151
+ except Exception as e:
152
+ log_progress(f"Failed to download indexed DB: {e}", "ERROR")
153
+ return None
154
+ else:
155
+ log_progress("Remote indexing is not complete or progress file not found.", "ERROR")
156
+ return None
157
 
158
+ def get_db_connection() -> sqlite3.Connection:
159
+ """
160
+ Returns a new SQLite connection to the database.
161
+ Includes performance PRAGMAs.
162
+ """
163
+ if DB_PATH is None:
164
+ raise ConnectionError("Database path is not set. Call setup_database() first.")
165
+
166
+ # check_same_thread=False is generally safe for read-only operations
167
+ # in a multi-threaded server like FastAPI/uvicorn.
168
+ conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, check_same_thread=False)
169
+
170
+ # Performance tuning for read-only connections
171
+ conn.execute("PRAGMA journal_mode = OFF")
172
+ conn.execute("PRAGMA synchronous = 0")
173
+ conn.execute("PRAGMA cache_size = -256000") # 256MB cache
174
+ conn.execute("PRAGMA mmap_size = 4294967296") # 4GB mmap
175
+ conn.execute("PRAGMA temp_store = MEMORY")
176
+
177
  return conn
178
 
179
+ # ============================================================================
180
+ # 4. API (FASTAPI) ENDPOINTS
181
+ # (These functions return JSON data and are called by the API)
182
+ # ============================================================================
183
+
184
+ # --- Create FastAPI app FIRST ---
185
+ app = FastAPI(
186
+ title="ConceptNet Explorer API",
187
+ version="1.0",
188
+ description="A versatile API for querying a ConceptNet SQLite database."
189
+ )
190
+
191
+ @app.get("/api", tags=["API"])
192
+ def api_docs():
193
+ """API documentation - accessible at /api"""
194
+ return {
195
+ "name": "ConceptNet Explorer API",
196
+ "version": "1.0",
197
+ "endpoints": {
198
+ "/api/profile/{word}": "Get a full semantic profile for a word.",
199
+ "/api/query": "Run a specific query for edges.",
200
+ "/api/relations": "List all available relation types.",
201
+ "/api/languages": "List all supported languages."
202
+ },
203
+ "examples": {
204
+ "profile": "/api/profile/dog?lang=en&limit=10",
205
+ "query": "/api/query?start_node=dog&relation_uri=/r/IsA&limit=20"
206
+ },
207
+ "note": "Visit the root path (/) for the Gradio UI."
208
+ }
209
+
210
+ @app.get("/api/profile/{word}", tags=["API"])
211
+ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
212
+ """
213
+ API Endpoint: Get a full semantic profile for a word as JSON.
214
+ Queries all 34 relation types.
215
+ """
216
  if not word or lang not in TARGET_LANGUAGES:
217
+ return JSONResponse(
218
+ status_code=400,
219
+ content={"error": "Invalid input. 'word' is required and 'lang' must be valid."}
220
+ )
221
 
222
+ word_normalized = word.strip().lower().replace(' ', '_')
223
+ like_path = f"{CONCEPTNET_BASE_URI}/c/{lang}/{word_normalized}%"
224
 
225
  result = {
226
+ "query": {"word": word, "normalized": word_normalized, "lang": lang, "limit_per_relation": limit},
227
+ "nodes_found": [],
 
228
  "relations": {},
229
+ "total_edges_found": 0
230
  }
231
 
232
  try:
233
  with get_db_connection() as conn:
234
  cursor = conn.cursor()
235
 
236
+ # 1. Find matching nodes
237
  cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
238
+ result["nodes_found"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]
239
 
240
+ if not result["nodes_found"]:
241
+ return JSONResponse(
242
+ status_code=404,
243
+ content={"error": f"Word '{word}' (normalized: '{word_normalized}') not found in language '{lang}'."}
244
+ )
245
 
246
+ # 2. Query all relations
247
+ for rel_name, rel_uri in RELATION_CHOICES:
248
  outgoing = []
249
  incoming = []
250
 
251
+ # Outgoing (word -> relation -> target)
252
  cursor.execute("""
253
  SELECT en.label, e.weight, en.id
254
  FROM edge e
 
256
  WHERE e.start_id LIKE ? AND e.rel_id = ?
257
  ORDER BY e.weight DESC
258
  LIMIT ?
259
+ """, (like_path, rel_uri, limit))
260
 
261
+ outgoing = [{"target_label": label, "weight": weight, "target_id": eid}
262
  for label, weight, eid in cursor.fetchall()]
263
 
264
+ # Incoming (source -> relation -> word)
265
  cursor.execute("""
266
  SELECT s.label, e.weight, s.id
267
  FROM edge e
 
269
  WHERE e.end_id LIKE ? AND e.rel_id = ?
270
  ORDER BY e.weight DESC
271
  LIMIT ?
272
+ """, (like_path, rel_uri, limit))
273
 
274
+ incoming = [{"source_label": label, "weight": weight, "source_id": sid}
275
  for label, weight, sid in cursor.fetchall()]
276
 
277
  if outgoing or incoming:
278
  result["relations"][rel_name] = {
279
+ "uri": rel_uri,
280
  "outgoing": outgoing,
281
  "incoming": incoming,
282
  "count": len(outgoing) + len(incoming)
283
  }
284
+ result["total_edges_found"] += len(outgoing) + len(incoming)
285
 
286
+ return JSONResponse(content=result)
287
 
288
  except Exception as e:
289
+ log_progress(f"API /profile error: {e}", "ERROR")
290
+ traceback.print_exc()
291
+ return JSONResponse(status_code=500, content={"error": str(e)})
292
 
293
@app.get("/api/query", tags=["API"])
def query_edges_json(
    start_node: Optional[str] = None,
    relation_uri: Optional[str] = None,
    end_node: Optional[str] = None,
    lang: str = 'en',
    limit: int = 50
):
    """
    API Endpoint: Query edges with flexible filters.

    All filters are optional and are AND-ed together; rows are ordered by
    descending edge weight. Blank or whitespace-only filters are ignored.

    Args:
        start_node: A plain word (e.g. 'dog'), a relative node URI
            (e.g. '/c/en/dog') or a full node URI. Matched as a prefix of
            the start node id.
        relation_uri: Relation id, matched exactly (e.g. '/r/IsA').
        end_node: Same accepted forms as start_node, applied to the end node.
        lang: Language code used to expand plain words into node URIs.
        limit: Maximum number of rows to return; must not be negative.

    Returns:
        On success, a dict with the echoed "query", the "results" rows and
        their "count". A JSONResponse with status 400 is returned for a
        negative limit, and one with status 500 if the query itself fails.
    """
    # SQLite interprets a negative LIMIT as "no upper bound", which would let
    # a single request stream the entire edge table.
    if limit < 0:
        return JSONResponse(
            status_code=400,
            content={"error": "limit must be a non-negative integer"}
        )

    query = """
        SELECT
            e.id as edge_id,
            s.id as start_id,
            r.id as relation_id,
            en.id as end_id,
            e.weight,
            s.label as start_label,
            r.label as relation_label,
            en.label as end_label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params: List[Any] = []

    def build_node_pattern(node_str: str) -> str:
        """Turn a word or node URI into a LIKE prefix pattern for node ids."""
        node_str = node_str.strip()
        if node_str.startswith(f'{CONCEPTNET_BASE_URI}/c/'):
            return f"{node_str}%"
        if node_str.startswith('/c/'):
            # Relative URI such as /c/en/dog: only the base is missing.
            return f"{CONCEPTNET_BASE_URI}{node_str}%"
        # Assume it's a plain word
        return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.lower().replace(' ', '_')}%"

    try:
        # Normalise once so whitespace-only values behave like missing ones
        # (same rule the Gradio query builder applies).
        start_text = start_node.strip() if start_node else ""
        relation_text = relation_uri.strip() if relation_uri else ""
        end_text = end_node.strip() if end_node else ""

        if start_text:
            query += " AND s.id LIKE ?"
            params.append(build_node_pattern(start_text))

        if relation_text:
            # Expecting a full URI like /r/IsA
            query += " AND r.id = ?"
            params.append(relation_text)

        if end_text:
            query += " AND en.id LIKE ?"
            params.append(build_node_pattern(end_text))

        query += " ORDER BY e.weight DESC LIMIT ?"
        params.append(limit)

        with get_db_connection() as conn:
            df = pd.read_sql_query(query, conn, params=params)

        return {
            "query": {"start_node": start_node, "relation_uri": relation_uri, "end_node": end_node, "lang": lang, "limit": limit},
            "results": df.to_dict(orient='records'),
            "count": len(df)
        }

    except Exception as e:
        log_progress(f"API /query error: {e}", "ERROR")
        traceback.print_exc()
        return JSONResponse(status_code=500, content={"error": str(e)})
359
+
360
@app.get("/api/relations", tags=["API"])
def api_relations():
    """API Endpoint: Return the configured relations under the "relations" key."""
    payload = {"relations": CONCEPTNET_RELATIONS}
    return JSONResponse(content=payload)
364
+
365
@app.get("/api/languages", tags=["API"])
def api_languages():
    """API Endpoint: Return the configured language codes under the "languages" key."""
    payload = {"languages": TARGET_LANGUAGES}
    return JSONResponse(content=payload)
369
+
370
+ # ============================================================================
371
+ # 5. GRADIO UI HELPER FUNCTIONS
372
+ # (These functions are called by Gradio button clicks and return UI components)
373
+ # ============================================================================
374
+
375
+ def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
376
+ """
377
+ Gradio UI Function: Get semantic profile formatted as Markdown.
378
+ """
379
+ log_progress(f"UI Profile: {word} ({lang})", "INFO")
380
 
381
  if not word or lang not in TARGET_LANGUAGES:
382
+ return "⚠️ Invalid input. Please provide a word and select a language."
383
 
384
  progress(0, desc="πŸ” Starting...")
385
 
386
+ word_normalized = word.strip().lower().replace(' ', '_')
387
+ like_path = f"{CONCEPTNET_BASE_URI}/c/{lang}/{word_normalized}%"
388
 
389
  output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"
390
 
 
398
  nodes = cursor.fetchall()
399
 
400
  if not nodes:
401
+ return f"# 🧠 '{word}'\n\n⚠️ **Word not found** (as `.../c/{lang}/{word_normalized}...`)"
402
 
403
  log_progress(f"Found {len(nodes)} nodes", "SUCCESS")
404
 
405
+ output_md += "**Matching Nodes:**\n"
406
  for node_id, label in nodes[:3]:
407
+ output_md += f"- **{label}** (ID: `{node_id}`)\n"
408
+ output_md += "\n---\n"
409
 
410
+ total_found = 0
411
+ num_relations = len(RELATION_CHOICES)
412
 
413
+ # Use the FULL list of relations
414
+ for i, (rel_name, rel_uri) in enumerate(RELATION_CHOICES):
415
  progress((i + 0.1) / num_relations, desc=f"πŸ”Ž {rel_name}...")
416
 
 
 
 
 
417
  cursor.execute("""
418
  SELECT en.label, e.weight
419
  FROM edge e
 
421
  WHERE e.start_id LIKE ? AND e.rel_id = ?
422
  ORDER BY e.weight DESC
423
  LIMIT 10
424
+ """, (like_path, rel_uri))
425
+ outgoing = cursor.fetchall()
 
 
 
 
 
 
 
 
 
426
 
427
  cursor.execute("""
428
  SELECT s.label, e.weight
 
431
  WHERE e.end_id LIKE ? AND e.rel_id = ?
432
  ORDER BY e.weight DESC
433
  LIMIT 10
434
+ """, (like_path, rel_uri))
435
+ incoming = cursor.fetchall()
436
+
437
+ if outgoing or incoming:
438
+ output_md += f"### {rel_name} (`{rel_uri}`)\n\n"
439
+ total_found += len(outgoing) + len(incoming)
440
+
441
+ for label, weight in outgoing:
442
+ output_md += f"- **{word}** β†’ *{label}* `[{weight:.3f}]`\n"
443
+
444
+ for label, weight in incoming:
445
+ output_md += f"- *{label}* β†’ **{word}** `[{weight:.3f}]`\n"
446
+
447
+ output_md += "\n"
448
 
 
449
  progress((i + 1) / num_relations, desc=f"βœ“ {rel_name}")
450
 
451
  progress(1.0, desc="βœ… Complete!")
452
 
453
+ output_md += f"---\n**Total relations found:** {total_found}\n"
454
+ log_progress(f"Complete: {total_found} relations", "SUCCESS")
455
 
456
  return output_md
457
 
458
  except Exception as e:
459
+ log_progress(f"UI Profile Error: {e}", "ERROR")
 
460
  traceback.print_exc()
461
+ return f"**❌ An unexpected error occurred:**\n\n```\n{e}\n```"
462
 
463
+ def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, limit: int, progress=gr.Progress()):
 
 
 
 
 
 
 
 
 
 
 
 
464
  """
465
+ Gradio UI Function: Query builder.
466
+ """
467
+ log_progress(f"UI Query: start={start_node}, rel={relation_uri}, end={end_node}, lang={lang}", "INFO")
468
 
469
+ progress(0, desc="πŸ” Building query...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
 
471
  query = """
472
  SELECT
473
+ s.label as start_label,
474
+ r.label as relation_label,
475
+ en.label as end_label,
476
+ e.weight,
477
+ s.id as start_id,
478
+ r.id as relation_id,
479
+ en.id as end_id,
480
+ e.id as edge_id
481
  FROM edge e
482
  JOIN relation r ON e.rel_id = r.id
483
  JOIN node s ON e.start_id = s.id
484
  JOIN node en ON e.end_id = en.id
485
  WHERE 1=1
486
  """
487
+ params: List[Any] = []
 
488
 
489
  try:
490
+ def build_node_pattern(node_str: str) -> str:
491
+ if node_str.strip().startswith(f'{CONCEPTNET_BASE_URI}/c/'):
492
+ return f"{node_str.strip()}%"
493
+ # Assume it's a plain word
494
+ return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
495
+
496
  with get_db_connection() as conn:
497
+ progress(0.3, desc="πŸ“ Applying filters...")
498
 
499
  if start_node and start_node.strip():
 
 
 
 
500
  query += " AND s.id LIKE ?"
501
+ params.append(build_node_pattern(start_node))
502
+
503
+ if relation_uri and relation_uri.strip():
 
 
 
 
 
 
504
  query += " AND r.id = ?"
505
+ params.append(relation_uri)
506
 
507
  if end_node and end_node.strip():
 
 
 
 
508
  query += " AND en.id LIKE ?"
509
+ params.append(build_node_pattern(end_node))
510
 
511
  query += " ORDER BY e.weight DESC LIMIT ?"
512
  params.append(limit)
513
 
514
+ progress(0.6, desc="⚑ Running query...")
515
 
516
  start_time = time.time()
517
  df = pd.read_sql_query(query, conn, params=params)
 
519
 
520
  progress(1.0, desc="βœ… Done!")
521
 
522
+ log_progress(f"Query Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")
523
 
524
  if df.empty:
525
+ return pd.DataFrame(), f"⚠️ No results found ({elapsed:.2f}s)"
526
+
527
+ # Reorder columns for better display
528
+ cols_to_show = [
529
+ 'start_label', 'relation_label', 'end_label', 'weight',
530
+ 'start_id', 'relation_id', 'end_id'
531
+ ]
532
+ df = df[cols_to_show]
533
 
 
534
  return df, f"βœ… {len(df)} results in {elapsed:.2f}s"
535
 
536
  except Exception as e:
537
+ log_progress(f"UI Query Error: {e}", "ERROR")
 
538
  traceback.print_exc()
539
+ return pd.DataFrame(), f"❌ **Error:**\n\n```\n{e}\n```"
540
 
541
+ def run_raw_query_ui(sql_query: str):
542
+ """
543
+ Gradio UI Function: Raw SQL query.
544
+ """
545
  if not sql_query.strip().upper().startswith("SELECT"):
546
+ return pd.DataFrame(), "❌ **Error:** Only `SELECT` statements are allowed."
547
  try:
548
  with get_db_connection() as conn:
549
  start = time.time()
 
551
  elapsed = time.time() - start
552
  return df, f"βœ… {len(df)} rows in {elapsed:.3f}s"
553
  except Exception as e:
554
+ return pd.DataFrame(), f"❌ **Error:**\n\n```\n{e}\n```"
555
 
556
def get_schema_info_ui():
    """
    Gradio UI Function: Display schema information.

    Builds a Markdown report containing the dataset repo link, the database
    file path, every non-internal table with its row count, and the list of
    configured relations.

    Returns:
        The Markdown text. If reading the database fails, whatever was
        collected so far is returned with an error section appended.
    """
    sections = [
        "# πŸ“š Database Schema\n\n",
        f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n",
        f"**Database File:** `{DB_PATH}`\n\n",
    ]

    try:
        with get_db_connection() as conn:
            cursor = conn.cursor()

            sections.append("## Tables & Row Counts\n\n")
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            tables = [row[0] for row in cursor.fetchall()]

            for table in tables:
                # Identifiers cannot be bound as parameters, so quote the name
                # (doubling embedded quotes) to survive keywords/odd characters.
                quoted_table = '"' + table.replace('"', '""') + '"'
                cursor.execute(f"SELECT COUNT(*) FROM {quoted_table}")
                count = cursor.fetchone()[0]
                sections.append(f"- **{table}:** {count:,} rows\n")

            sections.append("\n## Configured Relations\n\n")
            sections.append("This list is used to populate the 'Query Builder' dropdown and 'Semantic Profile'.\n\n")
            sections.extend(f"- **{name}:** `{uri}`\n" for name, uri in RELATION_CHOICES)

    except Exception as e:
        sections.append(f"\n**❌ Error loading schema:**\n\n```\n{e}\n```\n")

    return "".join(sections)
586
 
587
+ # ============================================================================
588
+ # 6. GRADIO UI DEFINITION
589
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
 
591
def create_gradio_ui():
    """
    Builds the consolidated Gradio interface.

    The interface has five tabs (Semantic Profile, Query Builder, Raw SQL,
    Schema, API Docs) and wires each button to its UI helper function.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched or mounted).
    """

    with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
        gr.Markdown(
            "# 🧠 ConceptNet Explorer\n"
            "An interface for querying the ConceptNet semantic network."
        )
        gr.Markdown(
            f"**Languages:** {', '.join([code.upper() for code in TARGET_LANGUAGES])} | "
            f"**Database:** `{INDEXED_REPO_ID}` | "
            f"**JSON API:** Access at `/api`"
        )

        with gr.Tabs():
            with gr.TabItem("πŸ” Semantic Profile"):
                gr.Markdown("Explore all semantic relations for a single word. This queries all 34 relation types.")

                with gr.Row():
                    profile_word_input = gr.Textbox(label="Word", placeholder="e.g., dog", value="dog")
                    profile_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")

                profile_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary")
                profile_output = gr.Markdown(label="Profile Results")

                gr.Examples(
                    examples=[["dog", "en"], ["Hund", "de"], ["perro", "es"], ["chien", "fr"], ["gatto", "it"]],
                    inputs=[profile_word_input, profile_lang_input]
                )

            with gr.TabItem("⚑ Query Builder"):
                gr.Markdown("Construct a specific query using nodes and a relation.")

                with gr.Row():
                    query_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Node Language")
                    query_limit_slider = gr.Slider(label="Result Limit", minimum=1, maximum=500, value=50, step=10)

                with gr.Row():
                    query_start_input = gr.Textbox(
                        label="Start Node",
                        placeholder="e.g., dog (word) or /c/en/dog (URI)"
                    )

                    # --- THIS IS THE KEY UI IMPROVEMENT ---
                    query_rel_input = gr.Dropdown(
                        label="Relation",
                        choices=RELATION_CHOICES,  # Use the full list
                        value="/r/IsA"  # Default to a common one
                    )

                    query_end_input = gr.Textbox(
                        label="End Node",
                        placeholder="e.g., animal (word) or /c/en/animal (URI)"
                    )

                query_btn = gr.Button("▢️ Run Query", variant="primary")

                query_status_output = gr.Markdown()
                query_results_output = gr.DataFrame(
                    label="Query Results",
                    wrap=True,
                    interactive=False
                )

            with gr.TabItem("πŸ’» Raw SQL"):
                gr.Markdown(
                    "**Warning:** Directly query the SQLite database. Only `SELECT` statements are allowed. "
                    "Use the 'Schema' tab to see table names."
                )
                raw_sql_input = gr.Textbox(
                    label="SQL Query",
                    value=f"SELECT s.label, r.label, en.label, e.weight\nFROM edge e\nJOIN relation r ON e.rel_id = r.id\nJOIN node s ON e.start_id = s.id\nJOIN node en ON e.end_id = en.id\nWHERE s.id LIKE '{CONCEPTNET_BASE_URI}/c/en/dog%'\n AND r.id = '/r/IsA'\nORDER BY e.weight DESC\nLIMIT 10",
                    lines=5,
                    max_lines=20
                )
                raw_btn = gr.Button("▢️ Execute SQL")
                raw_status = gr.Markdown()
                raw_results = gr.DataFrame(label="SQL Results", interactive=False)

            # Keep a handle on the tab so its select event can be wired below.
            with gr.TabItem("πŸ“Š Schema") as schema_tab:
                gr.Markdown("View the database schema and table counts.")
                schema_btn = gr.Button("πŸ“Š Load Schema Info")
                schema_output = gr.Markdown()

            with gr.TabItem("πŸ”Œ API Docs"):
                gr.Markdown(
                    "## JSON API Endpoints\n"
                    "This Gradio app is mounted on a FastAPI server. You can use the following JSON API endpoints directly."
                )
                gr.JSON({
                    "docs": "/api",
                    "profile": "/api/profile/{word}?lang=en&limit=10",
                    "query": "/api/query?start_node=dog&relation_uri=/r/IsA&lang=en&limit=50",
                    "relations": "/api/relations",
                    "languages": "/api/languages"
                }, label="API Endpoints")
                gr.Markdown(
                    "### Example (cURL)\n"
                    "```bash\n# (Assumes app is running at localhost:7860)\ncurl http://localhost:7860/api/profile/dog?lang=en\n```\n"
                    "```bash\ncurl 'http://localhost:7860/api/query?start_node=dog&relation_uri=/r/IsA&limit=10'\n```"
                )

        # --- Link UI components to functions ---
        profile_btn.click(
            fn=get_semantic_profile_ui,
            inputs=[profile_word_input, profile_lang_input],
            outputs=[profile_output],
            api_name="get_semantic_profile"  # Gradio-native API
        )

        query_btn.click(
            fn=run_query_ui,
            inputs=[query_start_input, query_rel_input, query_end_input, query_lang_input, query_limit_slider],
            outputs=[query_results_output, query_status_output],
            api_name="run_query"  # Gradio-native API
        )

        raw_btn.click(
            fn=run_raw_query_ui,
            inputs=[raw_sql_input],
            outputs=[raw_results, raw_status]
        )

        # Populate the schema when its tab is selected. A Markdown component
        # has no `load` listener (only Blocks does), so the tab's `select`
        # event is what gives the intended "load on tab click" behaviour.
        schema_tab.select(
            fn=get_schema_info_ui,
            inputs=None,
            outputs=[schema_output]
        )
        schema_btn.click(
            fn=get_schema_info_ui,
            inputs=None,
            outputs=[schema_output]
        )

    return demo
727
+
728
+ # ============================================================================
729
+ # 7. APP MOUNTING & LAUNCH
730
+ # ============================================================================
 
 
 
731
 
732
# --- 1. Setup the Database (Download from HF Hub) ---
# Runs at import time, before any UI object exists. A failure is reported but
# never aborts start-up: the UI still comes up and surfaces errors per request.
try:
    if setup_database():
        print(f"βœ… Database is ready at: {DB_PATH}")
    else:
        banner_rule = "=" * 70
        print("\n" + banner_rule)
        print("❌ CRITICAL ERROR: Could not set up the database.")
        print(f" Please check your connection or manually download '{INDEXED_DB_FILENAME}'")
        print(f" from '{INDEXED_REPO_ID}' and place it in this directory.")
        print(banner_rule + "\n")
except Exception as e:
    print(f"❌ CRITICAL ERROR during database setup: {e}")
    traceback.print_exc()


# --- 2. Create the Gradio UI ---
log_progress("Creating Gradio UI...", "INFO")
demo = create_gradio_ui()

# --- 3. Mount Gradio onto the FastAPI app ---
# FastAPI keeps serving /api/* while Gradio is served from the root path.
log_progress("Mounting Gradio UI onto FastAPI app...", "INFO")
app = gr.mount_gradio_app(app, demo, path="/")
757
 
758
+
759
+ # ============================================================================
760
+ # 8. MAIN EXECUTION BLOCK
761
+ # ============================================================================
762
+
763
if __name__ == "__main__":
    # Startup banner: (message, level) pairs emitted in order.
    rule = "=" * 60
    startup_banner = (
        (rule, "SUCCESS"),
        ("πŸš€ CONCEPTNET EXPLORER APP READY!", "SUCCESS"),
        (rule, "SUCCESS"),
        ("UI: http://localhost:7860/", "INFO"),
        ("API: http://localhost:7860/api", "INFO"),
        (" http://localhost:7860/api/profile/dog", "INFO"),
        (" http://localhost:7860/api/query?start_node=dog&relation_uri=/r/IsA", "INFO"),
        (rule, "SUCCESS"),
    )
    for message, level in startup_banner:
        log_progress(message, level)

    # Serve the combined FastAPI + Gradio app on all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)