cstr committed on
Commit
a322a4b
·
verified ·
1 Parent(s): 3b8376c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +337 -557
app.py CHANGED
@@ -1,250 +1,91 @@
1
- # ============================================================================
2
- # 1. CONSOLIDATED IMPORTS
3
- # ============================================================================
4
  import gradio as gr
5
  import sqlite3
6
  import pandas as pd
7
  from huggingface_hub import hf_hub_download, HfApi
8
- from fastapi import FastAPI, Response
9
  from fastapi.responses import JSONResponse
10
  import os
11
  import time
12
  import json
13
- from typing import Optional, Dict, List, Any
14
- import uvicorn
15
- import traceback
16
- from pathlib import Path
17
- from contextlib import asynccontextmanager # <-- FIX 1: Add this import
18
 
19
- # ============================================================================
20
- # 2. SHARED GLOBALS & CONFIGURATION
21
- # ============================================================================
22
-
23
- # --- Languages ---
24
  TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
25
- print(f"🌍 Target Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
26
-
27
- # --- Hugging Face & DB Config ---
28
  INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
29
  INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
30
  PROGRESS_FILENAME = "indexing_progress.json"
31
- CONCEPTNET_BASE_URI = "http://conceptnet.io"
32
- DB_PATH: Optional[str] = None # Will be set by setup_database()
33
-
34
- # --- HF Token ---
35
- HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
36
-
37
- # --- All relations MUST be full URLs ---
38
- CONCEPTNET_RELATIONS: Dict[str, str] = {
39
- "RelatedTo": f"{CONCEPTNET_BASE_URI}/r/RelatedTo",
40
- "IsA": f"{CONCEPTNET_BASE_URI}/r/IsA",
41
- "PartOf": f"{CONCEPTNET_BASE_URI}/r/PartOf",
42
- "HasA": f"{CONCEPTNET_BASE_URI}/r/HasA",
43
- "UsedFor": f"{CONCEPTNET_BASE_URI}/r/UsedFor",
44
- "CapableOf": f"{CONCEPTNET_BASE_URI}/r/CapableOf",
45
- "AtLocation": f"{CONCEPTNET_BASE_URI}/r/AtLocation",
46
- "Causes": f"{CONCEPTNET_BASE_URI}/r/Causes",
47
- "HasSubevent": f"{CONCEPTNET_BASE_URI}/r/HasSubevent",
48
- "HasFirstSubevent": f"{CONCEPTNET_BASE_URI}/r/HasFirstSubevent",
49
- "HasLastSubevent": f"{CONCEPTNET_BASE_URI}/r/HasLastSubevent",
50
- "HasPrerequisite": f"{CONCEPTNET_BASE_URI}/r/HasPrerequisite",
51
- "HasProperty": f"{CONCEPTNET_BASE_URI}/r/HasProperty",
52
- "MotivatedByGoal": f"{CONCEPTNET_BASE_URI}/r/MotivatedByGoal",
53
- "ObstructedBy": f"{CONCEPTNET_BASE_URI}/r/ObstructedBy",
54
- "Desires": f"{CONCEPTNET_BASE_URI}/r/Desires",
55
- "CreatedBy": f"{CONCEPTNET_BASE_URI}/r/CreatedBy",
56
- "Synonym": f"{CONCEPTNET_BASE_URI}/r/Synonym",
57
- "Antonym": f"{CONCEPTNET_BASE_URI}/r/Antonym",
58
- "DistinctFrom": f"{CONCEPTNET_BASE_URI}/r/DistinctFrom",
59
- "DerivedFrom": f"{CONCEPTNET_BASE_URI}/r/DerivedFrom",
60
- "SymbolOf": f"{CONCEPTNET_BASE_URI}/r/SymbolOf",
61
- "DefinedAs": f"{CONCEPTNET_BASE_URI}/r/DefinedAs",
62
- "MannerOf": f"{CONCEPTNET_BASE_URI}/r/MannerOf",
63
- "LocatedNear": f"{CONCEPTNET_BASE_URI}/r/LocatedNear",
64
- "HasContext": f"{CONCEPTNET_BASE_URI}/r/HasContext",
65
- "SimilarTo": f"{CONCEPTNET_BASE_URI}/r/SimilarTo",
66
- "EtymologicallyRelatedTo": f"{CONCEPTNET_BASE_URI}/r/EtymologicallyRelatedTo",
67
- "EtymologicallyDerivedFrom": f"{CONCEPTNET_BASE_URI}/r/EtymologicallyDerivedFrom",
68
- "CausesDesire": f"{CONCEPTNET_BASE_URI}/r/CausesDesire",
69
- "MadeOf": f"{CONCEPTNET_BASE_URI}/r/MadeOf",
70
- "ReceivesAction": f"{CONCEPTNET_BASE_URI}/r/ReceivesAction",
71
- "ExternalURL": f"{CONCEPTNET_BASE_URI}/r/ExternalURL",
72
- "NotDesires": f"{CONCEPTNET_BASE_URI}/r/NotDesires",
73
- "NotUsedFor": f"{CONCEPTNET_BASE_URI}/r/NotUsedFor",
74
- "NotCapableOf": f"{CONCEPTNET_BASE_URI}/r/NotCapableOf",
75
- "NotHasProperty": f"{CONCEPTNET_BASE_URI}/r/NotHasProperty",
76
- }
77
 
78
- # Sorted list of (Label, Full_URL) tuples for Gradio dropdowns
79
- RELATION_CHOICES = sorted(CONCEPTNET_RELATIONS.items())
80
 
81
- # ============================================================================
82
- # 3. DATABASE SETUP & HELPERS
83
- # ============================================================================
84
 
85
  def log_progress(message, level="INFO"):
86
- """Helper for logging with emoji prefixes."""
87
  timestamp = time.strftime("%H:%M:%S")
88
  prefix = {"INFO": "ℹ️ ", "SUCCESS": "βœ…", "ERROR": "❌", "WARN": "⚠️ ", "DEBUG": "πŸ”"}.get(level, "")
89
  print(f"[{timestamp}] {prefix} {message}")
90
 
91
  def check_remote_progress():
92
- """Check HF Hub for the indexing progress file."""
93
  if not HF_TOKEN:
94
- log_progress("No HF_TOKEN. Assuming local DB or public repo.", "WARN")
95
- try:
96
- progress_path = hf_hub_download(
97
- repo_id=INDEXED_REPO_ID,
98
- filename=PROGRESS_FILENAME,
99
- repo_type="dataset"
100
- )
101
- with open(progress_path, 'r') as f:
102
- return json.load(f)
103
- except Exception:
104
- return {"indexing_complete": False} # Fallback
105
-
106
  try:
107
  api = HfApi()
108
  api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
109
- progress_path = hf_hub_download(
110
- repo_id=INDEXED_REPO_ID,
111
- filename=PROGRESS_FILENAME,
112
- repo_type="dataset",
113
- token=HF_TOKEN
114
- )
115
  with open(progress_path, 'r') as f:
116
  return json.load(f)
117
- except Exception as e:
118
- log_progress(f"Could not check remote progress: {e}", "WARN")
119
  return {"indexing_complete": False}
120
 
121
- def setup_database():
122
- """
123
- Downloads the pre-indexed database from Hugging Face Hub.
124
- This should only be called once on app startup.
125
- """
126
- global DB_PATH
127
- log_progress("Attempting to load indexed database...", "INFO")
128
-
129
- local_path = Path(INDEXED_DB_FILENAME)
130
- if local_path.exists() and local_path.stat().st_size > 1000000:
131
- log_progress(f"Found existing local DB: {local_path.resolve()}", "SUCCESS")
132
- DB_PATH = str(local_path.resolve())
133
- return DB_PATH
134
-
135
  progress = check_remote_progress()
136
  if progress.get("indexing_complete"):
137
  try:
138
- log_progress(f"Downloading {INDEXED_DB_FILENAME} from {INDEXED_REPO_ID}...", "INFO")
139
- path = hf_hub_download(
140
- repo_id=INDEXED_REPO_ID,
141
- filename=INDEXED_DB_FILENAME,
142
- repo_type="dataset",
143
- token=HF_TOKEN
144
- )
145
- DB_PATH = path
146
- log_progress(f"Indexed DB loaded successfully from Hub: {path}", "SUCCESS")
147
  return path
148
- except Exception as e:
149
- log_progress(f"Failed to download indexed DB: {e}", "ERROR")
150
- return None
151
- else:
152
- log_progress("Remote indexing is not complete or progress file not found.", "ERROR")
153
- return None
154
-
155
- def get_db_connection() -> sqlite3.Connection:
156
- """
157
- Returns a new SQLite connection to the database.
158
- Includes performance PRAGMAs.
159
- """
160
- if DB_PATH is None:
161
- raise ConnectionError("Database path is not set. The lifespan startup event might have failed.")
162
-
163
- conn = sqlite3.connect(f"file:{DB_PATH}?mode=ro", uri=True, check_same_thread=False)
164
-
165
- conn.execute("PRAGMA journal_mode = OFF")
166
- conn.execute("PRAGMA synchronous = 0")
167
- conn.execute("PRAGMA cache_size = -256000") # 256MB cache
168
- conn.execute("PRAGMA mmap_size = 4294967296") # 4GB mmap
169
- conn.execute("PRAGMA temp_store = MEMORY")
170
-
171
- return conn
172
 
173
- # ============================================================================
174
- # 4. API (FASTAPI) ENDPOINTS
175
- # ============================================================================
176
 
177
- # --- FIX 2: Define the lifespan event manager ---
178
- @asynccontextmanager
179
- async def lifespan(app: FastAPI):
180
- # --- Code to run ON STARTUP ---
181
- log_progress("Lifespan startup: Setting up database...", "INFO")
182
- try:
183
- if not setup_database():
184
- print("\n" + "="*70)
185
- print("❌ CRITICAL ERROR: Could not set up the database.")
186
- print(f" Please check connection or manually download '{INDEXED_DB_FILENAME}'")
187
- print(f" from '{INDEXED_REPO_ID}' and place it in this directory.")
188
- print("="*70 + "\n")
189
- else:
190
- print(f"βœ… Database is ready at: {DB_PATH}")
191
- except Exception as e:
192
- print(f"❌ CRITICAL ERROR during database setup: {e}")
193
- traceback.print_exc()
194
-
195
- # --- App is now running ---
196
- yield
197
-
198
- # --- Code to run ON SHUTDOWN (optional) ---
199
- log_progress("Lifespan shutdown.", "INFO")
200
-
201
- # --- FIX 3: Pass the lifespan manager to the FastAPI app ---
202
- app = FastAPI(
203
- title="ConceptNet Explorer API",
204
- version="1.0",
205
- description="A versatile API for querying a ConceptNet SQLite database.",
206
- lifespan=lifespan # <-- Here
207
- )
208
-
209
-
210
- @app.get("/api", tags=["API"])
211
- def api_docs():
212
- """API documentation - accessible at /api"""
213
- return {
214
- "name": "ConceptNet Explorer API",
215
- "version": "1.0",
216
- "endpoints": {
217
- "/api/profile/{word}": "Get a full semantic profile for a word.",
218
- "/api/query": "Run a specific query for edges.",
219
- "/api/relations": "List all available relation types.",
220
- "/api/languages": "List all supported languages."
221
- },
222
- "examples": {
223
- "profile": "/api/profile/dog?lang=en&limit=10",
224
- "query": f"/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&limit=20"
225
- },
226
- "note": "Visit the root path (/) for the Gradio UI."
227
- }
228
 
229
- @app.get("/api/profile/{word}", tags=["API"])
230
- def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
231
- """
232
- API Endpoint: Get a full semantic profile for a word as JSON.
233
- """
 
 
 
 
 
 
 
 
 
 
 
 
234
  if not word or lang not in TARGET_LANGUAGES:
235
- return JSONResponse(
236
- status_code=400,
237
- content={"error": "Invalid input. 'word' is required and 'lang' must be valid."}
238
- )
239
 
240
- word_normalized = word.strip().lower().replace(' ', '_')
241
- like_path = f"{CONCEPTNET_BASE_URI}/c/{lang}/{word_normalized}%"
242
 
243
  result = {
244
- "query": {"word": word, "normalized": word_normalized, "lang": lang, "limit_per_relation": limit},
245
- "nodes_found": [],
 
246
  "relations": {},
247
- "total_edges_found": 0
248
  }
249
 
250
  try:
@@ -252,15 +93,12 @@ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
252
  cursor = conn.cursor()
253
 
254
  cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
255
- result["nodes_found"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]
256
 
257
- if not result["nodes_found"]:
258
- return JSONResponse(
259
- status_code=404,
260
- content={"error": f"Word '{word}' (normalized: '{word_normalized}') not found in language '{lang}'."}
261
- )
262
 
263
- for rel_name, rel_uri in RELATION_CHOICES:
264
  outgoing = []
265
  incoming = []
266
 
@@ -271,9 +109,9 @@ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
271
  WHERE e.start_id LIKE ? AND e.rel_id = ?
272
  ORDER BY e.weight DESC
273
  LIMIT ?
274
- """, (like_path, rel_uri, limit))
275
 
276
- outgoing = [{"target_label": label, "weight": weight, "target_id": eid}
277
  for label, weight, eid in cursor.fetchall()]
278
 
279
  cursor.execute("""
@@ -283,112 +121,35 @@ def get_semantic_profile_json(word: str, lang: str = 'en', limit: int = 10):
283
  WHERE e.end_id LIKE ? AND e.rel_id = ?
284
  ORDER BY e.weight DESC
285
  LIMIT ?
286
- """, (like_path, rel_uri, limit))
287
 
288
- incoming = [{"source_label": label, "weight": weight, "source_id": sid}
289
  for label, weight, sid in cursor.fetchall()]
290
 
291
  if outgoing or incoming:
292
  result["relations"][rel_name] = {
293
- "uri": rel_uri,
294
  "outgoing": outgoing,
295
  "incoming": incoming,
296
  "count": len(outgoing) + len(incoming)
297
  }
298
- result["total_edges_found"] += len(outgoing) + len(incoming)
299
-
300
- return JSONResponse(content=result)
301
-
302
- except Exception as e:
303
- log_progress(f"API /profile error: {e}", "ERROR")
304
- traceback.print_exc()
305
- return JSONResponse(status_code=500, content={"error": str(e)})
306
-
307
- @app.get("/api/query", tags=["API"])
308
- def query_edges_json(
309
- start_node: Optional[str] = None,
310
- relation_uri: Optional[str] = None,
311
- end_node: Optional[str] = None,
312
- lang: str = 'en',
313
- limit: int = 50
314
- ):
315
- """
316
- API Endpoint: Query edges with flexible filters.
317
- """
318
- query = """
319
- SELECT
320
- e.id as edge_id, s.id as start_id, r.id as relation_id, en.id as end_id,
321
- e.weight, s.label as start_label, r.label as relation_label, en.label as end_label
322
- FROM edge e
323
- JOIN relation r ON e.rel_id = r.id
324
- JOIN node s ON e.start_id = s.id
325
- JOIN node en ON e.end_id = en.id
326
- WHERE 1=1
327
- """
328
- params: List[Any] = []
329
-
330
- try:
331
- def build_node_pattern(node_str: str) -> str:
332
- if node_str.startswith(f'{CONCEPTNET_BASE_URI}/c/'):
333
- return f"{node_str}%"
334
- return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
335
-
336
- with get_db_connection() as conn:
337
- if start_node:
338
- query += " AND s.id LIKE ?"
339
- params.append(build_node_pattern(start_node))
340
 
341
- if relation_uri:
342
- query += " AND r.id = ?"
343
- params.append(relation_uri)
344
-
345
- if end_node:
346
- query += " AND en.id LIKE ?"
347
- params.append(build_node_pattern(end_node))
348
-
349
- query += " ORDER BY e.weight DESC LIMIT ?"
350
- params.append(limit)
351
-
352
- df = pd.read_sql_query(query, conn, params=params)
353
-
354
- return {
355
- "query": {"start_node": start_node, "relation_uri": relation_uri, "end_node": end_node, "lang": lang, "limit": limit},
356
- "results": df.to_dict(orient='records'),
357
- "count": len(df)
358
- }
359
 
360
  except Exception as e:
361
- log_progress(f"API /query error: {e}", "ERROR")
362
- traceback.print_exc()
363
- return JSONResponse(status_code=500, content={"error": str(e)})
364
-
365
- @app.get("/api/relations", tags=["API"])
366
- def api_relations():
367
- """API Endpoint: List all configured relations."""
368
- return JSONResponse(content={"relations": CONCEPTNET_RELATIONS})
369
-
370
- @app.get("/api/languages", tags=["API"])
371
- def api_languages():
372
- """API Endpoint: List all configured languages."""
373
- return JSONResponse(content={"languages": TARGET_LANGUAGES})
374
 
375
- # ============================================================================
376
- # 5. GRADIO UI HELPER FUNCTIONS
377
- # ============================================================================
378
-
379
- def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
380
- """
381
- Gradio UI Function: Get semantic profile formatted as Markdown.
382
- """
383
- log_progress(f"UI Profile: {word} ({lang})", "INFO")
384
 
385
  if not word or lang not in TARGET_LANGUAGES:
386
- return "⚠️ Invalid input. Please provide a word and select a language."
387
 
388
  progress(0, desc="πŸ” Starting...")
389
 
390
- word_normalized = word.strip().lower().replace(' ', '_')
391
- like_path = f"{CONCEPTNET_BASE_URI}/c/{lang}/{word_normalized}%"
392
 
393
  output_md = f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"
394
 
@@ -397,111 +158,198 @@ def get_semantic_profile_ui(word: str, lang: str, progress=gr.Progress()):
397
  cursor = conn.cursor()
398
 
399
  progress(0.05, desc="πŸ“ Finding nodes...")
 
400
  cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
401
  nodes = cursor.fetchall()
402
 
403
  if not nodes:
404
- return f"# 🧠 '{word}'\n\n⚠️ **Word not found** (as `.../c/{lang}/{word_normalized}...`)"
405
 
406
  log_progress(f"Found {len(nodes)} nodes", "SUCCESS")
407
 
408
- output_md += "**Matching Nodes:**\n"
409
  for node_id, label in nodes[:3]:
410
- output_md += f"- **{label}** (ID: `{node_id}`)\n"
411
- output_md += "\n---\n"
412
 
413
- total_found = 0
414
- num_relations = len(RELATION_CHOICES)
415
 
416
- for i, (rel_name, rel_uri) in enumerate(RELATION_CHOICES):
417
  progress((i + 0.1) / num_relations, desc=f"πŸ”Ž {rel_name}...")
418
 
 
 
 
 
419
  cursor.execute("""
420
  SELECT en.label, e.weight
421
- FROM edge e JOIN node en ON e.end_id = en.id
 
422
  WHERE e.start_id LIKE ? AND e.rel_id = ?
423
- ORDER BY e.weight DESC LIMIT 10
424
- """, (like_path, rel_uri))
425
- outgoing = cursor.fetchall()
 
 
 
 
 
 
 
 
 
 
426
 
427
  cursor.execute("""
428
  SELECT s.label, e.weight
429
- FROM edge e JOIN node s ON e.start_id = s.id
 
430
  WHERE e.end_id LIKE ? AND e.rel_id = ?
431
- ORDER BY e.weight DESC LIMIT 10
432
- """, (like_path, rel_uri))
433
- incoming = cursor.fetchall()
434
-
435
- if outgoing or incoming:
436
- output_md += f"### {rel_name} (`{rel_uri}`)\n\n"
437
- total_found += len(outgoing) + len(incoming)
438
-
439
- for label, weight in outgoing:
440
- output_md += f"- **{word}** β†’ *{label}* `[{weight:.3f}]`\n"
441
-
442
- for label, weight in incoming:
443
- output_md += f"- *{label}* β†’ **{word}** `[{weight:.3f}]`\n"
444
-
445
- output_md += "\n"
446
 
 
 
 
 
 
 
 
 
 
447
  progress((i + 1) / num_relations, desc=f"βœ“ {rel_name}")
448
 
449
  progress(1.0, desc="βœ… Complete!")
450
- output_md += f"---\n**Total relations found:** {total_found}\n"
451
- log_progress(f"Complete: {total_found} relations", "SUCCESS")
 
452
 
453
  return output_md
454
 
455
  except Exception as e:
456
- log_progress(f"UI Profile Error: {e}", "ERROR")
 
457
  traceback.print_exc()
458
- return f"**❌ An unexpected error occurred:**\n\n```\n{e}\n```"
459
 
460
- def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, limit: int, progress=gr.Progress()):
461
- """
462
- Gradio UI Function: Query builder.
 
 
 
 
 
 
 
 
 
 
463
  """
464
- log_progress(f"UI Query: start={start_node}, rel={relation_uri}, end={end_node}, lang={lang}", "INFO")
465
 
466
- progress(0, desc="πŸ” Building query...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
467
 
468
  query = """
469
  SELECT
470
- s.label as start_label, r.label as relation_label, en.label as end_label,
471
- e.weight, s.id as start_id, r.id as relation_id, en.id as end_id, e.id as edge_id
472
  FROM edge e
473
  JOIN relation r ON e.rel_id = r.id
474
  JOIN node s ON e.start_id = s.id
475
  JOIN node en ON e.end_id = en.id
476
  WHERE 1=1
477
  """
478
- params: List[Any] = []
 
479
 
480
  try:
481
- def build_node_pattern(node_str: str) -> str:
482
- if node_str.strip().startswith(f'{CONCEPTNET_BASE_URI}/c/'):
483
- return f"{node_str.strip()}%"
484
- return f"{CONCEPTNET_BASE_URI}/c/{lang}/{node_str.strip().lower().replace(' ', '_')}%"
485
-
486
  with get_db_connection() as conn:
487
- progress(0.3, desc="πŸ“ Applying filters...")
488
 
489
  if start_node and start_node.strip():
 
 
 
 
490
  query += " AND s.id LIKE ?"
491
- params.append(build_node_pattern(start_node))
492
-
493
- if relation_uri and relation_uri.strip():
 
 
 
 
 
 
494
  query += " AND r.id = ?"
495
- params.append(relation_uri)
496
 
497
  if end_node and end_node.strip():
 
 
 
 
498
  query += " AND en.id LIKE ?"
499
- params.append(build_node_pattern(end_node))
500
 
501
  query += " ORDER BY e.weight DESC LIMIT ?"
502
  params.append(limit)
503
 
504
- progress(0.6, desc="⚑ Running query...")
505
 
506
  start_time = time.time()
507
  df = pd.read_sql_query(query, conn, params=params)
@@ -509,30 +357,23 @@ def run_query_ui(start_node: str, relation_uri: str, end_node: str, lang: str, l
509
 
510
  progress(1.0, desc="βœ… Done!")
511
 
512
- log_progress(f"Query Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")
513
 
514
  if df.empty:
515
- return pd.DataFrame(), f"⚠️ No results found ({elapsed:.2f}s)"
516
-
517
- cols_to_show = [
518
- 'start_label', 'relation_label', 'end_label', 'weight',
519
- 'start_id', 'relation_id', 'end_id'
520
- ]
521
- df = df[cols_to_show]
522
 
 
523
  return df, f"βœ… {len(df)} results in {elapsed:.2f}s"
524
 
525
  except Exception as e:
526
- log_progress(f"UI Query Error: {e}", "ERROR")
 
527
  traceback.print_exc()
528
- return pd.DataFrame(), f"❌ **Error:**\n\n```\n{e}\n```"
529
 
530
- def run_raw_query_ui(sql_query: str):
531
- """
532
- Gradio UI Function: Raw SQL query.
533
- """
534
  if not sql_query.strip().upper().startswith("SELECT"):
535
- return pd.DataFrame(), "❌ **Error:** Only `SELECT` statements are allowed."
536
  try:
537
  with get_db_connection() as conn:
538
  start = time.time()
@@ -540,222 +381,161 @@ def run_raw_query_ui(sql_query: str):
540
  elapsed = time.time() - start
541
  return df, f"βœ… {len(df)} rows in {elapsed:.3f}s"
542
  except Exception as e:
543
- return pd.DataFrame(), f"❌ **Error:**\n\n```\n{e}\n```"
544
 
545
- def get_schema_info_ui():
546
- """
547
- Gradio UI Function: Display schema information.
548
- """
549
- md = f"# πŸ“š Database Schema\n\n"
550
- md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n"
551
 
552
- if DB_PATH:
553
- md += f"**Database File:** `{DB_PATH}`\n\n"
554
- else:
555
- md += "**Database File:** `NOT LOADED`\n\n"
556
- md += "⚠️ **Database not loaded.** Schema info may be incomplete. Check logs.\n"
557
- return md
558
-
559
  try:
560
  with get_db_connection() as conn:
561
  cursor = conn.cursor()
562
 
563
- md += "## Tables & Row Counts\n\n"
564
- cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
565
- tables = [row[0] for row in cursor.fetchall()]
 
566
 
567
- for table in tables:
 
 
568
  cursor.execute(f"SELECT COUNT(*) FROM {table}")
569
- count = cursor.fetchone()[0]
570
- md += f"- **{table}:** {count:,} rows\n"
571
-
572
- md += "\n## Configured Relations\n\n"
573
- md += "This list populates the 'Query Builder' dropdown. The values are the full URLs used in the DB.\n\n"
574
- for name, uri in RELATION_CHOICES:
575
- md += f"- **{name}:** `{uri}`\n"
576
 
577
  except Exception as e:
578
- md += f"\n**❌ Error loading schema:**\n\n```\n{e}\n```\n"
579
 
580
  return md
581
 
582
- # ============================================================================
583
- # 6. GRADIO UI DEFINITION
584
- # ============================================================================
585
 
586
- def create_gradio_ui():
587
- """Builds the consolidated Gradio interface."""
588
-
589
- with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft(primary_hue="blue")) as demo:
590
- gr.Markdown(
591
- "# 🧠 ConceptNet Explorer\n"
592
- "An interface for querying the ConceptNet semantic network."
593
- )
594
- gr.Markdown(
595
- f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | "
596
- f"**Database:** `{INDEXED_REPO_ID}` | "
597
- f"**JSON API:** Access at `/api`"
598
- )
599
-
600
- with gr.Tabs():
601
- with gr.TabItem("πŸ” Semantic Profile"):
602
- gr.Markdown("Explore all semantic relations for a single word. This queries all 34 relation types.")
603
-
604
- with gr.Row():
605
- profile_word_input = gr.Textbox(
606
- label="Word",
607
- placeholder="e.g., dog",
608
- value="dog",
609
- info="Enter a word"
610
- )
611
- profile_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
612
-
613
- profile_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary", size="lg")
614
- profile_output = gr.Markdown(label="Profile Results")
615
-
616
- gr.Examples(
617
- examples=[["dog", "en"], ["Hund", "de"], ["perro", "es"], ["chien", "fr"], ["gatto", "it"]],
618
- inputs=[profile_word_input, profile_lang_input]
619
- )
620
-
621
- with gr.TabItem("⚑ Query Builder"):
622
- gr.Markdown("Construct a specific query using nodes and a relation.")
623
-
624
- with gr.Row():
625
- query_lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Node Language")
626
- query_limit_slider = gr.Slider(label="Result Limit", minimum=1, maximum=500, value=50, step=10)
627
 
628
- with gr.Row():
629
- query_start_input = gr.Textbox(
630
- label="Start Node",
631
- placeholder="e.g., dog (word) or /c/en/dog (URI)"
632
- )
633
-
634
- query_rel_input = gr.Dropdown(
635
- label="Relation",
636
- choices=RELATION_CHOICES,
637
- value=f"{CONCEPTNET_BASE_URI}/r/IsA"
638
- )
639
-
640
- query_end_input = gr.Textbox(
641
- label="End Node",
642
- placeholder="e.g., animal (word) or /c/en/animal (URI)"
643
- )
644
-
645
- query_btn = gr.Button("▢️ Run Query", variant="primary", size="lg")
646
-
647
- query_status_output = gr.Markdown()
648
- query_results_output = gr.DataFrame(
649
- label="Query Results",
650
- wrap=True,
651
- interactive=False
652
- )
653
-
654
- with gr.TabItem("πŸ’» Raw SQL"):
655
- gr.Markdown(
656
- "**Warning:** Directly query the SQLite database. Only `SELECT` statements are allowed. "
657
- "Use the 'Schema' tab to see table names."
658
- )
659
- raw_sql_input = gr.Textbox(
660
- label="SQL Query",
661
- value=f"SELECT s.label, r.label, en.label, e.weight\nFROM edge e\nJOIN relation r ON e.rel_id = r.id\nJOIN node s ON e.start_id = s.id\nJOIN node en ON e.end_id = en.id\nWHERE s.id LIKE '{CONCEPTNET_BASE_URI}/c/en/dog%'\n AND r.id = '{CONCEPTNET_BASE_URI}/r/IsA'\nORDER BY e.weight DESC\nLIMIT 10",
662
- lines=5,
663
- max_lines=20
664
- )
665
- raw_btn = gr.Button("▢️ Execute SQL")
666
- raw_status = gr.Markdown()
667
- raw_results = gr.DataFrame(label="SQL Results", interactive=False)
668
-
669
- with gr.TabItem("πŸ“Š Schema") as schema_tab:
670
- gr.Markdown("View the database schema and table counts.")
671
- schema_btn = gr.Button("πŸ“Š Load Schema Info")
672
- schema_output = gr.Markdown()
673
-
674
- with gr.TabItem("πŸ”Œ API Docs"):
675
- gr.Markdown(
676
- "## JSON API Endpoints\n"
677
- "This Gradio app is mounted on a FastAPI server. You can use the following JSON API endpoints directly."
678
- )
679
- gr.JSON({
680
- "docs": "/api",
681
- "profile": "/api/profile/{word}?lang=en&limit=10",
682
- "query": f"/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&lang=en&limit=50",
683
- "relations": "/api/relations",
684
- "languages": "/api/languages"
685
- }, label="API Endpoints")
686
- gr.Markdown(
687
- "### Example (cURL)\n"
688
- "```bash\n# (Assumes app is running at localhost:7860)\ncurl http://localhost:7860/api/profile/dog?lang=en\n```\n"
689
- f"```bash\ncurl 'http://localhost:7860/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA&limit=10'\n```"
690
- )
691
 
692
- # --- Link UI components to functions ---
693
- profile_btn.click(
694
- fn=get_semantic_profile_ui,
695
- inputs=[profile_word_input, profile_lang_input],
696
- outputs=[profile_output],
697
- api_name="get_semantic_profile"
698
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699
 
700
- query_btn.click(
701
- fn=run_query_ui,
702
- inputs=[query_start_input, query_rel_input, query_end_input, query_lang_input, query_limit_slider],
703
- outputs=[query_results_output, query_status_output],
704
- api_name="run_query"
705
- )
 
 
 
 
 
706
 
707
- raw_btn.click(
708
- fn=run_raw_query_ui,
709
- inputs=[raw_sql_input],
710
- outputs=[raw_results, raw_status]
711
- )
 
 
 
 
712
 
713
- schema_tab.select(
714
- fn=get_schema_info_ui,
715
- inputs=None,
716
- outputs=[schema_output]
717
- )
718
- schema_btn.click(
719
- fn=get_schema_info_ui,
720
- inputs=None,
721
- outputs=[schema_output]
722
- )
723
 
724
- return demo
725
-
726
- # ============================================================================
727
- # 7. APP MOUNTING & LAUNCH
728
- # ============================================================================
729
-
730
- # --- FIX 4: REMOVE the top-level call to setup_database() ---
731
- # This is now handled by the FastAPI lifespan event
732
-
733
- log_progress("Creating Gradio UI...", "INFO")
734
- demo = create_gradio_ui()
 
 
 
 
 
 
 
 
 
735
 
736
- log_progress("Mounting Gradio UI onto FastAPI app...", "INFO")
737
  app = gr.mount_gradio_app(app, demo, path="/")
738
 
739
-
740
- # ============================================================================
741
- # 8. MAIN EXECUTION BLOCK
742
- # ============================================================================
743
-
744
- # This block is for LOCAL execution (e.g., `python app.py`)
745
- # The Hugging Face platform will IGNORE this and run `app` using Gunicorn
746
  if __name__ == "__main__":
747
  log_progress("="*60, "SUCCESS")
748
- log_progress("πŸš€ (LOCAL) CONCEPTNET EXPLORER APP READY!", "SUCCESS")
749
  log_progress("="*60, "SUCCESS")
750
  log_progress("UI: http://localhost:7860/", "INFO")
751
  log_progress("API: http://localhost:7860/api", "INFO")
752
- log_progress(f" http://localhost:7860/api/profile/dog", "INFO")
753
- log_progress(f" http://localhost:7860/api/query?start_node=dog&relation_uri={CONCEPTNET_BASE_URI}/r/IsA", "INFO")
754
  log_progress("="*60, "SUCCESS")
755
 
756
- uvicorn.run(
757
- "app:app", # Run the 'app' object from the 'app' file
758
- host="0.0.0.0",
759
- port=7860,
760
- reload=True # Enable reload for local dev
761
- )
 
 
 
 
1
  import gradio as gr
2
  import sqlite3
3
  import pandas as pd
4
  from huggingface_hub import hf_hub_download, HfApi
5
+ from fastapi import FastAPI
6
  from fastapi.responses import JSONResponse
7
  import os
8
  import time
9
  import json
10
+ from typing import Optional
 
 
 
 
11
 
12
+ # ===== CONFIGURATION =====
 
 
 
 
13
  TARGET_LANGUAGES = ['de', 'en', 'es', 'fr', 'it', 'ja', 'nl', 'pl', 'pt', 'ru', 'zh']
 
 
 
14
  INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
15
  INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
16
  PROGRESS_FILENAME = "indexing_progress.json"
17
+ CONCEPTNET_BASE = "http://conceptnet.io"
18
+ # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ print(f"🌍 Languages: {', '.join([l.upper() for l in TARGET_LANGUAGES])}")
 
21
 
22
+ HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_API_TOKEN")
 
 
23
 
24
  def log_progress(message, level="INFO"):
 
25
  timestamp = time.strftime("%H:%M:%S")
26
  prefix = {"INFO": "ℹ️ ", "SUCCESS": "βœ…", "ERROR": "❌", "WARN": "⚠️ ", "DEBUG": "πŸ”"}.get(level, "")
27
  print(f"[{timestamp}] {prefix} {message}")
28
 
29
  def check_remote_progress():
 
30
  if not HF_TOKEN:
31
+ return {"indexing_complete": False}
 
 
 
 
 
 
 
 
 
 
 
32
  try:
33
  api = HfApi()
34
  api.repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
35
+ progress_path = hf_hub_download(repo_id=INDEXED_REPO_ID, filename=PROGRESS_FILENAME, repo_type="dataset", token=HF_TOKEN)
 
 
 
 
 
36
  with open(progress_path, 'r') as f:
37
  return json.load(f)
38
+ except:
 
39
  return {"indexing_complete": False}
40
 
41
+ def create_indexed_database():
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  progress = check_remote_progress()
43
  if progress.get("indexing_complete"):
44
  try:
45
+ path = hf_hub_download(repo_id=INDEXED_REPO_ID, filename=INDEXED_DB_FILENAME, repo_type="dataset", token=HF_TOKEN)
46
+ log_progress("Indexed DB loaded", "SUCCESS")
 
 
 
 
 
 
 
47
  return path
48
+ except:
49
+ pass
50
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
+ DB_PATH = create_indexed_database()
 
 
53
 
54
+ def get_db_connection():
55
+ conn = sqlite3.connect(DB_PATH, check_same_thread=False)
56
+ conn.execute("PRAGMA cache_size = -256000")
57
+ conn.execute("PRAGMA mmap_size = 4294967296")
58
+ return conn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
# Relation names exposed by the profile views and the API, in the order they
# are queried and displayed.
_RELATION_NAMES = (
    "IsA",
    "PartOf",
    "HasA",
    "UsedFor",
    "CapableOf",
    "Causes",
    "HasProperty",
    "Synonym",
    "Antonym",
    "AtLocation",
    "RelatedTo",
    "DerivedFrom",
)

# (name, full relation URL) pairs; the URL is what gets compared to edge.rel_id.
RELATIONS = [(name, f"{CONCEPTNET_BASE}/r/{name}") for name in _RELATION_NAMES]
74
+
75
def get_semantic_profile_json(word: str, lang: str = 'en', max_per_relation: int = 10):
    """Get semantic profile as JSON

    Args:
        word: Surface form to look up; it is stripped, lower-cased and spaces
            are replaced by underscores before being used as a node-id prefix.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        max_per_relation: Row limit applied separately to the outgoing and
            the incoming query of every relation.

    Returns:
        A dict with ``word``, ``language``, ``nodes``, ``relations`` and
        ``total_edges``; or ``{"error": ...}`` for invalid input, an unknown
        word, or any database failure.
    """
    from contextlib import closing

    # Normalise before validating: whitespace-only input would otherwise yield
    # the pattern ".../c/<lang>/%", which matches every node of the language.
    word = word.strip().lower().replace(' ', '_') if word else ""
    if not word or lang not in TARGET_LANGUAGES:
        return {"error": "Invalid input"}

    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"

    result = {
        "word": word,
        "language": lang,
        "nodes": [],
        "relations": {},
        "total_edges": 0
    }

    try:
        # sqlite3's own context manager only commits/rolls back; closing()
        # makes sure the connection is actually released after each call.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            result["nodes"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]

            if not result["nodes"]:
                return {"error": "Word not found"}

            for rel_name, rel_url in RELATIONS:
                # NOTE(review): the SELECT/FROM/JOIN lines of the two queries
                # below follow the three-column unpacking and the matching
                # queries in get_semantic_profile — confirm against the file.
                cursor.execute("""
                    SELECT en.label, e.weight, en.id
                    FROM edge e
                    JOIN node en ON e.end_id = en.id
                    WHERE e.start_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC
                    LIMIT ?
                """, (like_path, rel_url, max_per_relation))

                outgoing = [{"target": label, "weight": weight, "target_id": eid}
                            for label, weight, eid in cursor.fetchall()]

                cursor.execute("""
                    SELECT s.label, e.weight, s.id
                    FROM edge e
                    JOIN node s ON e.start_id = s.id
                    WHERE e.end_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC
                    LIMIT ?
                """, (like_path, rel_url, max_per_relation))

                incoming = [{"source": label, "weight": weight, "source_id": sid}
                            for label, weight, sid in cursor.fetchall()]

                # Relations without any edge are omitted from the result.
                if outgoing or incoming:
                    result["relations"][rel_name] = {
                        "outgoing": outgoing,
                        "incoming": incoming,
                        "count": len(outgoing) + len(incoming)
                    }
                    result["total_edges"] += len(outgoing) + len(incoming)

        return result

    except Exception as e:
        return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
def get_semantic_profile(word, lang='en', progress=gr.Progress()):
    """Get semantic profile with progress

    Renders a Markdown report for the Gradio UI: up to three matching nodes,
    then for every entry of ``RELATIONS`` the ten heaviest outgoing and the
    ten heaviest incoming edges.

    Args:
        word: Surface form to look up; stripped, lower-cased, spaces become
            underscores.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress tracker, called with a fraction and a
            description as the lookup advances.

    Returns:
        A Markdown string: the profile, a warning for invalid/unknown input,
        or an error message when the lookup fails.
    """
    from contextlib import closing
    import traceback

    log_progress(f"Profile: {word} ({lang})", "INFO")

    # Normalise before validating: whitespace-only input would otherwise yield
    # the pattern ".../c/<lang>/%", which matches every node of the language.
    word = word.strip().lower().replace(' ', '_') if word else ""
    if not word or lang not in TARGET_LANGUAGES:
        return "⚠️ Invalid input"

    progress(0, desc="πŸ” Starting...")

    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"

    # Collect fragments and join once instead of growing a string with +=.
    parts = [f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"]

    # NOTE(review): the "try:" / "with get_db_connection()" opening is inferred
    # from the matching except clause and the use of conn — confirm.
    try:
        # sqlite3's own context manager does not close the connection;
        # closing() releases it deterministically.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            progress(0.05, desc="πŸ“ Finding nodes...")

            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            nodes = cursor.fetchall()

            if not nodes:
                return f"# 🧠 '{word}'\n\n⚠️ Not found"

            log_progress(f"Found {len(nodes)} nodes", "SUCCESS")

            parts.extend(
                f"**Node:** `{node_id}` β†’ **{label}**\n"
                for node_id, label in nodes[:3]
            )
            parts.append("\n")

            total = 0
            num_relations = len(RELATIONS)

            for i, (rel_name, rel_url) in enumerate(RELATIONS):
                progress((i + 0.1) / num_relations, desc=f"πŸ”Ž {rel_name}...")

                parts.append(f"## {rel_name}\n\n")

                # Outgoing edges: <word> --rel--> other.
                start_time = time.time()
                cursor.execute("""
                    SELECT en.label, e.weight
                    FROM edge e
                    JOIN node en ON e.end_id = en.id
                    WHERE e.start_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC
                    LIMIT 10
                """, (like_path, rel_url))
                outgoing = cursor.fetchall()
                elapsed = time.time() - start_time

                log_progress(f" {rel_name} out: {len(outgoing)} in {elapsed:.3f}s", "DEBUG")

                parts.extend(
                    f"- **{word}** {rel_name} β†’ *{label}* `[{weight:.3f}]`\n"
                    for label, weight in outgoing
                )

                # Incoming edges: other --rel--> <word>.
                cursor.execute("""
                    SELECT s.label, e.weight
                    FROM edge e
                    JOIN node s ON e.start_id = s.id
                    WHERE e.end_id LIKE ? AND e.rel_id = ?
                    ORDER BY e.weight DESC
                    LIMIT 10
                """, (like_path, rel_url))
                incoming = cursor.fetchall()

                parts.extend(
                    f"- *{label}* {rel_name} β†’ **{word}** `[{weight:.3f}]`\n"
                    for label, weight in incoming
                )

                found = len(outgoing) + len(incoming)
                total += found
                if not found:
                    parts.append("*No results*\n")

                parts.append("\n")
                progress((i + 1) / num_relations, desc=f"βœ“ {rel_name}")

            progress(1.0, desc="βœ… Complete!")

            parts.append(f"---\n**Total relations:** {total}\n")
            log_progress(f"Complete: {total} relations", "SUCCESS")

            return "".join(parts)

    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        traceback.print_exc()
        return f"**❌ Error:** {e}"
237
 
238
def query_edges_json(start_node: Optional[str] = None,
                     relation: Optional[str] = None,
                     end_node: Optional[str] = None,
                     limit: int = 50):
    """Query edges JSON

    Args:
        start_node: Full ``http://`` node id prefix, or a bare term that is
            looked up under the English (``/c/en/``) namespace.
        relation: Full ``http://`` relation id, a ``/r/...`` path, or a bare
            relation name.
        end_node: Same forms as ``start_node``, applied to the edge target.
        limit: Maximum number of rows returned, heaviest edges first.

    Empty or whitespace-only filters are ignored.

    Returns:
        ``{"results": [row dicts], "count": n}`` on success, or
        ``{"error": message}`` when the query fails.
    """
    from contextlib import closing

    query = """
        SELECT
            e.id, s.id, r.label, en.id, e.weight, s.label, en.label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """

    params = []

    try:
        # Strip the filters so " dog" and "dog" behave the same, matching the
        # whitespace handling of the UI query builder.
        start_node = start_node.strip() if start_node else ""
        relation = relation.strip() if relation else ""
        end_node = end_node.strip() if end_node else ""

        if start_node:
            if start_node.startswith('http://'):
                pattern = f"{start_node}%"
            else:
                pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
            query += " AND s.id LIKE ?"
            params.append(pattern)

        if relation:
            if relation.startswith('http://'):
                rel_value = relation
            elif relation.startswith('/r/'):
                rel_value = f"{CONCEPTNET_BASE}{relation}"
            else:
                rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
            query += " AND r.id = ?"
            params.append(rel_value)

        if end_node:
            if end_node.startswith('http://'):
                pattern = f"{end_node}%"
            else:
                pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
            query += " AND en.id LIKE ?"
            params.append(pattern)

        query += " ORDER BY e.weight DESC LIMIT ?"
        params.append(limit)

        # sqlite3's own context manager does not close the connection;
        # closing() releases it as soon as the rows are loaded.
        with closing(get_db_connection()) as conn:
            df = pd.read_sql_query(query, conn, params=params)

        if df.empty:
            return {"results": [], "count": 0}

        df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']

        return {
            "results": df.to_dict(orient='records'),
            "count": len(df)
        }

    except Exception as e:
        return {"error": str(e)}
300
+
301
def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
    """Query builder

    Args:
        start_node: Full ``http://`` node id prefix, or a bare term looked up
            under the English (``/c/en/``) namespace. Blank values are ignored.
        relation: Full ``http://`` relation id, a ``/r/...`` path, or a bare
            relation name. Blank values are ignored.
        end_node: Same forms as ``start_node``, applied to the edge target.
        limit: Maximum number of rows; coerced to ``int`` before binding.
        progress: Gradio progress tracker.

    Returns:
        ``(DataFrame, status_markdown)``; the frame is empty when nothing
        matched or the query failed.
    """
    from contextlib import closing
    import traceback

    log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")

    progress(0, desc="πŸ” Building...")

    query = """
        SELECT
            e.id, s.id, r.label, en.id, e.weight, s.label, en.label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """

    params = []

    try:
        # The filters used to be tested with .strip() but inserted unstripped,
        # so " dog" produced a pattern that could never match. Strip once here.
        start_node = start_node.strip() if start_node else ""
        relation = relation.strip() if relation else ""
        end_node = end_node.strip() if end_node else ""

        # sqlite3's own context manager does not close the connection;
        # closing() releases it deterministically.
        with closing(get_db_connection()) as conn:
            progress(0.3, desc="πŸ“ Filters...")

            if start_node:
                if start_node.startswith('http://'):
                    pattern = f"{start_node}%"
                else:
                    pattern = f"{CONCEPTNET_BASE}/c/en/{start_node}%"
                query += " AND s.id LIKE ?"
                params.append(pattern)

            if relation:
                if relation.startswith('http://'):
                    rel_value = relation
                elif relation.startswith('/r/'):
                    rel_value = f"{CONCEPTNET_BASE}{relation}"
                else:
                    rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
                query += " AND r.id = ?"
                params.append(rel_value)

            if end_node:
                if end_node.startswith('http://'):
                    pattern = f"{end_node}%"
                else:
                    pattern = f"{CONCEPTNET_BASE}/c/en/{end_node}%"
                query += " AND en.id LIKE ?"
                params.append(pattern)

            query += " ORDER BY e.weight DESC LIMIT ?"
            # The UI slider may deliver a float; bind an integer row limit.
            params.append(int(limit))

            progress(0.6, desc="⚑ Running...")

            start_time = time.time()
            df = pd.read_sql_query(query, conn, params=params)
            # NOTE(review): this assignment is inferred from the later uses of
            # ``elapsed`` — confirm against the file.
            elapsed = time.time() - start_time

        progress(1.0, desc="βœ… Done!")

        log_progress(f"Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")

        if df.empty:
            return pd.DataFrame(), f"⚠️ No results ({elapsed:.2f}s)"

        df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
        return df, f"βœ… {len(df)} results in {elapsed:.2f}s"

    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        traceback.print_exc()
        return pd.DataFrame(), f"❌ {e}"
373
 
374
def run_raw_query(sql_query):
    """Execute a user-supplied SELECT statement against the database.

    Args:
        sql_query: SQL text. Anything that does not start with ``SELECT``
            (ignoring surrounding whitespace and case), including ``None``
            and blank input, is rejected without touching the database.

    Returns:
        ``(DataFrame, status_markdown)``; the frame is empty when the query
        was rejected or failed.
    """
    from contextlib import closing

    # ``not sql_query`` also covers None, which used to raise AttributeError
    # before the try block was reached.
    if not sql_query or not sql_query.strip().upper().startswith("SELECT"):
        return pd.DataFrame(), "❌ Only SELECT"
    try:
        # sqlite3's own context manager does not close the connection;
        # closing() releases it deterministically.
        with closing(get_db_connection()) as conn:
            start = time.time()
            # NOTE(review): this call is inferred from the later uses of
            # ``df`` — confirm against the file.
            df = pd.read_sql_query(sql_query, conn)
            elapsed = time.time() - start
        return df, f"βœ… {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        return pd.DataFrame(), f"❌ {e}"
385
 
386
def get_schema_info():
    """Build a Markdown overview of the database for the Schema tab.

    Lists up to 20 relations (label and id) and every non-internal table with
    its row count. If the database cannot be read, the text produced so far
    is returned with an ``Error:`` line appended.

    Returns:
        The Markdown string.
    """
    from contextlib import closing

    parts = [
        "# πŸ“š Schema\n\n",
        f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n",
    ]

    try:
        # sqlite3's own context manager does not close the connection;
        # closing() releases it deterministically.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()

            parts.append("## Relations\n\n")
            cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
            for rel_id, label in cursor.fetchall():
                parts.append(f"- **{label}:** `{rel_id}`\n")

            parts.append("\n## Tables\n\n")
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            for table, in cursor.fetchall():
                # Identifiers cannot be bound as parameters; quote the name so
                # tables with spaces, keywords or quotes still count correctly.
                quoted = '"' + table.replace('"', '""') + '"'
                cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
                parts.append(f"- **{table}:** {cursor.fetchone()[0]:,} rows\n")

    except Exception as e:
        parts.append(f"\nError: {e}\n")

    return "".join(parts)
409
 
410
# ===== FASTAPI - Create app FIRST =====
# The API routes are registered on this app before the Gradio UI is mounted on it.
app = FastAPI(title="ConceptNet API", version="1.0")


@app.get("/api")
def api_docs():
    """API documentation - accessible at /api"""
    # Route -> short description of what it returns and which parameters it takes.
    endpoints = {
        "/api/profile/{word}": "Semantic profile (params: lang, limit)",
        "/api/query": "Query edges (params: start, relation, end, limit)",
        "/api/relations": "List relations",
        "/api/languages": "List languages",
    }
    # Ready-to-use sample requests.
    examples = {
        "profile": "/api/profile/dog?lang=en&limit=10",
        "query": "/api/query?start=dog&relation=IsA&limit=20",
    }
    return {
        "name": "ConceptNet API",
        "version": "1.0",
        "endpoints": endpoints,
        "examples": examples,
        "note": "Visit root (/) for the Gradio UI",
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
 
432
@app.get("/api/profile/{word}")
def api_profile(word: str, lang: str = "en", limit: int = 10):
    """Get semantic profile"""
    # ``limit`` is the per-relation row cap of the profile lookup.
    profile = get_semantic_profile_json(word, lang, limit)
    return JSONResponse(profile)
436
+
437
@app.get("/api/query")
def api_query(start: Optional[str] = None,
              relation: Optional[str] = None,
              end: Optional[str] = None,
              limit: int = 50):
    """Query edges"""
    # The query-string names (start/end) map onto the start_node/end_node filters.
    payload = query_edges_json(start, relation, end, limit)
    return JSONResponse(payload)
444
+
445
@app.get("/api/relations")
def api_relations():
    """List relations"""
    # One {"name", "url"} object per entry of RELATIONS, in declaration order.
    listing = [{"name": name, "url": url} for name, url in RELATIONS]
    return JSONResponse({"relations": listing})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
@app.get("/api/languages")
def api_languages():
    """List languages"""
    # The language codes accepted by the profile lookup.
    payload = {"languages": TARGET_LANGUAGES}
    return JSONResponse(payload)
454
+
455
# ===== GRADIO UI =====
# NOTE(review): the nesting of the layout below follows the blank-line grouping
# of the original statements — confirm against the file.
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# 🧠 ConceptNet Explorer")
    gr.Markdown(
        f"**Multi-language semantic network** | "
        f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | "
        f"**API:** `/api/profile/{{word}}` `/api/query`"
    )

    with gr.Tabs():
        # Tab 1: Markdown profile of a single word.
        with gr.TabItem("πŸ” Semantic Profile"):
            gr.Markdown("**Explore semantic relations for any word**")

            with gr.Row():
                word_input = gr.Textbox(label="Word", placeholder="dog", value="dog")
                lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")

            semantic_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary", size="lg")
            semantic_output = gr.Markdown()

            gr.Examples(
                examples=[["dog", "en"], ["hund", "de"], ["perro", "es"], ["chien", "fr"]],
                inputs=[word_input, lang_input]
            )

        # Tab 2: start / relation / end filter form.
        with gr.TabItem("⚑ Query Builder"):
            with gr.Row():
                start_input = gr.Textbox(label="Start", placeholder="dog")
                rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA")
                end_input = gr.Textbox(label="End", placeholder="")

            limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50)
            query_btn = gr.Button("▢️ Run Query", variant="primary", size="lg")

            status_output = gr.Markdown()
            results_output = gr.DataFrame(wrap=True)

        # Tab 3: free-form SELECT statements.
        with gr.TabItem("πŸ’» Raw SQL"):
            raw_sql_input = gr.Textbox(
                label="SQL",
                value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10",
                lines=3
            )
            raw_btn = gr.Button("▢️ Execute")
            raw_status = gr.Markdown()
            raw_results = gr.DataFrame()

        # Tab 4: relation list and table row counts.
        with gr.TabItem("πŸ“Š Schema"):
            schema_btn = gr.Button("πŸ“Š Load Schema")
            schema_output = gr.Markdown()

        # Tab 5: static description of the JSON API, one Markdown component per snippet.
        with gr.TabItem("πŸ”Œ API"):
            for api_snippet in (
                "## JSON API Endpoints\n",
                "### API Documentation\n```\nGET /api\n```",
                "### Get Semantic Profile\n```\nGET /api/profile/{word}?lang=en&limit=10\n```",
                "### Query Edges\n```\nGET /api/query?start=dog&relation=IsA&limit=50\n```",
                "### List Relations\n```\nGET /api/relations\n```",
                "### Examples\n",
                "```\ncurl https://your-space.hf.space/api/profile/dog?lang=en\n```",
                "```\ncurl 'https://your-space.hf.space/api/query?start=dog&relation=IsA&limit=10'\n```",
            ):
                gr.Markdown(api_snippet)

    # Footer shown below the tabs.
    gr.Markdown(
        "---\n"
        "**Performance:** Exact match on rel_id for fast queries | "
        "**API:** Full REST API at `/api/*` endpoints"
    )

    # Event wiring: button -> handler, with its input and output components.
    semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output)
    query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output])
    raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status])
    schema_btn.click(get_schema_info, None, schema_output)

# ===== MOUNT GRADIO TO FASTAPI - Gradio at root, API at /api/* =====
app = gr.mount_gradio_app(app, demo, path="/")
529
 
 
 
 
 
 
 
 
530
if __name__ == "__main__":
    # Startup banner: (message, level) pairs logged in order before serving.
    rule = "=" * 60
    startup_messages = (
        (rule, "SUCCESS"),
        ("πŸš€ APP READY!", "SUCCESS"),
        (rule, "SUCCESS"),
        ("UI: http://localhost:7860/", "INFO"),
        ("API: http://localhost:7860/api", "INFO"),
        (" http://localhost:7860/api/profile/dog", "INFO"),
        (" http://localhost:7860/api/query?start=dog&relation=IsA", "INFO"),
        (rule, "SUCCESS"),
    )
    for text, level in startup_messages:
        log_progress(text, level)

    # Imported here so the server dependency is only needed when run as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)