conceptnet_db / app.py
cstr's picture
Update app.py
61a2be6 verified
raw
history blame
20.8 kB
import json
import os
import sqlite3
import time
import traceback
from contextlib import closing
from typing import Optional

import gradio as gr
import pandas as pd
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from huggingface_hub import HfApi, hf_hub_download
# ===== CONFIGURATION =====
# Language codes accepted by the semantic-profile lookups.
TARGET_LANGUAGES = [
    'de', 'en', 'es', 'fr', 'it', 'ja',
    'nl', 'pl', 'pt', 'ru', 'zh',
]
# Hugging Face dataset repo and the two files fetched from it.
INDEXED_REPO_ID = "cstr/conceptnet-de-indexed"
INDEXED_DB_FILENAME = "conceptnet-de-indexed.db"
PROGRESS_FILENAME = "indexing_progress.json"
# Prefix used to build node ids (/c/<lang>/<word>) and relation ids (/r/<name>).
CONCEPTNET_BASE = "http://conceptnet.io"
# =========================

print("🌍 Languages: " + ', '.join(map(str.upper, TARGET_LANGUAGES)))

# The three environment variables are consulted in this order; the first
# truthy value is used.
HF_TOKEN = (
    os.environ.get("HF_TOKEN")
    or os.environ.get("HUGGINGFACE_TOKEN")
    or os.environ.get("HF_API_TOKEN")
)
def log_progress(message, level="INFO"):
    """Print ``message`` to stdout with an ``HH:MM:SS`` timestamp and a level icon.

    ``level`` selects the icon; an unrecognised level prints no icon.
    """
    icons = {
        "INFO": "ℹ️ ",
        "SUCCESS": "βœ…",
        "ERROR": "❌",
        "WARN": "⚠️ ",
        "DEBUG": "πŸ”",
    }
    icon = icons.get(level, "")
    now = time.strftime("%H:%M:%S")
    print(f"[{now}] {icon} {message}")
def check_remote_progress():
    """Fetch the indexing-progress record from the Hugging Face dataset repo.

    Returns:
        The parsed contents of ``PROGRESS_FILENAME`` when the file can be
        downloaded and decodes to a JSON object. In every other case (no
        token configured, repo or file unavailable, unreadable or non-object
        JSON) returns ``{"indexing_complete": False}``.
    """
    not_ready = {"indexing_complete": False}
    if not HF_TOKEN:
        return not_ready
    try:
        # Probe the repo first so an inaccessible repo fails before the download.
        HfApi().repo_info(repo_id=INDEXED_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        progress_path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=PROGRESS_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        with open(progress_path, 'r', encoding="utf-8") as f:
            progress = json.load(f)
    except Exception as exc:
        # Still best-effort, but no longer a bare ``except``: Ctrl-C and
        # SystemExit propagate, and the reason is visible in the logs.
        log_progress(f"Could not read remote progress: {exc}", "WARN")
        return not_ready
    if not isinstance(progress, dict):
        # Callers use ``.get`` on the result, so anything but an object is unusable.
        log_progress("Remote progress file is not a JSON object", "WARN")
        return not_ready
    return progress
def create_indexed_database():
    """Download the pre-built indexed database once remote indexing is complete.

    Returns:
        Local path of the downloaded database file, or ``None`` when the
        remote progress record does not report completion or the download
        fails.
    """
    if not check_remote_progress().get("indexing_complete"):
        log_progress("Indexed DB not available (indexing incomplete or unreachable)", "WARN")
        return None
    try:
        path = hf_hub_download(
            repo_id=INDEXED_REPO_ID,
            filename=INDEXED_DB_FILENAME,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as exc:
        # Previously swallowed silently; the app then failed later with no
        # hint that the download was the cause.
        log_progress(f"Indexed DB download failed: {exc}", "ERROR")
        return None
    log_progress("Indexed DB loaded", "SUCCESS")
    return path
# Resolved once at import time. This is None when create_indexed_database()
# could not obtain the database file.
DB_PATH = create_indexed_database()
def get_db_connection():
    """Open a new SQLite connection to the indexed database.

    Note that using a ``sqlite3.Connection`` in a ``with`` statement only
    commits or rolls back; it does not close the connection. Callers are
    responsible for closing it.

    Returns:
        A ``sqlite3.Connection`` opened with ``check_same_thread=False`` and
        with the cache and mmap pragmas below applied.

    Raises:
        RuntimeError: if ``DB_PATH`` is ``None`` (the database was never
            downloaded).
    """
    if DB_PATH is None:
        # sqlite3.connect(None) would raise an opaque TypeError instead.
        raise RuntimeError(
            "Indexed database is not available (download failed or indexing incomplete)"
        )
    conn = sqlite3.connect(DB_PATH, check_same_thread=False)
    try:
        # A negative cache_size is expressed in KiB rather than pages.
        conn.execute("PRAGMA cache_size = -256000")
        # Allow up to 4 GiB of memory-mapped I/O.
        conn.execute("PRAGMA mmap_size = 4294967296")
    except Exception:
        conn.close()
        raise
    return conn
# (display name, relation id) pairs; the profile functions iterate them in
# this order. Each id is CONCEPTNET_BASE + "/r/" + name.
RELATIONS = [
    (rel, f"{CONCEPTNET_BASE}/r/{rel}")
    for rel in (
        "IsA",
        "PartOf",
        "HasA",
        "UsedFor",
        "CapableOf",
        "Causes",
        "HasProperty",
        "Synonym",
        "Antonym",
        "AtLocation",
        "RelatedTo",
        "DerivedFrom",
    )
]
def get_semantic_profile_json(word: str, lang: str = 'en', max_per_relation: int = 10):
    """Return the semantic profile of ``word`` as a JSON-serialisable dict.

    The word is stripped, lower-cased and has spaces replaced by ``_``. It is
    then matched as a *prefix* of the node id (``<base>/c/<lang>/<word>%``),
    so ``dog`` also matches ids such as ``.../dog/n`` or ``.../doghouse``.
    ``_`` and ``%`` inside the word act as SQL ``LIKE`` wildcards.

    Args:
        word: Term to look up. Empty or whitespace-only input is rejected.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        max_per_relation: Maximum rows per relation and direction. Clamped to
            ``0..1000``, because SQLite treats a negative ``LIMIT`` as "no
            limit".

    Returns:
        ``{"word", "language", "nodes", "relations", "total_edges"}`` on
        success, or ``{"error": <message>}`` for invalid input, an unknown
        word, or a database error.
    """
    # Normalise before validating: a whitespace-only word would otherwise
    # produce the pattern ".../c/<lang>/%", which matches every node.
    word = (word or "").strip().lower().replace(' ', '_')
    if not word or lang not in TARGET_LANGUAGES:
        return {"error": "Invalid input"}
    try:
        per_relation = max(0, min(int(max_per_relation), 1000))
    except (TypeError, ValueError):
        return {"error": "Invalid input"}

    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
    result = {
        "word": word,
        "language": lang,
        "nodes": [],
        "relations": {},
        "total_edges": 0
    }
    outgoing_sql = """
        SELECT en.label, e.weight, en.id
        FROM edge e
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT ?
    """
    incoming_sql = """
        SELECT s.label, e.weight, s.id
        FROM edge e
        JOIN node s ON e.start_id = s.id
        WHERE e.end_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT ?
    """
    try:
        # closing() releases the connection; ``with conn:`` alone would not.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            result["nodes"] = [{"id": nid, "label": label} for nid, label in cursor.fetchall()]
            if not result["nodes"]:
                return {"error": "Word not found"}
            for rel_name, rel_url in RELATIONS:
                params = (like_path, rel_url, per_relation)
                cursor.execute(outgoing_sql, params)
                outgoing = [{"target": label, "weight": weight, "target_id": eid}
                            for label, weight, eid in cursor.fetchall()]
                cursor.execute(incoming_sql, params)
                incoming = [{"source": label, "weight": weight, "source_id": sid}
                            for label, weight, sid in cursor.fetchall()]
                if outgoing or incoming:
                    count = len(outgoing) + len(incoming)
                    result["relations"][rel_name] = {
                        "outgoing": outgoing,
                        "incoming": incoming,
                        "count": count
                    }
                    result["total_edges"] += count
        return result
    except Exception as e:
        return {"error": str(e)}
def get_semantic_profile(word, lang='en', progress=gr.Progress()):
    """Build a Markdown semantic profile of ``word`` for the Gradio UI.

    The word is stripped, lower-cased and has spaces replaced by ``_``, then
    matched as a prefix of the node id (``<base>/c/<lang>/<word>%``). For each
    entry in ``RELATIONS`` the ten highest-weighted outgoing and incoming
    edges are listed.

    Args:
        word: Term to look up. Empty or whitespace-only input is rejected.
        lang: Language code; must be one of ``TARGET_LANGUAGES``.
        progress: Gradio progress tracker, called with a fraction and a
            description as the lookup advances.

    Returns:
        A Markdown string: the profile, a "not found" notice, an
        invalid-input notice, or an error message if the lookup raised.
    """
    log_progress(f"Profile: {word} ({lang})", "INFO")
    # Normalise before validating: a whitespace-only word would otherwise
    # produce the pattern ".../c/<lang>/%", which matches every node.
    word = (word or "").strip().lower().replace(' ', '_')
    if not word or lang not in TARGET_LANGUAGES:
        return "⚠️ Invalid input"
    progress(0, desc="πŸ” Starting...")
    like_path = f"{CONCEPTNET_BASE}/c/{lang}/{word}%"
    # Collected pieces are joined once at the end instead of repeated ``+=``.
    parts = [f"# 🧠 Semantic Profile: '{word}' ({lang.upper()})\n\n"]
    outgoing_sql = """
        SELECT en.label, e.weight
        FROM edge e
        JOIN node en ON e.end_id = en.id
        WHERE e.start_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT 10
    """
    incoming_sql = """
        SELECT s.label, e.weight
        FROM edge e
        JOIN node s ON e.start_id = s.id
        WHERE e.end_id LIKE ? AND e.rel_id = ?
        ORDER BY e.weight DESC
        LIMIT 10
    """
    try:
        # closing() releases the connection; ``with conn:`` alone would not.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()
            progress(0.05, desc="πŸ“ Finding nodes...")
            cursor.execute("SELECT id, label FROM node WHERE id LIKE ? LIMIT 5", (like_path,))
            nodes = cursor.fetchall()
            if not nodes:
                return f"# 🧠 '{word}'\n\n⚠️ Not found"
            log_progress(f"Found {len(nodes)} nodes", "SUCCESS")
            # Up to five nodes are fetched but only the first three are shown.
            for node_id, label in nodes[:3]:
                parts.append(f"**Node:** `{node_id}` β†’ **{label}**\n")
            parts.append("\n")
            total = 0
            num_relations = len(RELATIONS)
            for i, (rel_name, rel_url) in enumerate(RELATIONS):
                progress((i + 0.1) / num_relations, desc=f"πŸ”Ž {rel_name}...")
                parts.append(f"## {rel_name}\n\n")
                start_time = time.time()
                cursor.execute(outgoing_sql, (like_path, rel_url))
                outgoing = cursor.fetchall()
                elapsed = time.time() - start_time
                log_progress(f" {rel_name} out: {len(outgoing)} in {elapsed:.3f}s", "DEBUG")
                for label, weight in outgoing:
                    parts.append(f"- **{word}** {rel_name} β†’ *{label}* `[{weight:.3f}]`\n")
                cursor.execute(incoming_sql, (like_path, rel_url))
                incoming = cursor.fetchall()
                for label, weight in incoming:
                    parts.append(f"- *{label}* {rel_name} β†’ **{word}** `[{weight:.3f}]`\n")
                if not outgoing and not incoming:
                    parts.append("*No results*\n")
                total += len(outgoing) + len(incoming)
                parts.append("\n")
                progress((i + 1) / num_relations, desc=f"βœ“ {rel_name}")
            progress(1.0, desc="βœ… Complete!")
            parts.append(f"---\n**Total relations:** {total}\n")
            log_progress(f"Complete: {total} relations", "SUCCESS")
        return "".join(parts)
    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        traceback.print_exc()
        return f"**❌ Error:** {e}"
def query_edges_json(start_node: Optional[str] = None,
                     relation: Optional[str] = None,
                     end_node: Optional[str] = None,
                     limit: int = 50):
    """Query edges and return them as a JSON-serialisable dict.

    Every filter is optional; blank or whitespace-only values are ignored.

    Args:
        start_node: Prefix filter on the start node id. A value starting with
            ``http://`` is used as given; anything else is treated as an
            English term and expanded to ``<base>/c/en/<value>``.
        relation: Exact filter on the relation id. Accepts a full
            ``http://`` id, a ``/r/Name`` path, or a bare ``Name``.
        end_node: Prefix filter on the end node id, expanded like
            ``start_node``.
        limit: Maximum number of rows. Clamped to ``0..1000``, because SQLite
            treats a negative ``LIMIT`` as "no limit".

    Returns:
        ``{"results": [row dicts], "count": n}`` ordered by descending
        weight, or ``{"error": <message>}`` if anything raised.
    """
    def _node_pattern(node):
        # A trailing % makes this a prefix match on the node id.
        if node.startswith('http://'):
            return f"{node}%"
        return f"{CONCEPTNET_BASE}/c/en/{node}%"

    query = """
        SELECT
            e.id, s.id, r.label, en.id, e.weight, s.label, en.label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params = []
    try:
        start_node = (start_node or "").strip()
        relation = (relation or "").strip()
        end_node = (end_node or "").strip()
        if start_node:
            query += " AND s.id LIKE ?"
            params.append(_node_pattern(start_node))
        if relation:
            if relation.startswith('http://'):
                rel_value = relation
            elif relation.startswith('/r/'):
                rel_value = f"{CONCEPTNET_BASE}{relation}"
            else:
                rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
            query += " AND r.id = ?"
            params.append(rel_value)
        if end_node:
            query += " AND en.id LIKE ?"
            params.append(_node_pattern(end_node))
        query += " ORDER BY e.weight DESC LIMIT ?"
        params.append(max(0, min(int(limit), 1000)))
        # closing() releases the connection; ``with conn:`` alone would not.
        with closing(get_db_connection()) as conn:
            df = pd.read_sql_query(query, conn, params=params)
        if df.empty:
            return {"results": [], "count": 0}
        df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
        return {
            "results": df.to_dict(orient='records'),
            "count": len(df)
        }
    except Exception as e:
        return {"error": str(e)}
def run_query(start_node, relation, end_node, limit, progress=gr.Progress()):
    """Run the Query Builder search for the Gradio UI.

    Every filter is optional; blank or whitespace-only values are ignored,
    and surrounding whitespace is removed before the value is used.

    Args:
        start_node: Prefix filter on the start node id. A value starting with
            ``http://`` is used as given; anything else is treated as an
            English term and expanded to ``<base>/c/en/<value>``.
        relation: Exact filter on the relation id. Accepts a full
            ``http://`` id, a ``/r/Name`` path, or a bare ``Name``.
        end_node: Prefix filter on the end node id, expanded like
            ``start_node``.
        limit: Maximum number of rows. Coerced to a non-negative int, since
            the UI slider may deliver a float.
        progress: Gradio progress tracker.

    Returns:
        ``(DataFrame, status_markdown)``. The DataFrame is empty when there
        are no results or when an error occurred.
    """
    def _node_pattern(node):
        # A trailing % makes this a prefix match on the node id.
        if node.startswith('http://'):
            return f"{node}%"
        return f"{CONCEPTNET_BASE}/c/en/{node}%"

    log_progress(f"Query: start={start_node}, rel={relation}, end={end_node}", "INFO")
    progress(0, desc="πŸ” Building...")
    query = """
        SELECT
            e.id, s.id, r.label, en.id, e.weight, s.label, en.label
        FROM edge e
        JOIN relation r ON e.rel_id = r.id
        JOIN node s ON e.start_id = s.id
        JOIN node en ON e.end_id = en.id
        WHERE 1=1
    """
    params = []
    try:
        progress(0.3, desc="πŸ“ Filters...")
        # Strip once and use the stripped value. The filters used to be
        # checked with .strip() but inserted into the query unstripped.
        start_node = (start_node or "").strip()
        relation = (relation or "").strip()
        end_node = (end_node or "").strip()
        if start_node:
            query += " AND s.id LIKE ?"
            params.append(_node_pattern(start_node))
        if relation:
            if relation.startswith('http://'):
                rel_value = relation
            elif relation.startswith('/r/'):
                rel_value = f"{CONCEPTNET_BASE}{relation}"
            else:
                rel_value = f"{CONCEPTNET_BASE}/r/{relation}"
            query += " AND r.id = ?"
            params.append(rel_value)
        if end_node:
            query += " AND en.id LIKE ?"
            params.append(_node_pattern(end_node))
        query += " ORDER BY e.weight DESC LIMIT ?"
        params.append(max(0, int(limit)))
        progress(0.6, desc="⚑ Running...")
        # closing() releases the connection; ``with conn:`` alone would not.
        with closing(get_db_connection()) as conn:
            start_time = time.time()
            df = pd.read_sql_query(query, conn, params=params)
            elapsed = time.time() - start_time
        progress(1.0, desc="βœ… Done!")
        log_progress(f"Done: {len(df)} rows in {elapsed:.2f}s", "SUCCESS")
        if df.empty:
            return pd.DataFrame(), f"⚠️ No results ({elapsed:.2f}s)"
        df.columns = ['edge_id', 'start_id', 'relation', 'end_id', 'weight', 'start_label', 'end_label']
        return df, f"βœ… {len(df)} results in {elapsed:.2f}s"
    except Exception as e:
        log_progress(f"Error: {e}", "ERROR")
        traceback.print_exc()
        return pd.DataFrame(), f"❌ {e}"
def run_raw_query(sql_query):
    """Execute a user-supplied SQL statement and return its rows.

    Only statements that begin with ``SELECT`` (case-insensitive, after
    stripping whitespace) are accepted. ``None`` or empty input is rejected
    the same way.

    Args:
        sql_query: SQL text typed by the user.

    Returns:
        ``(DataFrame, status_markdown)``. The DataFrame is empty when the
        statement is rejected or fails.
    """
    statement = (sql_query or "").strip()
    if not statement.upper().startswith("SELECT"):
        return pd.DataFrame(), "❌ Only SELECT"
    try:
        # closing() releases the connection; ``with conn:`` alone would not.
        with closing(get_db_connection()) as conn:
            # The prefix check is only a string test, so additionally put the
            # connection in read-only mode before running untrusted SQL.
            conn.execute("PRAGMA query_only = ON")
            start = time.time()
            df = pd.read_sql_query(statement, conn)
            elapsed = time.time() - start
        return df, f"βœ… {len(df)} rows in {elapsed:.3f}s"
    except Exception as e:
        return pd.DataFrame(), f"❌ {e}"
def get_schema_info():
    """Return a Markdown summary of the database schema.

    Lists up to 20 relations (label and id) and every non-internal table with
    its row count. If the database raises, the error message is appended to
    whatever was collected so far.

    Returns:
        A Markdown string.
    """
    md = "# πŸ“š Schema\n\n"
    md += f"**Repo:** [{INDEXED_REPO_ID}](https://huggingface.co/datasets/{INDEXED_REPO_ID})\n\n"
    try:
        # closing() releases the connection; ``with conn:`` alone would not.
        with closing(get_db_connection()) as conn:
            cursor = conn.cursor()
            md += "## Relations\n\n"
            cursor.execute("SELECT id, label FROM relation ORDER BY label LIMIT 20")
            for rel_id, label in cursor.fetchall():
                md += f"- **{label}:** `{rel_id}`\n"
            md += "\n## Tables\n\n"
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'")
            for table, in cursor.fetchall():
                # Identifiers cannot be bound as parameters, so quote the name
                # (doubling embedded quotes) before interpolating it.
                quoted = '"' + table.replace('"', '""') + '"'
                cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
                md += f"- **{table}:** {cursor.fetchone()[0]:,} rows\n"
    except Exception as e:
        md += f"\nError: {e}\n"
    return md
# ===== FASTAPI - Create app FIRST =====
# The @app.get route decorators below need this object to exist, and the
# Gradio UI is mounted onto it at the end of the file.
app = FastAPI(title="ConceptNet API", version="1.0")
@app.get("/api")
def api_docs():
    """Describe the JSON API: name, version, endpoints and example URLs."""
    endpoints = {
        "/api/profile/{word}": "Semantic profile (params: lang, limit)",
        "/api/query": "Query edges (params: start, relation, end, limit)",
        "/api/relations": "List relations",
        "/api/languages": "List languages"
    }
    examples = {
        "profile": "/api/profile/dog?lang=en&limit=10",
        "query": "/api/query?start=dog&relation=IsA&limit=20"
    }
    return {
        "name": "ConceptNet API",
        "version": "1.0",
        "endpoints": endpoints,
        "examples": examples,
        "note": "Visit root (/) for the Gradio UI"
    }
@app.get("/api/profile/{word}")
def api_profile(word: str, lang: str = "en", limit: int = 10):
    """Serve the semantic profile of ``word`` as JSON.

    ``limit`` is forwarded as the per-relation row cap.
    """
    profile = get_semantic_profile_json(word, lang, limit)
    return JSONResponse(profile)
@app.get("/api/query")
def api_query(start: Optional[str] = None,
              relation: Optional[str] = None,
              end: Optional[str] = None,
              limit: int = 50):
    """Serve an edge query as JSON; every filter is optional."""
    payload = query_edges_json(start, relation, end, limit)
    return JSONResponse(payload)
@app.get("/api/relations")
def api_relations():
    """Serve the configured relations as ``{"relations": [{"name", "url"}, ...]}``."""
    listing = []
    for rel_name, rel_url in RELATIONS:
        listing.append({"name": rel_name, "url": rel_url})
    return JSONResponse({"relations": listing})
@app.get("/api/languages")
def api_languages():
    """Serve the supported language codes as ``{"languages": [...]}``."""
    body = {"languages": TARGET_LANGUAGES}
    return JSONResponse(body)
# ===== GRADIO UI =====
# Builds the Blocks interface: five tabs (semantic profile, query builder,
# raw SQL, schema, API help) plus a footer, then wires each button to its
# handler function.
# NOTE(review): the source this was recovered from had lost its indentation,
# so which widgets sit inside each gr.Row() is reconstructed from convention
# -- confirm against the deployed layout.
with gr.Blocks(title="ConceptNet Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧠 ConceptNet Explorer")
    gr.Markdown(
        f"**Multi-language semantic network** | "
        f"**Languages:** {', '.join([l.upper() for l in TARGET_LANGUAGES])} | "
        f"**API:** `/api/profile/{{word}}` `/api/query`"
    )
    with gr.Tabs():
        # Tab 1: word + language -> Markdown profile.
        with gr.TabItem("πŸ” Semantic Profile"):
            gr.Markdown("**Explore semantic relations for any word**")
            with gr.Row():
                word_input = gr.Textbox(label="Word", placeholder="dog", value="dog")
                lang_input = gr.Dropdown(choices=TARGET_LANGUAGES, value="en", label="Language")
            semantic_btn = gr.Button("πŸ” Get Semantic Profile", variant="primary", size="lg")
            semantic_output = gr.Markdown()
            gr.Examples(
                examples=[["dog", "en"], ["hund", "de"], ["perro", "es"], ["chien", "fr"]],
                inputs=[word_input, lang_input]
            )
        # Tab 2: start / relation / end filters -> table of edges.
        with gr.TabItem("⚑ Query Builder"):
            with gr.Row():
                start_input = gr.Textbox(label="Start", placeholder="dog")
                rel_input = gr.Textbox(label="Relation", placeholder="IsA", value="IsA")
                end_input = gr.Textbox(label="End", placeholder="")
            limit_slider = gr.Slider(label="Limit", minimum=1, maximum=200, value=50)
            query_btn = gr.Button("▢️ Run Query", variant="primary", size="lg")
            status_output = gr.Markdown()
            results_output = gr.DataFrame(wrap=True)
        # Tab 3: free-form SQL, pre-filled with an example query.
        with gr.TabItem("πŸ’» Raw SQL"):
            raw_sql_input = gr.Textbox(
                label="SQL",
                value=f"SELECT e.*, r.label FROM edge e JOIN relation r ON e.rel_id = r.id WHERE e.start_id = '{CONCEPTNET_BASE}/c/en/dog' LIMIT 10",
                lines=3
            )
            raw_btn = gr.Button("▢️ Execute")
            raw_status = gr.Markdown()
            raw_results = gr.DataFrame()
        # Tab 4: on-demand schema summary.
        with gr.TabItem("πŸ“Š Schema"):
            schema_btn = gr.Button("πŸ“Š Load Schema")
            schema_output = gr.Markdown()
        # Tab 5: static help text for the JSON API.
        with gr.TabItem("πŸ”Œ API"):
            gr.Markdown("## JSON API Endpoints\n")
            gr.Markdown("### API Documentation\n```\nGET /api\n```")
            gr.Markdown("### Get Semantic Profile\n```\nGET /api/profile/{word}?lang=en&limit=10\n```")
            gr.Markdown("### Query Edges\n```\nGET /api/query?start=dog&relation=IsA&limit=50\n```")
            gr.Markdown("### List Relations\n```\nGET /api/relations\n```")
            gr.Markdown("### Examples\n")
            gr.Markdown("```\ncurl https://your-space.hf.space/api/profile/dog?lang=en\n```")
            gr.Markdown("```\ncurl 'https://your-space.hf.space/api/query?start=dog&relation=IsA&limit=10'\n```")
    gr.Markdown(
        "---\n"
        "**Performance:** Exact match on rel_id for fast queries | "
        "**API:** Full REST API at `/api/*` endpoints"
    )
    # Event wiring: each call is (handler, inputs, outputs).
    semantic_btn.click(get_semantic_profile, [word_input, lang_input], semantic_output)
    query_btn.click(run_query, [start_input, rel_input, end_input, limit_slider], [results_output, status_output])
    raw_btn.click(run_raw_query, raw_sql_input, [raw_results, raw_status])
    schema_btn.click(get_schema_info, None, schema_output)
# ===== MOUNT GRADIO TO FASTAPI - Gradio at root, API at /api/* =====
# Done after the /api routes are registered on ``app``; ``app`` is rebound
# to the object returned by mount_gradio_app.
app = gr.mount_gradio_app(app, demo, path="/")
if __name__ == "__main__":
    # Startup banner listing the local URLs, then serve on all interfaces.
    rule = "=" * 60
    banner = (
        (rule, "SUCCESS"),
        ("πŸš€ APP READY!", "SUCCESS"),
        (rule, "SUCCESS"),
        ("UI: http://localhost:7860/", "INFO"),
        ("API: http://localhost:7860/api", "INFO"),
        (" http://localhost:7860/api/profile/dog", "INFO"),
        (" http://localhost:7860/api/query?start=dog&relation=IsA", "INFO"),
        (rule, "SUCCESS"),
    )
    for text, level in banner:
        log_progress(text, level)

    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)