Spaces:
Sleeping
Sleeping
fix css, add wiktionary debug
Browse files
app.py
CHANGED
|
@@ -1410,21 +1410,19 @@ def _wiktionary_build_report_for_entry(entry_id: int, conn: sqlite3.Connection)
|
|
| 1410 |
return report
|
| 1411 |
|
| 1412 |
def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Dict[str, Any]]:
|
| 1413 |
-
""" Finds
|
| 1414 |
log(f"Wiktionary (EN): Querying for '{word}'...")
|
| 1415 |
found_entry_ids: Set[int] = set()
|
| 1416 |
|
| 1417 |
lang_query = 'English'
|
| 1418 |
form_titles = ("Inflected form", "verb form", "noun form", "adjective form", "Comparative", "Superlative")
|
| 1419 |
|
| 1420 |
-
#
|
| 1421 |
-
# This ensures we find "Ready" when input is "ready"
|
| 1422 |
search_variants = list(set([word, word.lower(), word.title()]))
|
| 1423 |
placeholders = ', '.join('?' for _ in search_variants)
|
| 1424 |
|
| 1425 |
# 1. Search Lemmatized Entries
|
| 1426 |
sql_lemma = f"SELECT id, pos_title, word FROM entries WHERE word IN ({placeholders}) AND lang = ?"
|
| 1427 |
-
# flatten params: [var1, var2, ..., 'English']
|
| 1428 |
params_lemma = list(search_variants) + [lang_query]
|
| 1429 |
|
| 1430 |
lemma_q = conn.execute(sql_lemma, params_lemma).fetchall()
|
|
@@ -1433,59 +1431,81 @@ def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Di
|
|
| 1433 |
|
| 1434 |
for row in lemma_q:
|
| 1435 |
entry_id = row["id"]
|
| 1436 |
-
pos_title = row["pos_title"] or ""
|
| 1437 |
found_entry_ids.add(entry_id)
|
| 1438 |
|
| 1439 |
-
#
|
| 1440 |
if any(ft in pos_title for ft in form_titles):
|
| 1441 |
-
|
| 1442 |
-
form_of_q = conn.execute(
|
| 1443 |
-
"SELECT form_of FROM senses WHERE entry_id = ?", (entry_id,)
|
| 1444 |
-
).fetchall()
|
| 1445 |
-
|
| 1446 |
for form_row in form_of_q:
|
| 1447 |
form_of_json = form_row["form_of"]
|
| 1448 |
if not form_of_json: continue
|
| 1449 |
try:
|
| 1450 |
form_of_data = json.loads(form_of_json)
|
| 1451 |
if isinstance(form_of_data, list) and form_of_data:
|
| 1452 |
-
|
| 1453 |
-
if
|
| 1454 |
-
|
| 1455 |
-
|
| 1456 |
-
|
| 1457 |
-
|
| 1458 |
-
# 2. Search Inflected Forms (Reverse lookup)
|
| 1459 |
-
# We also apply the case variants here
|
| 1460 |
sql_form = f"""
|
| 1461 |
SELECT DISTINCT e.id
|
| 1462 |
FROM forms f
|
| 1463 |
JOIN entries e ON f.entry_id = e.id
|
| 1464 |
WHERE f.form_text IN ({placeholders}) AND e.lang = ?
|
| 1465 |
AND f.id NOT IN (
|
| 1466 |
-
SELECT ft.form_id
|
| 1467 |
-
FROM form_tags ft
|
| 1468 |
-
JOIN tags t ON ft.tag_id = t.id
|
| 1469 |
WHERE t.tag IN ('variant', 'auxiliary')
|
| 1470 |
)
|
| 1471 |
"""
|
| 1472 |
params_form = list(search_variants) + [lang_query]
|
| 1473 |
-
|
| 1474 |
form_q = conn.execute(sql_form, params_form).fetchall()
|
| 1475 |
for row in form_q:
|
| 1476 |
found_entry_ids.add(row["id"])
|
| 1477 |
|
| 1478 |
-
# 3. Add Parent Lemmas
|
| 1479 |
if parent_lemmas_to_find:
|
| 1480 |
-
log(f"Wiktionary: Found parent lemmas to add: {parent_lemmas_to_find}")
|
| 1481 |
for lemma_word in parent_lemmas_to_find:
|
| 1482 |
-
|
| 1483 |
-
parent_id_q
|
| 1484 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1485 |
).fetchall()
|
| 1486 |
-
|
| 1487 |
-
|
| 1488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1489 |
log(f"Wiktionary: Found {len(found_entry_ids)} unique matching entries.")
|
| 1490 |
|
| 1491 |
all_reports = []
|
|
@@ -2257,36 +2277,96 @@ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engin
|
|
| 2257 |
|
| 2258 |
HTML_CSS = """
|
| 2259 |
<style>
|
| 2260 |
-
|
| 2261 |
-
.ling-
|
| 2262 |
-
|
| 2263 |
-
|
| 2264 |
-
|
| 2265 |
-
|
| 2266 |
-
|
| 2267 |
-
|
| 2268 |
-
|
| 2269 |
-
|
| 2270 |
-
|
| 2271 |
-
|
| 2272 |
-
.
|
| 2273 |
-
|
| 2274 |
-
|
| 2275 |
-
|
| 2276 |
-
|
| 2277 |
-
|
| 2278 |
-
|
| 2279 |
-
.
|
| 2280 |
-
|
| 2281 |
-
|
| 2282 |
-
|
| 2283 |
-
|
| 2284 |
-
|
| 2285 |
-
.
|
| 2286 |
-
|
| 2287 |
-
|
| 2288 |
-
|
| 2289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2290 |
</style>
|
| 2291 |
"""
|
| 2292 |
|
|
@@ -2298,15 +2378,29 @@ def _format_word_analysis_html(data: Dict[str, Any]) -> str:
|
|
| 2298 |
html = HTML_CSS
|
| 2299 |
analysis = data["analysis"]
|
| 2300 |
|
| 2301 |
-
# Iterate over POS
|
| 2302 |
for pos_key, entries in analysis.items():
|
| 2303 |
if not entries: continue
|
| 2304 |
entry = entries[0] # Take best candidate
|
| 2305 |
|
| 2306 |
-
# ---
|
| 2307 |
-
|
| 2308 |
-
|
| 2309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2310 |
inf_wikt = entry.get("inflections_wiktionary") or {}
|
| 2311 |
inf_pat = entry.get("inflections_pattern") or {}
|
| 2312 |
sem_comb = entry.get("semantics_combined") or {}
|
|
@@ -2316,11 +2410,12 @@ def _format_word_analysis_html(data: Dict[str, Any]) -> str:
|
|
| 2316 |
sem_comb.get("lemma") or \
|
| 2317 |
data.get("input_word") or "?"
|
| 2318 |
|
|
|
|
| 2319 |
html += f"""
|
| 2320 |
<div class="ling-card">
|
| 2321 |
<div class="ling-header">
|
| 2322 |
<span class="ling-lemma">{lemma}</span>
|
| 2323 |
-
<span class="ling-pos {
|
| 2324 |
</div>
|
| 2325 |
"""
|
| 2326 |
|
|
@@ -2328,57 +2423,65 @@ def _format_word_analysis_html(data: Dict[str, Any]) -> str:
|
|
| 2328 |
html += "<div class='ling-section'><div class='ling-subtitle'>Morphology & Inflections</div>"
|
| 2329 |
html += "<table class='inflection-table'>"
|
| 2330 |
|
| 2331 |
-
#
|
|
|
|
|
|
|
| 2332 |
if pos_key == 'noun':
|
| 2333 |
-
if
|
| 2334 |
-
|
| 2335 |
-
|
| 2336 |
html += f"<tr><td class='inflection-label'>Context</td><td>{inf_pat.get('article', '-')}</td></tr>"
|
| 2337 |
|
| 2338 |
-
# Verb Logic
|
| 2339 |
elif pos_key == 'verb':
|
| 2340 |
cj = inf_pat.get('conjugation') or {}
|
| 2341 |
pres = cj.get('Present') or {}
|
| 2342 |
past = cj.get('Past') or {}
|
| 2343 |
parts = inf_pat.get('participles') or {}
|
| 2344 |
-
|
| 2345 |
html += f"<tr><td class='inflection-label'>Infinitive</td><td>{inf_pat.get('infinitive', lemma)}</td></tr>"
|
| 2346 |
html += f"<tr><td class='inflection-label'>3rd Person (He/She)</td><td>{pres.get('he/she (3sg)', '-')}</td></tr>"
|
| 2347 |
html += f"<tr><td class='inflection-label'>Past Simple</td><td>{past.get('General', '-')}</td></tr>"
|
| 2348 |
html += f"<tr><td class='inflection-label'>Participle (Ing)</td><td>{parts.get('Present Participle (gerund)', '-')}</td></tr>"
|
| 2349 |
html += f"<tr><td class='inflection-label'>Participle (Past)</td><td>{parts.get('Past Participle', '-')}</td></tr>"
|
| 2350 |
|
| 2351 |
-
|
| 2352 |
-
elif pos_key == 'adjective':
|
| 2353 |
gr = inf_pat.get('grading') or {}
|
| 2354 |
-
html += f"<tr><td class='inflection-label'>Positive</td><td>{gr.get('Positive',
|
| 2355 |
html += f"<tr><td class='inflection-label'>Comparative</td><td>{gr.get('Comparative', '-')}</td></tr>"
|
| 2356 |
html += f"<tr><td class='inflection-label'>Superlative</td><td>{gr.get('Superlative', '-')}</td></tr>"
|
| 2357 |
|
| 2358 |
-
# Wiktionary Forms
|
| 2359 |
forms_list = inf_wikt.get("forms_list") or []
|
| 2360 |
if forms_list:
|
| 2361 |
-
#
|
| 2362 |
-
forms_str_list = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2363 |
if forms_str_list:
|
| 2364 |
-
html += f"<tr><td class='inflection-label'>
|
| 2365 |
-
|
| 2366 |
html += "</table></div>"
|
| 2367 |
|
| 2368 |
# --- Semantics Section ---
|
| 2369 |
html += "<div class='ling-section'><div class='ling-subtitle'>Definitions & Senses</div>"
|
| 2370 |
|
| 2371 |
-
# Wiktionary Senses
|
| 2372 |
wikt_senses = sem_comb.get("wiktionary_senses") or []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2373 |
for s in wikt_senses[:3]:
|
| 2374 |
-
# FIX: Ensure definition is a string before replacing
|
| 2375 |
gloss_raw = s.get("definition") or ""
|
| 2376 |
-
gloss = gloss_raw.replace(";", "<br>")
|
| 2377 |
if gloss:
|
| 2378 |
html += f"<div class='sense-item'><span class='source-badge src-wikt'>Wikt</span> {gloss}</div>"
|
| 2379 |
|
| 2380 |
-
# OEWN Senses
|
| 2381 |
-
oewn_senses = sem_comb.get("odenet_senses") or []
|
| 2382 |
for s in oewn_senses[:3]:
|
| 2383 |
defi = s.get("definition") or ""
|
| 2384 |
if defi:
|
|
@@ -2386,17 +2489,43 @@ def _format_word_analysis_html(data: Dict[str, Any]) -> str:
|
|
| 2386 |
|
| 2387 |
html += "</div>"
|
| 2388 |
|
| 2389 |
-
# --- Relations Section
|
| 2390 |
rels = sem_comb.get("conceptnet_relations") or []
|
| 2391 |
if rels:
|
| 2392 |
-
html += "<div class='ling-section'><div class='ling-subtitle'>Knowledge Graph
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2393 |
html += "<div>"
|
| 2394 |
-
for r in
|
| 2395 |
-
|
| 2396 |
-
|
| 2397 |
-
|
| 2398 |
-
|
| 2399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2400 |
|
| 2401 |
html += "</div>" # End Card
|
| 2402 |
|
|
|
|
| 1410 |
return report
|
| 1411 |
|
| 1412 |
def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Dict[str, Any]]:
|
| 1413 |
+
""" Finds entries with verbose debugging if lookup fails. """
|
| 1414 |
log(f"Wiktionary (EN): Querying for '{word}'...")
|
| 1415 |
found_entry_ids: Set[int] = set()
|
| 1416 |
|
| 1417 |
lang_query = 'English'
|
| 1418 |
form_titles = ("Inflected form", "verb form", "noun form", "adjective form", "Comparative", "Superlative")
|
| 1419 |
|
| 1420 |
+
# Search variants: input, lowercase, title-case
|
|
|
|
| 1421 |
search_variants = list(set([word, word.lower(), word.title()]))
|
| 1422 |
placeholders = ', '.join('?' for _ in search_variants)
|
| 1423 |
|
| 1424 |
# 1. Search Lemmatized Entries
|
| 1425 |
sql_lemma = f"SELECT id, pos_title, word FROM entries WHERE word IN ({placeholders}) AND lang = ?"
|
|
|
|
| 1426 |
params_lemma = list(search_variants) + [lang_query]
|
| 1427 |
|
| 1428 |
lemma_q = conn.execute(sql_lemma, params_lemma).fetchall()
|
|
|
|
| 1431 |
|
| 1432 |
for row in lemma_q:
|
| 1433 |
entry_id = row["id"]
|
| 1434 |
+
pos_title = row["pos_title"] or ""
|
| 1435 |
found_entry_ids.add(entry_id)
|
| 1436 |
|
| 1437 |
+
# Check for parent lemma in "form_of" field
|
| 1438 |
if any(ft in pos_title for ft in form_titles):
|
| 1439 |
+
form_of_q = conn.execute("SELECT form_of FROM senses WHERE entry_id = ?", (entry_id,)).fetchall()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1440 |
for form_row in form_of_q:
|
| 1441 |
form_of_json = form_row["form_of"]
|
| 1442 |
if not form_of_json: continue
|
| 1443 |
try:
|
| 1444 |
form_of_data = json.loads(form_of_json)
|
| 1445 |
if isinstance(form_of_data, list) and form_of_data:
|
| 1446 |
+
parent = form_of_data[0].get("word")
|
| 1447 |
+
if parent: parent_lemmas_to_find.add(parent)
|
| 1448 |
+
except json.JSONDecodeError: pass
|
| 1449 |
+
|
| 1450 |
+
# 2. Search Inflected Forms
|
|
|
|
|
|
|
|
|
|
| 1451 |
sql_form = f"""
|
| 1452 |
SELECT DISTINCT e.id
|
| 1453 |
FROM forms f
|
| 1454 |
JOIN entries e ON f.entry_id = e.id
|
| 1455 |
WHERE f.form_text IN ({placeholders}) AND e.lang = ?
|
| 1456 |
AND f.id NOT IN (
|
| 1457 |
+
SELECT ft.form_id FROM form_tags ft JOIN tags t ON ft.tag_id = t.id
|
|
|
|
|
|
|
| 1458 |
WHERE t.tag IN ('variant', 'auxiliary')
|
| 1459 |
)
|
| 1460 |
"""
|
| 1461 |
params_form = list(search_variants) + [lang_query]
|
|
|
|
| 1462 |
form_q = conn.execute(sql_form, params_form).fetchall()
|
| 1463 |
for row in form_q:
|
| 1464 |
found_entry_ids.add(row["id"])
|
| 1465 |
|
| 1466 |
+
# 3. Add Parent Lemmas
|
| 1467 |
if parent_lemmas_to_find:
|
|
|
|
| 1468 |
for lemma_word in parent_lemmas_to_find:
|
| 1469 |
+
parent_id_q = conn.execute("SELECT id FROM entries WHERE word = ? AND lang = ?", (lemma_word, lang_query)).fetchall()
|
| 1470 |
+
for row in parent_id_q: found_entry_ids.add(row["id"])
|
| 1471 |
+
|
| 1472 |
+
# =========================================================
|
| 1473 |
+
# 🔍 VERBOSE DEBUG DETECTIVE (Triggered on Failure)
|
| 1474 |
+
# =========================================================
|
| 1475 |
+
if not found_entry_ids:
|
| 1476 |
+
log(f"⚠ [DEBUG-VERBOSE] Zero results for '{word}'. Running diagnostics...")
|
| 1477 |
+
try:
|
| 1478 |
+
# Check 1: Does it exist in ANY language?
|
| 1479 |
+
any_lang = conn.execute(
|
| 1480 |
+
f"SELECT lang, word FROM entries WHERE word IN ({placeholders}) LIMIT 5",
|
| 1481 |
+
list(search_variants)
|
| 1482 |
).fetchall()
|
| 1483 |
+
if any_lang:
|
| 1484 |
+
found_langs = [f"{r['word']} ({r['lang']})" for r in any_lang]
|
| 1485 |
+
log(f" -> FOUND in other languages/cases: {found_langs}")
|
| 1486 |
+
log(f" -> CONCLUSION: Language filter '{lang_query}' might be too strict.")
|
| 1487 |
+
else:
|
| 1488 |
+
log(f" -> NOT FOUND in 'entries' table (any language).")
|
| 1489 |
+
|
| 1490 |
+
# Check 2: Does it exist as a form?
|
| 1491 |
+
any_form = conn.execute(
|
| 1492 |
+
f"SELECT form_text FROM forms WHERE form_text IN ({placeholders}) LIMIT 1",
|
| 1493 |
+
list(search_variants)
|
| 1494 |
+
).fetchone()
|
| 1495 |
+
if any_form:
|
| 1496 |
+
log(f" -> FOUND in 'forms' table as '{any_form['form_text']}'! (But failed to link to an English entry)")
|
| 1497 |
+
else:
|
| 1498 |
+
log(f" -> NOT FOUND in 'forms' table either.")
|
| 1499 |
+
|
| 1500 |
+
# Check 3: Is it there but with whitespace issues?
|
| 1501 |
+
fuzzy = conn.execute("SELECT word FROM entries WHERE word LIKE ? LIMIT 1", (f"%{word}%",)).fetchone()
|
| 1502 |
+
if fuzzy:
|
| 1503 |
+
log(f" -> PARTIAL MATCH found: '{fuzzy['word']}'. (Check for whitespace/punctuation?)")
|
| 1504 |
+
else:
|
| 1505 |
+
log(f" -> COMPLETELY MISSING from DB.")
|
| 1506 |
+
except Exception as e:
|
| 1507 |
+
log(f" -> Detective crashed: {e}")
|
| 1508 |
+
|
| 1509 |
log(f"Wiktionary: Found {len(found_entry_ids)} unique matching entries.")
|
| 1510 |
|
| 1511 |
all_reports = []
|
|
|
|
| 2277 |
|
| 2278 |
HTML_CSS = """
|
| 2279 |
<style>
|
| 2280 |
+
/* Card Container - High Contrast */
|
| 2281 |
+
.ling-card {
|
| 2282 |
+
font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
|
| 2283 |
+
border: 1px solid #d1d5db; /* Darker border */
|
| 2284 |
+
border-radius: 8px;
|
| 2285 |
+
padding: 20px;
|
| 2286 |
+
margin-bottom: 20px;
|
| 2287 |
+
background: #ffffff;
|
| 2288 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
| 2289 |
+
}
|
| 2290 |
+
|
| 2291 |
+
/* Header Section */
|
| 2292 |
+
.ling-header {
|
| 2293 |
+
display: flex;
|
| 2294 |
+
align-items: center;
|
| 2295 |
+
margin-bottom: 15px;
|
| 2296 |
+
border-bottom: 2px solid #e5e7eb;
|
| 2297 |
+
padding-bottom: 10px;
|
| 2298 |
+
}
|
| 2299 |
+
.ling-lemma {
|
| 2300 |
+
font-size: 1.8em;
|
| 2301 |
+
font-weight: 800;
|
| 2302 |
+
color: #111827; /* Almost Black */
|
| 2303 |
+
margin-right: 12px;
|
| 2304 |
+
}
|
| 2305 |
+
.ling-pos {
|
| 2306 |
+
font-size: 0.85em;
|
| 2307 |
+
text-transform: uppercase;
|
| 2308 |
+
font-weight: 700;
|
| 2309 |
+
padding: 4px 10px;
|
| 2310 |
+
border-radius: 6px;
|
| 2311 |
+
color: #fff;
|
| 2312 |
+
letter-spacing: 0.05em;
|
| 2313 |
+
}
|
| 2314 |
+
|
| 2315 |
+
/* POS Colors */
|
| 2316 |
+
.pos-noun { background-color: #2563eb; } /* Blue */
|
| 2317 |
+
.pos-verb { background-color: #059669; } /* Green */
|
| 2318 |
+
.pos-adj { background-color: #d97706; } /* Amber */
|
| 2319 |
+
.pos-adv { background-color: #7c3aed; } /* Purple */
|
| 2320 |
+
.pos-name { background-color: #db2777; } /* Pink (Proper Noun) */
|
| 2321 |
+
.pos-other { background-color: #4b5563; } /* Gray */
|
| 2322 |
+
|
| 2323 |
+
/* Section Headers */
|
| 2324 |
+
.ling-section { margin-top: 15px; }
|
| 2325 |
+
.ling-subtitle {
|
| 2326 |
+
font-size: 0.85em;
|
| 2327 |
+
font-weight: 700;
|
| 2328 |
+
color: #374151; /* Dark Gray */
|
| 2329 |
+
text-transform: uppercase;
|
| 2330 |
+
margin-bottom: 8px;
|
| 2331 |
+
border-left: 4px solid #3b82f6;
|
| 2332 |
+
padding-left: 8px;
|
| 2333 |
+
}
|
| 2334 |
+
|
| 2335 |
+
/* Tables */
|
| 2336 |
+
.inflection-table { width: 100%; font-size: 0.95em; border-collapse: collapse; margin-bottom: 10px; }
|
| 2337 |
+
.inflection-table td { padding: 6px 10px; border-bottom: 1px solid #e5e7eb; color: #1f2937; }
|
| 2338 |
+
.inflection-label { color: #6b7280; font-weight: 600; width: 35%; background: #f9fafb; }
|
| 2339 |
+
|
| 2340 |
+
/* Senses */
|
| 2341 |
+
.sense-item { margin-bottom: 8px; line-height: 1.5; font-size: 1em; color: #1f2937; }
|
| 2342 |
+
.source-badge {
|
| 2343 |
+
display: inline-block; font-size: 0.75em; font-weight: bold; padding: 2px 6px;
|
| 2344 |
+
border-radius: 4px; border: 1px solid; margin-right: 8px; vertical-align: middle;
|
| 2345 |
+
}
|
| 2346 |
+
.src-wikt { background: #fff1f2; color: #9f1239; border-color: #fda4af; }
|
| 2347 |
+
.src-oewn { background: #eff6ff; color: #1e40af; border-color: #93c5fd; }
|
| 2348 |
+
|
| 2349 |
+
/* Relations Chips */
|
| 2350 |
+
.rel-chip {
|
| 2351 |
+
display: inline-block;
|
| 2352 |
+
background: #f3f4f6;
|
| 2353 |
+
color: #1f2937; /* Force Dark Text */
|
| 2354 |
+
padding: 4px 10px;
|
| 2355 |
+
border-radius: 15px;
|
| 2356 |
+
font-size: 0.9em;
|
| 2357 |
+
margin: 3px;
|
| 2358 |
+
border: 1px solid #d1d5db;
|
| 2359 |
+
font-weight: 500;
|
| 2360 |
+
}
|
| 2361 |
+
.rel-type { color: #6b7280; font-size: 0.8em; margin-right: 4px; font-weight: 700; text-transform: lowercase;}
|
| 2362 |
+
|
| 2363 |
+
/* Collapsible */
|
| 2364 |
+
.kg-details > summary {
|
| 2365 |
+
cursor: pointer; color: #2563eb; font-size: 0.9em; font-weight: 600;
|
| 2366 |
+
margin-top: 10px; padding: 6px; border-radius: 4px; width: fit-content;
|
| 2367 |
+
}
|
| 2368 |
+
.kg-details > summary:hover { text-decoration: underline; background: #eff6ff; }
|
| 2369 |
+
.kg-content { margin-top: 10px; padding: 10px; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb; }
|
| 2370 |
</style>
|
| 2371 |
"""
|
| 2372 |
|
|
|
|
| 2378 |
html = HTML_CSS
|
| 2379 |
analysis = data["analysis"]
|
| 2380 |
|
| 2381 |
+
# Iterate over POS
|
| 2382 |
for pos_key, entries in analysis.items():
|
| 2383 |
if not entries: continue
|
| 2384 |
entry = entries[0] # Take best candidate
|
| 2385 |
|
| 2386 |
+
# --- POS Display Logic ---
|
| 2387 |
+
# Map internal keys to nice display names and CSS classes
|
| 2388 |
+
display_pos = pos_key.upper()
|
| 2389 |
+
css_class = "pos-other"
|
| 2390 |
+
|
| 2391 |
+
if pos_key == 'noun': css_class = "pos-noun"
|
| 2392 |
+
elif pos_key == 'verb': css_class = "pos-verb"
|
| 2393 |
+
elif pos_key == 'adj' or pos_key == 'adjective':
|
| 2394 |
+
css_class = "pos-adj"
|
| 2395 |
+
display_pos = "ADJECTIVE"
|
| 2396 |
+
elif pos_key == 'adv' or pos_key == 'adverb':
|
| 2397 |
+
css_class = "pos-adv"
|
| 2398 |
+
display_pos = "ADVERB"
|
| 2399 |
+
elif pos_key == 'name':
|
| 2400 |
+
css_class = "pos-name"
|
| 2401 |
+
display_pos = "PROPER NOUN"
|
| 2402 |
+
|
| 2403 |
+
# Data Extraction
|
| 2404 |
inf_wikt = entry.get("inflections_wiktionary") or {}
|
| 2405 |
inf_pat = entry.get("inflections_pattern") or {}
|
| 2406 |
sem_comb = entry.get("semantics_combined") or {}
|
|
|
|
| 2410 |
sem_comb.get("lemma") or \
|
| 2411 |
data.get("input_word") or "?"
|
| 2412 |
|
| 2413 |
+
# --- CARD START ---
|
| 2414 |
html += f"""
|
| 2415 |
<div class="ling-card">
|
| 2416 |
<div class="ling-header">
|
| 2417 |
<span class="ling-lemma">{lemma}</span>
|
| 2418 |
+
<span class="ling-pos {css_class}">{display_pos}</span>
|
| 2419 |
</div>
|
| 2420 |
"""
|
| 2421 |
|
|
|
|
| 2423 |
html += "<div class='ling-section'><div class='ling-subtitle'>Morphology & Inflections</div>"
|
| 2424 |
html += "<table class='inflection-table'>"
|
| 2425 |
|
| 2426 |
+
# We check Pattern data first. If it's empty, we show '-' or rely on Wiktionary forms.
|
| 2427 |
+
has_pattern_data = bool(inf_pat) and "error" not in inf_pat
|
| 2428 |
+
|
| 2429 |
if pos_key == 'noun':
|
| 2430 |
+
html += f"<tr><td class='inflection-label'>Singular</td><td>{inf_pat.get('singular', lemma if not has_pattern_data else '-')}</td></tr>"
|
| 2431 |
+
html += f"<tr><td class='inflection-label'>Plural</td><td>{inf_pat.get('plural', '-')}</td></tr>"
|
| 2432 |
+
if has_pattern_data:
|
| 2433 |
html += f"<tr><td class='inflection-label'>Context</td><td>{inf_pat.get('article', '-')}</td></tr>"
|
| 2434 |
|
|
|
|
| 2435 |
elif pos_key == 'verb':
|
| 2436 |
cj = inf_pat.get('conjugation') or {}
|
| 2437 |
pres = cj.get('Present') or {}
|
| 2438 |
past = cj.get('Past') or {}
|
| 2439 |
parts = inf_pat.get('participles') or {}
|
|
|
|
| 2440 |
html += f"<tr><td class='inflection-label'>Infinitive</td><td>{inf_pat.get('infinitive', lemma)}</td></tr>"
|
| 2441 |
html += f"<tr><td class='inflection-label'>3rd Person (He/She)</td><td>{pres.get('he/she (3sg)', '-')}</td></tr>"
|
| 2442 |
html += f"<tr><td class='inflection-label'>Past Simple</td><td>{past.get('General', '-')}</td></tr>"
|
| 2443 |
html += f"<tr><td class='inflection-label'>Participle (Ing)</td><td>{parts.get('Present Participle (gerund)', '-')}</td></tr>"
|
| 2444 |
html += f"<tr><td class='inflection-label'>Participle (Past)</td><td>{parts.get('Past Participle', '-')}</td></tr>"
|
| 2445 |
|
| 2446 |
+
elif pos_key in ['adjective', 'adj']:
|
|
|
|
| 2447 |
gr = inf_pat.get('grading') or {}
|
| 2448 |
+
html += f"<tr><td class='inflection-label'>Positive</td><td>{gr.get('Positive', lemma)}</td></tr>"
|
| 2449 |
html += f"<tr><td class='inflection-label'>Comparative</td><td>{gr.get('Comparative', '-')}</td></tr>"
|
| 2450 |
html += f"<tr><td class='inflection-label'>Superlative</td><td>{gr.get('Superlative', '-')}</td></tr>"
|
| 2451 |
|
| 2452 |
+
# Wiktionary Forms (The "Other Forms" box)
|
| 2453 |
forms_list = inf_wikt.get("forms_list") or []
|
| 2454 |
if forms_list:
|
| 2455 |
+
# Extract text carefully
|
| 2456 |
+
forms_str_list = []
|
| 2457 |
+
for f in forms_list[:8]: # Show up to 8 forms
|
| 2458 |
+
txt = f.get('form_text')
|
| 2459 |
+
tags = f.get('tags')
|
| 2460 |
+
if txt:
|
| 2461 |
+
# Append tag if available e.g., "Readys (plural)"
|
| 2462 |
+
display_txt = f"{txt} <small style='color:#6b7280'>({tags})</small>" if tags else txt
|
| 2463 |
+
forms_str_list.append(display_txt)
|
| 2464 |
+
|
| 2465 |
if forms_str_list:
|
| 2466 |
+
html += f"<tr><td class='inflection-label'>Forms (DB)</td><td>{', '.join(forms_str_list)}</td></tr>"
|
| 2467 |
+
|
| 2468 |
html += "</table></div>"
|
| 2469 |
|
| 2470 |
# --- Semantics Section ---
|
| 2471 |
html += "<div class='ling-section'><div class='ling-subtitle'>Definitions & Senses</div>"
|
| 2472 |
|
|
|
|
| 2473 |
wikt_senses = sem_comb.get("wiktionary_senses") or []
|
| 2474 |
+
oewn_senses = sem_comb.get("odenet_senses") or []
|
| 2475 |
+
|
| 2476 |
+
if not wikt_senses and not oewn_senses:
|
| 2477 |
+
html += "<div class='sense-item'><i>No definitions found.</i></div>"
|
| 2478 |
+
|
| 2479 |
for s in wikt_senses[:3]:
|
|
|
|
| 2480 |
gloss_raw = s.get("definition") or ""
|
| 2481 |
+
gloss = str(gloss_raw).replace(";", "<br>") # Ensure string
|
| 2482 |
if gloss:
|
| 2483 |
html += f"<div class='sense-item'><span class='source-badge src-wikt'>Wikt</span> {gloss}</div>"
|
| 2484 |
|
|
|
|
|
|
|
| 2485 |
for s in oewn_senses[:3]:
|
| 2486 |
defi = s.get("definition") or ""
|
| 2487 |
if defi:
|
|
|
|
| 2489 |
|
| 2490 |
html += "</div>"
|
| 2491 |
|
| 2492 |
+
# --- Relations Section ---
|
| 2493 |
rels = sem_comb.get("conceptnet_relations") or []
|
| 2494 |
if rels:
|
| 2495 |
+
html += "<div class='ling-section'><div class='ling-subtitle'>Knowledge Graph</div>"
|
| 2496 |
+
|
| 2497 |
+
top_n = 5
|
| 2498 |
+
visible_rels = rels[:top_n]
|
| 2499 |
+
hidden_rels = rels[top_n:]
|
| 2500 |
+
|
| 2501 |
+
def render_rel(r):
|
| 2502 |
+
# Robust extraction
|
| 2503 |
+
rel_name = r.get("relation", "Rel")
|
| 2504 |
+
# Prefer other_node, fall back to parsing surface, fall back to '?'
|
| 2505 |
+
target = r.get("other_node") or "?"
|
| 2506 |
+
# Clean up surface text if needed
|
| 2507 |
+
if target == "?" and "surface" in r:
|
| 2508 |
+
parts = str(r["surface"]).split()
|
| 2509 |
+
if len(parts) > 2: target = parts[-1]
|
| 2510 |
+
|
| 2511 |
+
return f"<span class='rel-chip'><span class='rel-type'>{rel_name}:</span> {target}</span>"
|
| 2512 |
+
|
| 2513 |
html += "<div>"
|
| 2514 |
+
for r in visible_rels:
|
| 2515 |
+
html += render_rel(r)
|
| 2516 |
+
html += "</div>"
|
| 2517 |
+
|
| 2518 |
+
if hidden_rels:
|
| 2519 |
+
html += f"""
|
| 2520 |
+
<details class='kg-details'>
|
| 2521 |
+
<summary>Show {len(hidden_rels)} more relations</summary>
|
| 2522 |
+
<div class='kg-content'>
|
| 2523 |
+
"""
|
| 2524 |
+
for r in hidden_rels:
|
| 2525 |
+
html += render_rel(r)
|
| 2526 |
+
html += "</div></details>"
|
| 2527 |
+
|
| 2528 |
+
html += "</div>"
|
| 2529 |
|
| 2530 |
html += "</div>" # End Card
|
| 2531 |
|