Spaces:
Running
Running
fix null entries in sql db, formatted ui output
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# ============================================================================
|
| 2 |
-
# ENGLISH LINGUISTICS HUB (CONSOLIDATED APP
|
| 3 |
#
|
| 4 |
-
# This script
|
| 5 |
# adding NLTK, Stanza, TextBlob, HanTa(EN), OEWN, and OpenBLP.
|
| 6 |
# It maintains the exact same JSON output structure as the German app.
|
| 7 |
#
|
|
@@ -1568,7 +1568,8 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 1568 |
|
| 1569 |
spacy_pos_hint, spacy_lemma_hint = None, None
|
| 1570 |
try:
|
| 1571 |
-
|
|
|
|
| 1572 |
if nlp_en:
|
| 1573 |
doc = nlp_en(word)
|
| 1574 |
token = doc[0]
|
|
@@ -1590,15 +1591,18 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 1590 |
if spacy_pos_hint and wikt_pos == spacy_pos_hint:
|
| 1591 |
if spacy_lemma_hint and wikt_lemma == spacy_lemma_hint: return 1
|
| 1592 |
return 2
|
| 1593 |
-
if wikt_lemma.lower() == word.lower(): return 3
|
| 1594 |
return 4
|
|
|
|
| 1595 |
wiktionary_reports.sort(key=get_priority_score)
|
| 1596 |
|
| 1597 |
word_lower = word.lower()
|
| 1598 |
for wikt_report in wiktionary_reports:
|
|
|
|
| 1599 |
pos_key = _wiktionary_map_pos_key(wikt_report.get("pos"))
|
| 1600 |
-
lemma = wikt_report.get("lemma"
|
| 1601 |
-
|
|
|
|
| 1602 |
|
| 1603 |
inflections_wikt_block = {
|
| 1604 |
"base_form": lemma,
|
|
@@ -1609,7 +1613,9 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 1609 |
pattern_block = {}
|
| 1610 |
if PATTERN_EN_AVAILABLE:
|
| 1611 |
try:
|
|
|
|
| 1612 |
use_word = word if "form" in pos_title.lower() else lemma
|
|
|
|
| 1613 |
if pos_key == "noun": pattern_block = pattern_analyze_as_noun_en(use_word)
|
| 1614 |
elif pos_key == "verb": pattern_block = pattern_analyze_as_verb_en(use_word)
|
| 1615 |
elif pos_key == "adjective": pattern_block = pattern_analyze_as_adjective_en(use_word)
|
|
@@ -1625,12 +1631,14 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 1625 |
"semantics_combined": semantics_block,
|
| 1626 |
"wiktionary_metadata": {
|
| 1627 |
"pos_title": pos_title,
|
| 1628 |
-
"etymology": wikt_report.get("etymology_text"),
|
| 1629 |
-
"pronunciation": wikt_report.get("sounds"),
|
| 1630 |
}
|
| 1631 |
}
|
| 1632 |
|
|
|
|
| 1633 |
is_valid = False
|
|
|
|
| 1634 |
is_inflected_entry = any(ft in pos_title for ft in ["form", "Comparative", "Superlative"])
|
| 1635 |
|
| 1636 |
if lemma.lower() == word_lower: is_valid = True
|
|
@@ -2188,6 +2196,212 @@ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engin
|
|
| 2188 |
}
|
| 2189 |
|
| 2190 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2191 |
# ============================================================================
|
| 2192 |
# 8. GRADIO UI CREATION (Adapted for English)
|
| 2193 |
# ============================================================================
|
|
@@ -2367,34 +2581,39 @@ def create_combined_tab():
|
|
| 2367 |
analyze_button = gr.Button("Run Comprehensive Analysis", variant="primary")
|
| 2368 |
|
| 2369 |
status_output = gr.Markdown(value="", visible=True)
|
| 2370 |
-
|
|
|
|
|
|
|
| 2371 |
|
| 2372 |
-
|
|
|
|
| 2373 |
try:
|
| 2374 |
status = "🔄 Analyzing..."
|
| 2375 |
-
yield status, {}
|
|
|
|
| 2376 |
result = comprehensive_english_analysis(text, top_n)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2377 |
status = f"✅ Analysis complete! Found {len(result.get('lemma_deep_dive', {}))} lemmas."
|
| 2378 |
-
yield status, result
|
| 2379 |
except Exception as e:
|
| 2380 |
error_status = f"❌ Error: {str(e)}"
|
| 2381 |
-
yield error_status, {"error": str(e), "traceback": traceback.format_exc()}
|
| 2382 |
|
| 2383 |
analyze_button.click(
|
| 2384 |
-
fn=
|
| 2385 |
inputs=[text_input, top_n_number],
|
| 2386 |
-
outputs=[status_output,
|
| 2387 |
api_name="comprehensive_analysis"
|
| 2388 |
)
|
| 2389 |
|
| 2390 |
gr.Examples(
|
| 2391 |
-
[["The cat sleeps on the table.", 3],
|
| 2392 |
-
["This is a houze.", 0],
|
| 2393 |
-
["I am running quickly.", 3],
|
| 2394 |
-
["The gardener is planting a tree.", 5]],
|
| 2395 |
inputs=[text_input, top_n_number],
|
| 2396 |
-
outputs=[status_output,
|
| 2397 |
-
fn=
|
| 2398 |
cache_examples=False
|
| 2399 |
)
|
| 2400 |
|
|
@@ -2416,7 +2635,7 @@ def create_word_encyclopedia_tab():
|
|
| 2416 |
)
|
| 2417 |
|
| 2418 |
engine_radio = gr.Radio(
|
| 2419 |
-
label="Select Analysis Engine
|
| 2420 |
choices=[
|
| 2421 |
("Wiktionary (Default)", "wiktionary"),
|
| 2422 |
("HanTa (EN)", "hanta"),
|
|
@@ -2430,24 +2649,28 @@ def create_word_encyclopedia_tab():
|
|
| 2430 |
|
| 2431 |
analyze_button = gr.Button("Analyze Word", variant="primary")
|
| 2432 |
|
| 2433 |
-
|
|
|
|
|
|
|
| 2434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2435 |
analyze_button.click(
|
| 2436 |
-
fn=
|
| 2437 |
inputs=[word_input, top_n_number, engine_radio],
|
| 2438 |
-
outputs=[
|
| 2439 |
api_name="analyze_word"
|
| 2440 |
)
|
| 2441 |
|
| 2442 |
gr.Examples(
|
| 2443 |
-
[["run", 3, "wiktionary"],
|
| 2444 |
-
["water", 0, "wiktionary"],
|
| 2445 |
-
["fast", 3, "hanta"],
|
| 2446 |
-
["ran", 0, "stanza"],
|
| 2447 |
-
["beautiful", 0, "nltk"]],
|
| 2448 |
inputs=[word_input, top_n_number, engine_radio],
|
| 2449 |
-
outputs=[
|
| 2450 |
-
fn=
|
| 2451 |
cache_examples=False
|
| 2452 |
)
|
| 2453 |
|
|
|
|
| 1 |
# ============================================================================
|
| 2 |
+
# ENGLISH LINGUISTICS HUB (CONSOLIDATED APP V24-EN)
|
| 3 |
#
|
| 4 |
+
# This script provides a comprehensive Linguistics Hub for English analysis,
|
| 5 |
# adding NLTK, Stanza, TextBlob, HanTa(EN), OEWN, and OpenBLP.
|
| 6 |
# It maintains the exact same JSON output structure as the German app.
|
| 7 |
#
|
|
|
|
| 1568 |
|
| 1569 |
spacy_pos_hint, spacy_lemma_hint = None, None
|
| 1570 |
try:
|
| 1571 |
+
# Quick heuristic to help sort results if multiple entries exist
|
| 1572 |
+
nlp_en = SPACY_MODELS.get("en")
|
| 1573 |
if nlp_en:
|
| 1574 |
doc = nlp_en(word)
|
| 1575 |
token = doc[0]
|
|
|
|
| 1591 |
if spacy_pos_hint and wikt_pos == spacy_pos_hint:
|
| 1592 |
if spacy_lemma_hint and wikt_lemma == spacy_lemma_hint: return 1
|
| 1593 |
return 2
|
| 1594 |
+
if wikt_lemma and wikt_lemma.lower() == word.lower(): return 3
|
| 1595 |
return 4
|
| 1596 |
+
|
| 1597 |
wiktionary_reports.sort(key=get_priority_score)
|
| 1598 |
|
| 1599 |
word_lower = word.lower()
|
| 1600 |
for wikt_report in wiktionary_reports:
|
| 1601 |
+
# --- FIX: Safe Extraction of DB Fields ---
|
| 1602 |
pos_key = _wiktionary_map_pos_key(wikt_report.get("pos"))
|
| 1603 |
+
lemma = wikt_report.get("lemma") or word # Fallback if None
|
| 1604 |
+
# FORCE STRING: 'or ""' turns a DB NULL (None) into "" so the substring checks on pos_title below cannot raise "argument of type 'NoneType' is not iterable"
|
| 1605 |
+
pos_title = wikt_report.get("pos_title") or ""
|
| 1606 |
|
| 1607 |
inflections_wikt_block = {
|
| 1608 |
"base_form": lemma,
|
|
|
|
| 1613 |
pattern_block = {}
|
| 1614 |
if PATTERN_EN_AVAILABLE:
|
| 1615 |
try:
|
| 1616 |
+
# Safe check now that pos_title is definitely a string
|
| 1617 |
use_word = word if "form" in pos_title.lower() else lemma
|
| 1618 |
+
|
| 1619 |
if pos_key == "noun": pattern_block = pattern_analyze_as_noun_en(use_word)
|
| 1620 |
elif pos_key == "verb": pattern_block = pattern_analyze_as_verb_en(use_word)
|
| 1621 |
elif pos_key == "adjective": pattern_block = pattern_analyze_as_adjective_en(use_word)
|
|
|
|
| 1631 |
"semantics_combined": semantics_block,
|
| 1632 |
"wiktionary_metadata": {
|
| 1633 |
"pos_title": pos_title,
|
| 1634 |
+
"etymology": wikt_report.get("etymology_text") or "",
|
| 1635 |
+
"pronunciation": wikt_report.get("sounds") or "",
|
| 1636 |
}
|
| 1637 |
}
|
| 1638 |
|
| 1639 |
+
# Validation Logic
|
| 1640 |
is_valid = False
|
| 1641 |
+
# Safe check now that pos_title is definitely a string
|
| 1642 |
is_inflected_entry = any(ft in pos_title for ft in ["form", "Comparative", "Superlative"])
|
| 1643 |
|
| 1644 |
if lemma.lower() == word_lower: is_valid = True
|
|
|
|
| 2196 |
}
|
| 2197 |
|
| 2198 |
|
| 2199 |
+
# ============================================================================
|
| 2200 |
+
# 7.5 VISUALIZATION & HTML HELPERS (NEW)
|
| 2201 |
+
# ============================================================================
|
| 2202 |
+
|
| 2203 |
+
HTML_CSS = """
|
| 2204 |
+
<style>
|
| 2205 |
+
.ling-card { font-family: 'Source Sans Pro', sans-serif; border: 1px solid #e5e7eb; border-radius: 8px; padding: 15px; margin-bottom: 15px; background: #fff; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
|
| 2206 |
+
.ling-header { display: flex; align-items: baseline; margin-bottom: 10px; border-bottom: 2px solid #f3f4f6; padding-bottom: 5px; }
|
| 2207 |
+
.ling-lemma { font-size: 1.5em; font-weight: bold; color: #1f2937; margin-right: 10px; }
|
| 2208 |
+
.ling-pos { font-size: 0.9em; text-transform: uppercase; font-weight: bold; padding: 2px 6px; border-radius: 4px; color: #fff; }
|
| 2209 |
+
.pos-noun { background-color: #3b82f6; } /* Blue */
|
| 2210 |
+
.pos-verb { background-color: #10b981; } /* Green */
|
| 2211 |
+
.pos-adj { background-color: #f59e0b; } /* Amber */
|
| 2212 |
+
.pos-adv { background-color: #8b5cf6; } /* Purple */
|
| 2213 |
+
|
| 2214 |
+
.ling-section { margin-top: 12px; }
|
| 2215 |
+
.ling-subtitle { font-size: 0.95em; font-weight: bold; color: #6b7280; text-transform: uppercase; margin-bottom: 5px; letter-spacing: 0.05em; }
|
| 2216 |
+
|
| 2217 |
+
.inflection-table { width: 100%; font-size: 0.9em; border-collapse: collapse; }
|
| 2218 |
+
.inflection-table td { padding: 4px 8px; border-bottom: 1px solid #f3f4f6; }
|
| 2219 |
+
.inflection-label { color: #6b7280; font-weight: 600; width: 40%; }
|
| 2220 |
+
|
| 2221 |
+
.sense-item { margin-bottom: 6px; line-height: 1.4; font-size: 0.95em; }
|
| 2222 |
+
.source-badge { display: inline-block; font-size: 0.7em; padding: 0 4px; border-radius: 3px; border: 1px solid #e5e7eb; margin-right: 5px; vertical-align: middle; }
|
| 2223 |
+
.src-wikt { background: #fff1f2; color: #be123c; border-color: #fda4af; }
|
| 2224 |
+
.src-oewn { background: #eff6ff; color: #1d4ed8; border-color: #93c5fd; }
|
| 2225 |
+
|
| 2226 |
+
.rel-chip { display: inline-block; background: #f3f4f6; padding: 2px 8px; border-radius: 12px; font-size: 0.85em; margin: 2px; border: 1px solid #e5e7eb; }
|
| 2227 |
+
.rel-type { color: #6b7280; font-size: 0.8em; margin-right: 3px; font-weight: bold;}
|
| 2228 |
+
|
| 2229 |
+
.grammar-alert { padding: 10px; border-radius: 6px; margin-bottom: 20px; border-left: 4px solid; }
|
| 2230 |
+
.alert-green { background: #f0fdf4; border-color: #22c55e; color: #166534; }
|
| 2231 |
+
.alert-red { background: #fef2f2; border-color: #ef4444; color: #991b1b; }
|
| 2232 |
+
|
| 2233 |
+
details > summary { cursor: pointer; padding: 10px; background: #f9fafb; border-radius: 6px; font-weight: 600; margin-bottom: 5px; }
|
| 2234 |
+
details[open] > summary { background: #e5e7eb; }
|
| 2235 |
+
</style>
|
| 2236 |
+
"""
|
| 2237 |
+
|
| 2238 |
+
def _format_word_analysis_html(data: Dict[str, Any]) -> str:
    """Render a single-word analysis result as styled HTML cards.

    One card is produced per part-of-speech key found in ``data["analysis"]``,
    using only the first entry of each list. A card contains a header (lemma
    and POS badge), an inflection table, up to three Wiktionary senses, up to
    three OEWN senses (read from the ``"odenet_senses"`` key) and up to five
    ConceptNet relations.

    Args:
        data: Result dict holding an ``"analysis"`` mapping of POS key to a
            list of entry dicts, plus optional ``"info"`` and ``"input_word"``
            keys. ``None`` or a dict without ``"analysis"`` yields a
            placeholder card instead of raising.

    Returns:
        An HTML string that always starts with ``HTML_CSS``.
    """
    # Function-scope import: the local result is no longer called ``html``,
    # so it cannot shadow the stdlib module of the same name.
    from html import escape

    # POS keys use the long names ("adjective"), while the stylesheet only
    # defines the short classes; unknown keys fall back to the noun colour.
    pos_classes = {
        "noun": "pos-noun",
        "verb": "pos-verb",
        "adj": "pos-adj",
        "adjective": "pos-adj",
        "adv": "pos-adv",
        "adverb": "pos-adv",
    }

    def cell(value: Any, default: str = "-") -> str:
        # Every dynamic value originates from dictionary/DB content, so it is
        # escaped; missing values show a placeholder instead of "None".
        if value is None or value == "":
            return default
        return escape(str(value))

    def row(label: str, value: Any) -> str:
        return (
            f"<tr><td class='inflection-label'>{label}</td>"
            f"<td>{cell(value)}</td></tr>"
        )

    if not isinstance(data, dict) or "analysis" not in data:
        info = cell(data.get("info"), "") if isinstance(data, dict) else ""
        return (
            f"{HTML_CSS}<div class='ling-card'>"
            f"No analysis data available. {info}</div>"
        )

    out = [HTML_CSS]
    analysis = data["analysis"] or {}

    for pos_key, entries in analysis.items():
        if not entries:
            continue
        entry = entries[0] or {}  # best candidate only

        # ``or {}`` also covers keys that are present but hold None.
        pat = entry.get("inflections_pattern") or {}
        wikt = entry.get("inflections_wiktionary") or {}
        sem = entry.get("semantics_combined") or {}

        # --- Header ---
        pos_class = pos_classes.get(pos_key, "pos-noun")
        lemma = (
            wikt.get("base_form")
            or pat.get("base_form")
            or sem.get("lemma")
            or data.get("input_word")
        )
        out.append(
            '\n<div class="ling-card">\n'
            '<div class="ling-header">\n'
            f'<span class="ling-lemma">{cell(lemma, "")}</span>\n'
            f'<span class="ling-pos {pos_class}">{cell(pos_key)}</span>\n'
            "</div>\n"
        )

        # --- Inflections section ---
        out.append(
            "<div class='ling-section'>"
            "<div class='ling-subtitle'>Morphology & Inflections</div>"
        )
        out.append("<table class='inflection-table'>")

        if pos_key == "noun":
            if pat:
                out.append(row("Singular", pat.get("singular")))
                out.append(row("Plural", pat.get("plural")))
                out.append(row("Context", pat.get("article")))
        elif pos_key == "verb":
            conjugation = pat.get("conjugation") or {}
            if conjugation:
                present = conjugation.get("Present") or {}
                past = conjugation.get("Past") or {}
                participles = pat.get("participles") or {}
                out.append(row("Infinitive", pat.get("infinitive") or lemma))
                out.append(
                    row("3rd Person (He/She)", present.get("he/she (3sg)"))
                )
                out.append(row("Past Simple", past.get("General")))
                out.append(
                    row(
                        "Participle (Ing)",
                        participles.get("Present Participle (gerund)"),
                    )
                )
                out.append(
                    row("Participle (Past)", participles.get("Past Participle"))
                )
        elif pos_key == "adjective":
            grading = pat.get("grading") or {}
            if grading:
                out.append(row("Positive", grading.get("Positive")))
                out.append(row("Comparative", grading.get("Comparative")))
                out.append(row("Superlative", grading.get("Superlative")))

        # Wiktionary forms fallback: skip entries lacking text rather than
        # raising KeyError, and only show "..." when the list was cut.
        forms = [
            form.get("form_text")
            for form in (wikt.get("forms_list") or [])
            if isinstance(form, dict)
        ]
        forms = [text for text in forms if text]
        if forms:
            shown = ", ".join(escape(str(text)) for text in forms[:5])
            ellipsis = "..." if len(forms) > 5 else ""
            out.append(
                "<tr><td class='inflection-label'>Other Forms (Wikt)</td>"
                f"<td>{shown}{ellipsis}</td></tr>"
            )

        out.append("</table></div>")

        # --- Semantics section ---
        out.append(
            "<div class='ling-section'>"
            "<div class='ling-subtitle'>Definitions & Senses</div>"
        )

        for sense in (sem.get("wiktionary_senses") or [])[:3]:
            # Split on ';' BEFORE escaping: escaped entities such as "&amp;"
            # contain ';' themselves and must not be turned into line breaks.
            definition = str(sense.get("definition") or "")
            gloss = "<br>".join(escape(part) for part in definition.split(";"))
            out.append(
                "<div class='sense-item'>"
                "<span class='source-badge src-wikt'>Wikt</span> "
                f"{gloss}</div>"
            )

        for sense in (sem.get("odenet_senses") or [])[:3]:
            out.append(
                "<div class='sense-item'>"
                "<span class='source-badge src-oewn'>OEWN</span> "
                f"{cell(sense.get('definition'), '')}</div>"
            )

        out.append("</div>")

        # --- Relations section (ConceptNet) ---
        relations = sem.get("conceptnet_relations") or []
        if relations:
            out.append(
                "<div class='ling-section'>"
                "<div class='ling-subtitle'>Knowledge Graph (Top 5)</div>"
            )
            out.append("<div>")
            for relation in relations[:5]:
                rel_name = cell(relation.get("relation"), "Related")
                target = cell(
                    relation.get("other_node") or relation.get("surface"), ""
                )
                out.append(
                    "<span class='rel-chip'>"
                    f"<span class='rel-type'>{rel_name}:</span> {target}</span>"
                )
            out.append("</div></div>")

        out.append("</div>")  # end card

    return "".join(out)
|
| 2341 |
+
|
| 2342 |
+
def _format_comprehensive_html(data: Dict[str, Any]) -> str:
|
| 2343 |
+
""" Generates HTML for the comprehensive sentence analysis. """
|
| 2344 |
+
if "error" in data:
|
| 2345 |
+
return f"<div style='color:red'>{data['error']}</div>"
|
| 2346 |
+
|
| 2347 |
+
html = HTML_CSS
|
| 2348 |
+
|
| 2349 |
+
# 1. Grammar Check Banner
|
| 2350 |
+
gc = data.get("grammar_check", [])
|
| 2351 |
+
if isinstance(gc, list) and len(gc) == 1 and gc[0].get("status") == "perfect":
|
| 2352 |
+
html += "<div class='grammar-alert alert-green'><strong>✓ Grammar Check Passed:</strong> No obvious errors detected.</div>"
|
| 2353 |
+
elif isinstance(gc, list) and gc:
|
| 2354 |
+
html += "<div class='grammar-alert alert-red'><strong>⚠ Grammar Issues Detected:</strong><br>"
|
| 2355 |
+
for err in gc:
|
| 2356 |
+
msg = err.get("message", "Error")
|
| 2357 |
+
bad = err.get("incorrect_text", "")
|
| 2358 |
+
html += f"• {msg} (in: '<em>{bad}</em>')<br>"
|
| 2359 |
+
html += "</div>"
|
| 2360 |
+
|
| 2361 |
+
# 2. Lemma Deep Dive Accordion
|
| 2362 |
+
deep_dive = data.get("lemma_deep_dive", {})
|
| 2363 |
+
if not deep_dive:
|
| 2364 |
+
html += "<p>No deep analysis available.</p>"
|
| 2365 |
+
else:
|
| 2366 |
+
html += "<h3>Word-by-Word Analysis</h3>"
|
| 2367 |
+
for lemma, details in deep_dive.items():
|
| 2368 |
+
# Construct a fake "single word" object to reuse the formatting function
|
| 2369 |
+
# We need to reshape the deep_dive structure slightly to match the expected format
|
| 2370 |
+
# The deep dive has keys "inflection_analysis" and "semantic_analysis".
|
| 2371 |
+
# We need to map this back to { "analysis": { "pos": [ entry... ] } }
|
| 2372 |
+
|
| 2373 |
+
# This is a bit tricky because deep_dive separates inflection from semantics
|
| 2374 |
+
# while the word analyzer groups them by POS entry.
|
| 2375 |
+
# We will generate a simplified view here.
|
| 2376 |
+
|
| 2377 |
+
html += f"<details><summary>{lemma}</summary>"
|
| 2378 |
+
|
| 2379 |
+
inflections = details.get("inflection_analysis", {})
|
| 2380 |
+
semantics = details.get("semantic_analysis", {})
|
| 2381 |
+
|
| 2382 |
+
# We need to guess the POS keys present
|
| 2383 |
+
all_keys = set([k.split('_')[0] for k in inflections.keys()])
|
| 2384 |
+
|
| 2385 |
+
reconstructed_data = {"analysis": {}}
|
| 2386 |
+
|
| 2387 |
+
for pos in all_keys:
|
| 2388 |
+
entry = {
|
| 2389 |
+
"inflections_wiktionary": inflections.get(f"{pos}_wiktionary"),
|
| 2390 |
+
"inflections_pattern": inflections.get(f"{pos}_pattern"),
|
| 2391 |
+
"semantics_combined": {
|
| 2392 |
+
"lemma": lemma,
|
| 2393 |
+
"wiktionary_senses": [s for s in semantics.get(f"{pos}_senses", []) if s.get('source') == 'wiktionary'],
|
| 2394 |
+
"odenet_senses": [s for s in semantics.get(f"{pos}_senses", []) if s.get('source') == 'oewn'],
|
| 2395 |
+
"conceptnet_relations": semantics.get("conceptnet_relations", [])
|
| 2396 |
+
}
|
| 2397 |
+
}
|
| 2398 |
+
reconstructed_data["analysis"][pos] = [entry]
|
| 2399 |
+
|
| 2400 |
+
html += _format_word_analysis_html(reconstructed_data)
|
| 2401 |
+
html += "</details>"
|
| 2402 |
+
|
| 2403 |
+
return html
|
| 2404 |
+
|
| 2405 |
# ============================================================================
|
| 2406 |
# 8. GRADIO UI CREATION (Adapted for English)
|
| 2407 |
# ============================================================================
|
|
|
|
| 2581 |
analyze_button = gr.Button("Run Comprehensive Analysis", variant="primary")
|
| 2582 |
|
| 2583 |
status_output = gr.Markdown(value="", visible=True)
|
| 2584 |
+
# --- CHANGED: Added HTML output ---
|
| 2585 |
+
html_output = gr.HTML(label="Visual Report")
|
| 2586 |
+
json_output = gr.JSON(label="Raw JSON Data")
|
| 2587 |
|
| 2588 |
+
# --- CHANGED: Wrapper to return Status, HTML, and JSON ---
|
| 2589 |
+
def run_analysis_with_status_visual(text, top_n):
    """Run the comprehensive analysis and stream (status, HTML, JSON) updates.

    Generator used as the click handler: it first yields a progress status
    with empty outputs, then the final status, the rendered HTML report and
    the raw result. Any exception is reported as a third tuple carrying the
    error text, an error ``<div>`` and a JSON payload with the traceback.

    Args:
        text: Text to analyse; forwarded to ``comprehensive_english_analysis``.
        top_n: Forwarded unchanged to ``comprehensive_english_analysis``.

    Yields:
        Tuples of ``(status_markdown, report_html, json_payload)``.
    """
    # Function-scope import; the report variable is deliberately not named
    # ``html`` so it does not shadow the stdlib module.
    from html import escape

    try:
        yield "🔄 Analyzing...", "", {}  # Clear outputs

        result = comprehensive_english_analysis(text, top_n)
        report_html = _format_comprehensive_html(result)

        status = f"✅ Analysis complete! Found {len(result.get('lemma_deep_dive', {}))} lemmas."
        yield status, report_html, result
    except Exception as e:
        message = str(e)
        error_status = f"❌ Error: {message}"
        # Exception text can echo user input; escape it before it is placed
        # into the HTML component. The JSON payload keeps the raw text.
        error_html = f"<div style='color:red'>{escape(message)}</div>"
        yield error_status, error_html, {"error": message, "traceback": traceback.format_exc()}
|
| 2604 |
|
| 2605 |
analyze_button.click(
|
| 2606 |
+
fn=run_analysis_with_status_visual,
|
| 2607 |
inputs=[text_input, top_n_number],
|
| 2608 |
+
outputs=[status_output, html_output, json_output],
|
| 2609 |
api_name="comprehensive_analysis"
|
| 2610 |
)
|
| 2611 |
|
| 2612 |
gr.Examples(
|
| 2613 |
+
[["The cat sleeps on the table.", 3]],
|
|
|
|
|
|
|
|
|
|
| 2614 |
inputs=[text_input, top_n_number],
|
| 2615 |
+
outputs=[status_output, html_output, json_output],
|
| 2616 |
+
fn=run_analysis_with_status_visual,
|
| 2617 |
cache_examples=False
|
| 2618 |
)
|
| 2619 |
|
|
|
|
| 2635 |
)
|
| 2636 |
|
| 2637 |
engine_radio = gr.Radio(
|
| 2638 |
+
label="Select Analysis Engine",
|
| 2639 |
choices=[
|
| 2640 |
("Wiktionary (Default)", "wiktionary"),
|
| 2641 |
("HanTa (EN)", "hanta"),
|
|
|
|
| 2649 |
|
| 2650 |
analyze_button = gr.Button("Analyze Word", variant="primary")
|
| 2651 |
|
| 2652 |
+
# --- CHANGED: Added HTML output component ---
|
| 2653 |
+
html_output = gr.HTML(label="Visual Report")
|
| 2654 |
+
json_output = gr.JSON(label="Raw JSON Data")
|
| 2655 |
|
| 2656 |
+
# --- CHANGED: Wrapper function to return both HTML and JSON ---
|
| 2657 |
+
def run_word_visual(word, top_n, engine):
    """Analyse one word and return both the HTML report and the raw data.

    Calls ``analyze_word_encyclopedia`` with the language fixed to ``'en'``
    and renders the result with ``_format_word_analysis_html``.

    Returns:
        A ``(html_report, raw_result)`` tuple, matching the order of the
        handler's output components.
    """
    analysis = analyze_word_encyclopedia(word, top_n, engine, 'en')
    return _format_word_analysis_html(analysis), analysis
|
| 2661 |
+
|
| 2662 |
analyze_button.click(
|
| 2663 |
+
fn=run_word_visual, # Use wrapper
|
| 2664 |
inputs=[word_input, top_n_number, engine_radio],
|
| 2665 |
+
outputs=[html_output, json_output], # Output to both
|
| 2666 |
api_name="analyze_word"
|
| 2667 |
)
|
| 2668 |
|
| 2669 |
gr.Examples(
|
| 2670 |
+
[["run", 3, "wiktionary"], ["water", 0, "wiktionary"]],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2671 |
inputs=[word_input, top_n_number, engine_radio],
|
| 2672 |
+
outputs=[html_output, json_output],
|
| 2673 |
+
fn=run_word_visual,
|
| 2674 |
cache_examples=False
|
| 2675 |
)
|
| 2676 |
|