Spaces:
Sleeping
Sleeping
pretty output
Browse files
app.py
CHANGED
|
@@ -3278,6 +3278,332 @@ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engin
|
|
| 3278 |
}
|
| 3279 |
|
| 3280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3281 |
# ============================================================================
|
| 3282 |
# 8. GRADIO UI CREATION
|
| 3283 |
# ============================================================================
|
|
@@ -3417,47 +3743,46 @@ def create_conceptnet_tab():
|
|
| 3417 |
|
| 3418 |
def create_combined_tab():
|
| 3419 |
"""Creates the UI for the CONTEXTUAL Comprehensive Analyzer tab."""
|
| 3420 |
-
gr.Markdown("# 🚀
|
| 3421 |
-
gr.Markdown("
|
| 3422 |
with gr.Column():
|
| 3423 |
text_input = gr.Textbox(
|
| 3424 |
-
label="
|
| 3425 |
-
placeholder="
|
| 3426 |
lines=5
|
| 3427 |
)
|
| 3428 |
top_n_number = gr.Number(
|
| 3429 |
-
label="Limit
|
| 3430 |
-
value=0,
|
| 3431 |
-
step=1,
|
| 3432 |
-
minimum=0,
|
| 3433 |
-
interactive=True
|
| 3434 |
)
|
| 3435 |
-
analyze_button = gr.Button("
|
| 3436 |
|
| 3437 |
-
# *** ADD STATUS OUTPUT ***
|
| 3438 |
status_output = gr.Markdown(value="", visible=True)
|
| 3439 |
-
|
|
|
|
|
|
|
| 3440 |
|
| 3441 |
-
|
| 3442 |
-
def run_analysis_with_status(text, top_n):
|
| 3443 |
try:
|
| 3444 |
-
status = "🔄
|
| 3445 |
-
yield status, {}
|
| 3446 |
|
| 3447 |
result = comprehensive_german_analysis(text, top_n)
|
| 3448 |
|
| 3449 |
-
|
| 3450 |
-
|
|
|
|
|
|
|
|
|
|
| 3451 |
|
| 3452 |
except Exception as e:
|
| 3453 |
-
error_status = f"❌
|
| 3454 |
-
|
| 3455 |
-
yield error_status, error_result
|
| 3456 |
|
| 3457 |
analyze_button.click(
|
| 3458 |
-
fn=
|
| 3459 |
inputs=[text_input, top_n_number],
|
| 3460 |
-
outputs=[status_output,
|
| 3461 |
api_name="comprehensive_analysis"
|
| 3462 |
)
|
| 3463 |
|
|
@@ -3465,56 +3790,57 @@ def create_combined_tab():
|
|
| 3465 |
[["Die Katze schlafen auf dem Tisch.", 3],
|
| 3466 |
["Das ist ein Huas.", 0],
|
| 3467 |
["Ich laufe schnell.", 3],
|
| 3468 |
-
["Der Gärtner pflanzt einen Baum.", 5],
|
| 3469 |
-
["Ich fahre an den See.", 3]],
|
| 3470 |
inputs=[text_input, top_n_number],
|
| 3471 |
-
outputs=[status_output,
|
| 3472 |
-
fn=
|
| 3473 |
cache_examples=False
|
| 3474 |
)
|
| 3475 |
|
| 3476 |
def create_word_encyclopedia_tab():
|
| 3477 |
"""--- UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
|
| 3478 |
-
gr.Markdown("# 📖
|
| 3479 |
-
gr.Markdown("
|
| 3480 |
|
| 3481 |
with gr.Column():
|
| 3482 |
word_input = gr.Textbox(
|
| 3483 |
-
label="
|
| 3484 |
-
placeholder="
|
| 3485 |
)
|
| 3486 |
|
| 3487 |
with gr.Row():
|
| 3488 |
top_n_number = gr.Number(
|
| 3489 |
-
label="Limit
|
| 3490 |
-
value=0,
|
| 3491 |
-
step=1,
|
| 3492 |
-
minimum=0,
|
| 3493 |
-
interactive=True
|
| 3494 |
)
|
| 3495 |
|
| 3496 |
-
# --- ADD DWDSMOR TO THE RADIO BUTTONS ---
|
| 3497 |
engine_radio = gr.Radio(
|
| 3498 |
-
label="
|
| 3499 |
choices=[
|
| 3500 |
-
("Wiktionary (
|
| 3501 |
-
("DWDSmor (
|
| 3502 |
("HanTa (Fallback 2)", "hanta"),
|
| 3503 |
("IWNLP (Fallback 3)", "iwnlp")
|
| 3504 |
],
|
| 3505 |
value="wiktionary",
|
| 3506 |
interactive=True
|
| 3507 |
)
|
| 3508 |
-
# --- END OF CHANGE ---
|
| 3509 |
|
| 3510 |
-
analyze_button = gr.Button("
|
| 3511 |
|
| 3512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3513 |
|
| 3514 |
analyze_button.click(
|
| 3515 |
-
fn=
|
| 3516 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3517 |
-
outputs=[
|
| 3518 |
api_name="analyze_word"
|
| 3519 |
)
|
| 3520 |
|
|
@@ -3522,11 +3848,10 @@ def create_word_encyclopedia_tab():
|
|
| 3522 |
[["Lauf", 3, "wiktionary"],
|
| 3523 |
["See", 0, "wiktionary"],
|
| 3524 |
["schnell", 3, "wiktionary"],
|
| 3525 |
-
["
|
| 3526 |
-
["gebildet", 0, "dwdsmor"]], # Example to show the new engine
|
| 3527 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3528 |
-
outputs=[
|
| 3529 |
-
fn=
|
| 3530 |
cache_examples=False
|
| 3531 |
)
|
| 3532 |
|
|
|
|
| 3278 |
}
|
| 3279 |
|
| 3280 |
|
| 3281 |
+
# ============================================================================
|
| 3282 |
+
# 7.5 VISUALIZATION & HTML HELPERS (DE)
|
| 3283 |
+
# ============================================================================
|
| 3284 |
+
|
| 3285 |
+
HTML_CSS = """
|
| 3286 |
+
<style>
|
| 3287 |
+
/* Card Container - High Contrast */
|
| 3288 |
+
.ling-card {
|
| 3289 |
+
font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
|
| 3290 |
+
border: 1px solid #d1d5db;
|
| 3291 |
+
border-radius: 8px;
|
| 3292 |
+
padding: 20px;
|
| 3293 |
+
margin-bottom: 20px;
|
| 3294 |
+
background: #ffffff;
|
| 3295 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
| 3296 |
+
}
|
| 3297 |
+
|
| 3298 |
+
/* Header Section */
|
| 3299 |
+
.ling-header {
|
| 3300 |
+
display: flex;
|
| 3301 |
+
align-items: center;
|
| 3302 |
+
margin-bottom: 15px;
|
| 3303 |
+
border-bottom: 2px solid #e5e7eb;
|
| 3304 |
+
padding-bottom: 10px;
|
| 3305 |
+
}
|
| 3306 |
+
.ling-lemma {
|
| 3307 |
+
font-size: 1.8em;
|
| 3308 |
+
font-weight: 800;
|
| 3309 |
+
color: #111827;
|
| 3310 |
+
margin-right: 12px;
|
| 3311 |
+
}
|
| 3312 |
+
.ling-pos {
|
| 3313 |
+
font-size: 0.85em;
|
| 3314 |
+
text-transform: uppercase;
|
| 3315 |
+
font-weight: 700;
|
| 3316 |
+
padding: 4px 10px;
|
| 3317 |
+
border-radius: 6px;
|
| 3318 |
+
color: #fff;
|
| 3319 |
+
letter-spacing: 0.05em;
|
| 3320 |
+
}
|
| 3321 |
+
|
| 3322 |
+
/* POS Colors */
|
| 3323 |
+
.pos-noun { background-color: #2563eb; } /* Blue */
|
| 3324 |
+
.pos-verb { background-color: #059669; } /* Green */
|
| 3325 |
+
.pos-adj { background-color: #d97706; } /* Amber */
|
| 3326 |
+
.pos-adv { background-color: #7c3aed; } /* Purple */
|
| 3327 |
+
.pos-name { background-color: #db2777; } /* Pink */
|
| 3328 |
+
.pos-other { background-color: #4b5563; } /* Gray */
|
| 3329 |
+
|
| 3330 |
+
/* Section Headers */
|
| 3331 |
+
.ling-section { margin-top: 15px; }
|
| 3332 |
+
.ling-subtitle {
|
| 3333 |
+
font-size: 0.85em;
|
| 3334 |
+
font-weight: 700;
|
| 3335 |
+
color: #374151;
|
| 3336 |
+
text-transform: uppercase;
|
| 3337 |
+
margin-bottom: 8px;
|
| 3338 |
+
border-left: 4px solid #3b82f6;
|
| 3339 |
+
padding-left: 8px;
|
| 3340 |
+
}
|
| 3341 |
+
|
| 3342 |
+
/* Tables */
|
| 3343 |
+
.inflection-table { width: 100%; font-size: 0.95em; border-collapse: collapse; margin-bottom: 10px; }
|
| 3344 |
+
.inflection-table td { padding: 6px 10px; border-bottom: 1px solid #e5e7eb; color: #1f2937; }
|
| 3345 |
+
.inflection-label { color: #6b7280; font-weight: 600; width: 35%; background: #f9fafb; }
|
| 3346 |
+
|
| 3347 |
+
/* Senses */
|
| 3348 |
+
.sense-item { margin-bottom: 8px; line-height: 1.5; font-size: 1em; color: #1f2937; }
|
| 3349 |
+
.source-badge {
|
| 3350 |
+
display: inline-block; font-size: 0.75em; font-weight: bold; padding: 2px 6px;
|
| 3351 |
+
border-radius: 4px; border: 1px solid; margin-right: 8px; vertical-align: middle;
|
| 3352 |
+
}
|
| 3353 |
+
.src-wikt { background: #fff1f2; color: #9f1239; border-color: #fda4af; }
|
| 3354 |
+
.src-oewn { background: #eff6ff; color: #1e40af; border-color: #93c5fd; }
|
| 3355 |
+
|
| 3356 |
+
/* Relations Chips */
|
| 3357 |
+
.rel-chip {
|
| 3358 |
+
display: inline-block;
|
| 3359 |
+
background: #f3f4f6;
|
| 3360 |
+
color: #1f2937;
|
| 3361 |
+
padding: 4px 10px;
|
| 3362 |
+
border-radius: 15px;
|
| 3363 |
+
font-size: 0.9em;
|
| 3364 |
+
margin: 3px;
|
| 3365 |
+
border: 1px solid #d1d5db;
|
| 3366 |
+
font-weight: 500;
|
| 3367 |
+
}
|
| 3368 |
+
.rel-type { color: #6b7280; font-size: 0.8em; margin-right: 4px; font-weight: 700; text-transform: lowercase;}
|
| 3369 |
+
|
| 3370 |
+
/* Collapsible */
|
| 3371 |
+
.kg-details > summary {
|
| 3372 |
+
cursor: pointer; color: #2563eb; font-size: 0.9em; font-weight: 600;
|
| 3373 |
+
margin-top: 10px; padding: 6px; border-radius: 4px; width: fit-content;
|
| 3374 |
+
}
|
| 3375 |
+
.kg-details > summary:hover { text-decoration: underline; background: #eff6ff; }
|
| 3376 |
+
.kg-content { margin-top: 10px; padding: 10px; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb; }
|
| 3377 |
+
|
| 3378 |
+
/* Alert Banners */
|
| 3379 |
+
.grammar-alert { padding: 10px; border-radius: 6px; margin-bottom: 15px; border: 1px solid; }
|
| 3380 |
+
.alert-green { background: #ecfdf5; color: #065f46; border-color: #a7f3d0; }
|
| 3381 |
+
.alert-red { background: #fef2f2; color: #991b1b; border-color: #fecaca; }
|
| 3382 |
+
</style>
|
| 3383 |
+
"""
|
| 3384 |
+
|
| 3385 |
+
def _format_word_analysis_html(data: Dict[str, Any]) -> str:
|
| 3386 |
+
""" Generates HTML for a single word analysis result (German version). """
|
| 3387 |
+
if not data or "analysis" not in data:
|
| 3388 |
+
return f"{HTML_CSS}<div class='ling-card'>Keine Daten verfügbar. {data.get('info', '')}</div>"
|
| 3389 |
+
|
| 3390 |
+
html = HTML_CSS
|
| 3391 |
+
analysis = data["analysis"]
|
| 3392 |
+
|
| 3393 |
+
# Iterate over POS
|
| 3394 |
+
for pos_key, entries in analysis.items():
|
| 3395 |
+
if not entries: continue
|
| 3396 |
+
entry = entries[0] # Take best candidate
|
| 3397 |
+
|
| 3398 |
+
# --- POS Display Logic ---
|
| 3399 |
+
display_pos = pos_key.upper()
|
| 3400 |
+
css_class = "pos-other"
|
| 3401 |
+
|
| 3402 |
+
if pos_key == 'noun':
|
| 3403 |
+
css_class = "pos-noun"
|
| 3404 |
+
display_pos = "SUBSTANTIV"
|
| 3405 |
+
elif pos_key == 'verb':
|
| 3406 |
+
css_class = "pos-verb"
|
| 3407 |
+
display_pos = "VERB"
|
| 3408 |
+
elif pos_key == 'adj' or pos_key == 'adjective':
|
| 3409 |
+
css_class = "pos-adj"
|
| 3410 |
+
display_pos = "ADJEKTIV"
|
| 3411 |
+
elif pos_key == 'adv' or pos_key == 'adverb':
|
| 3412 |
+
css_class = "pos-adv"
|
| 3413 |
+
display_pos = "ADVERB"
|
| 3414 |
+
|
| 3415 |
+
# Data Extraction
|
| 3416 |
+
inf_wikt = entry.get("inflections_wiktionary") or {}
|
| 3417 |
+
inf_pat = entry.get("inflections_pattern") or {}
|
| 3418 |
+
sem_comb = entry.get("semantics_combined") or {}
|
| 3419 |
+
|
| 3420 |
+
lemma = inf_wikt.get("base_form") or \
|
| 3421 |
+
inf_pat.get("base_form") or \
|
| 3422 |
+
sem_comb.get("lemma") or \
|
| 3423 |
+
data.get("input_word") or "?"
|
| 3424 |
+
|
| 3425 |
+
# --- CARD START ---
|
| 3426 |
+
html += f"""
|
| 3427 |
+
<div class="ling-card">
|
| 3428 |
+
<div class="ling-header">
|
| 3429 |
+
<span class="ling-lemma">{lemma}</span>
|
| 3430 |
+
<span class="ling-pos {css_class}">{display_pos}</span>
|
| 3431 |
+
</div>
|
| 3432 |
+
"""
|
| 3433 |
+
|
| 3434 |
+
# --- Inflections Section (Pattern.de logic) ---
|
| 3435 |
+
html += "<div class='ling-section'><div class='ling-subtitle'>Morphologie & Flexion</div>"
|
| 3436 |
+
html += "<table class='inflection-table'>"
|
| 3437 |
+
|
| 3438 |
+
has_pattern_data = bool(inf_pat) and "error" not in inf_pat
|
| 3439 |
+
|
| 3440 |
+
if pos_key == 'noun':
|
| 3441 |
+
# Pattern.de returns 'declension' or 'declension_by_gender'
|
| 3442 |
+
decl = inf_pat.get('declension')
|
| 3443 |
+
# Fallback if declension is inside gender key
|
| 3444 |
+
if not decl and inf_pat.get('declension_by_gender'):
|
| 3445 |
+
first_gender = list(inf_pat['declension_by_gender'].keys())[0]
|
| 3446 |
+
decl = inf_pat['declension_by_gender'][first_gender]
|
| 3447 |
+
|
| 3448 |
+
if decl:
|
| 3449 |
+
# Extract singular/plural nominative for concise display
|
| 3450 |
+
nom_sg = decl.get('Nominativ Singular', {}).get('bare', '-')
|
| 3451 |
+
nom_pl = decl.get('Nominativ Plural', {}).get('bare', '-')
|
| 3452 |
+
gen_sg = decl.get('Genitiv Singular', {}).get('bare', '-')
|
| 3453 |
+
|
| 3454 |
+
html += f"<tr><td class='inflection-label'>Singular (Nom)</td><td>{nom_sg}</td></tr>"
|
| 3455 |
+
html += f"<tr><td class='inflection-label'>Plural (Nom)</td><td>{nom_pl}</td></tr>"
|
| 3456 |
+
html += f"<tr><td class='inflection-label'>Genitiv (Sg)</td><td>{gen_sg}</td></tr>"
|
| 3457 |
+
|
| 3458 |
+
gender = inf_pat.get('gender', 'Unknown')
|
| 3459 |
+
html += f"<tr><td class='inflection-label'>Genus</td><td>{gender}</td></tr>"
|
| 3460 |
+
else:
|
| 3461 |
+
html += f"<tr><td colspan='2'><i>Keine Flexionsdaten gefunden.</i></td></tr>"
|
| 3462 |
+
|
| 3463 |
+
elif pos_key == 'verb':
|
| 3464 |
+
cj = inf_pat.get('conjugation') or {}
|
| 3465 |
+
pres = cj.get('Präsens') or {}
|
| 3466 |
+
past = cj.get('Präteritum') or {}
|
| 3467 |
+
parts = inf_pat.get('participles') or {}
|
| 3468 |
+
|
| 3469 |
+
html += f"<tr><td class='inflection-label'>Infinitiv</td><td>{inf_pat.get('infinitive', lemma)}</td></tr>"
|
| 3470 |
+
html += f"<tr><td class='inflection-label'>3. Pers. Sg. (er/sie)</td><td>{pres.get('er/sie/es', '-')}</td></tr>"
|
| 3471 |
+
html += f"<tr><td class='inflection-label'>Präteritum (ich)</td><td>{past.get('ich', '-')}</td></tr>"
|
| 3472 |
+
html += f"<tr><td class='inflection-label'>Partizip II</td><td>{parts.get('Partizip Perfekt', '-')}</td></tr>"
|
| 3473 |
+
|
| 3474 |
+
elif pos_key in ['adjective', 'adj']:
|
| 3475 |
+
html += f"<tr><td class='inflection-label'>Positiv</td><td>{inf_pat.get('predicative', lemma)}</td></tr>"
|
| 3476 |
+
html += f"<tr><td class='inflection-label'>Komparativ</td><td>{inf_pat.get('comparative', '-')}</td></tr>"
|
| 3477 |
+
html += f"<tr><td class='inflection-label'>Superlativ</td><td>{inf_pat.get('superlative', '-')}</td></tr>"
|
| 3478 |
+
|
| 3479 |
+
# Wiktionary Forms (The "Other Forms" box)
|
| 3480 |
+
forms_list = inf_wikt.get("forms_list") or []
|
| 3481 |
+
if forms_list:
|
| 3482 |
+
forms_str_list = []
|
| 3483 |
+
for f in forms_list[:8]: # Show up to 8 forms
|
| 3484 |
+
txt = f.get('form_text')
|
| 3485 |
+
if txt: forms_str_list.append(txt)
|
| 3486 |
+
|
| 3487 |
+
if forms_str_list:
|
| 3488 |
+
html += f"<tr><td class='inflection-label'>Weitere Formen (DB)</td><td>{', '.join(forms_str_list)}</td></tr>"
|
| 3489 |
+
|
| 3490 |
+
html += "</table></div>"
|
| 3491 |
+
|
| 3492 |
+
# --- Semantics Section ---
|
| 3493 |
+
html += "<div class='ling-section'><div class='ling-subtitle'>Bedeutungen & Definitionen</div>"
|
| 3494 |
+
|
| 3495 |
+
wikt_senses = sem_comb.get("wiktionary_senses") or []
|
| 3496 |
+
ode_senses = sem_comb.get("odenet_senses") or []
|
| 3497 |
+
|
| 3498 |
+
if not wikt_senses and not ode_senses:
|
| 3499 |
+
html += "<div class='sense-item'><i>Keine Definitionen gefunden.</i></div>"
|
| 3500 |
+
|
| 3501 |
+
for s in wikt_senses[:3]:
|
| 3502 |
+
gloss_raw = s.get("definition") or ""
|
| 3503 |
+
gloss = str(gloss_raw).replace(";", "<br>")
|
| 3504 |
+
if gloss:
|
| 3505 |
+
html += f"<div class='sense-item'><span class='source-badge src-wikt'>Wikt</span> {gloss}</div>"
|
| 3506 |
+
|
| 3507 |
+
for s in ode_senses[:3]:
|
| 3508 |
+
defi = s.get("definition") or ""
|
| 3509 |
+
if defi:
|
| 3510 |
+
html += f"<div class='sense-item'><span class='source-badge src-oewn'>OdeNet</span> {defi}</div>"
|
| 3511 |
+
|
| 3512 |
+
html += "</div>"
|
| 3513 |
+
|
| 3514 |
+
# --- Relations Section ---
|
| 3515 |
+
rels = sem_comb.get("conceptnet_relations") or []
|
| 3516 |
+
if rels:
|
| 3517 |
+
html += "<div class='ling-section'><div class='ling-subtitle'>Wissensgraph (Kontext)</div>"
|
| 3518 |
+
|
| 3519 |
+
top_n_rels = 6
|
| 3520 |
+
visible_rels = rels[:top_n_rels]
|
| 3521 |
+
hidden_rels = rels[top_n_rels:]
|
| 3522 |
+
|
| 3523 |
+
def render_rel(r):
|
| 3524 |
+
rel_name = r.get("relation", "Rel")
|
| 3525 |
+
target = r.get("other_node") or "?"
|
| 3526 |
+
if target == "?" and "surface" in r:
|
| 3527 |
+
parts = str(r["surface"]).split()
|
| 3528 |
+
if len(parts) > 2: target = parts[-1]
|
| 3529 |
+
return f"<span class='rel-chip'><span class='rel-type'>{rel_name}:</span> {target}</span>"
|
| 3530 |
+
|
| 3531 |
+
html += "<div>"
|
| 3532 |
+
for r in visible_rels:
|
| 3533 |
+
html += render_rel(r)
|
| 3534 |
+
html += "</div>"
|
| 3535 |
+
|
| 3536 |
+
if hidden_rels:
|
| 3537 |
+
html += f"""
|
| 3538 |
+
<details class='kg-details'>
|
| 3539 |
+
<summary>Zeige {len(hidden_rels)} weitere Relationen</summary>
|
| 3540 |
+
<div class='kg-content'>
|
| 3541 |
+
"""
|
| 3542 |
+
for r in hidden_rels:
|
| 3543 |
+
html += render_rel(r)
|
| 3544 |
+
html += "</div></details>"
|
| 3545 |
+
|
| 3546 |
+
html += "</div>"
|
| 3547 |
+
|
| 3548 |
+
html += "</div>" # End Card
|
| 3549 |
+
|
| 3550 |
+
return html
|
| 3551 |
+
|
| 3552 |
+
def _format_comprehensive_html(data: Dict[str, Any]) -> str:
|
| 3553 |
+
""" Generates HTML for the comprehensive sentence analysis. """
|
| 3554 |
+
if "error" in data:
|
| 3555 |
+
return f"<div style='color:red'>{data['error']}</div>"
|
| 3556 |
+
|
| 3557 |
+
html = HTML_CSS
|
| 3558 |
+
|
| 3559 |
+
# 1. Grammar Check Banner
|
| 3560 |
+
gc = data.get("grammar_check", [])
|
| 3561 |
+
if isinstance(gc, list) and len(gc) == 1 and gc[0].get("status") == "perfect":
|
| 3562 |
+
html += "<div class='grammar-alert alert-green'><strong>✓ Grammatikprüfung:</strong> Keine offensichtlichen Fehler gefunden.</div>"
|
| 3563 |
+
elif isinstance(gc, list) and gc:
|
| 3564 |
+
html += "<div class='grammar-alert alert-red'><strong>⚠ Grammatik-Hinweise:</strong><br>"
|
| 3565 |
+
for err in gc:
|
| 3566 |
+
msg = err.get("message", "Fehler")
|
| 3567 |
+
bad = err.get("incorrect_text", "")
|
| 3568 |
+
html += f"• {msg} (in: '<em>{bad}</em>')<br>"
|
| 3569 |
+
html += "</div>"
|
| 3570 |
+
|
| 3571 |
+
# 2. Lemma Deep Dive Accordion
|
| 3572 |
+
deep_dive = data.get("lemma_deep_dive", {})
|
| 3573 |
+
if not deep_dive:
|
| 3574 |
+
html += "<p>Keine Tiefenanalyse verfügbar.</p>"
|
| 3575 |
+
else:
|
| 3576 |
+
html += "<h3>Wort-für-Wort Analyse</h3>"
|
| 3577 |
+
for lemma, details in deep_dive.items():
|
| 3578 |
+
# Reconstruct a simplified data structure to reuse the word-formatter
|
| 3579 |
+
html += f"<details><summary>{lemma}</summary>"
|
| 3580 |
+
|
| 3581 |
+
inflections = details.get("inflection_analysis", {})
|
| 3582 |
+
semantics = details.get("semantic_analysis", {})
|
| 3583 |
+
|
| 3584 |
+
# Guess the POS keys present
|
| 3585 |
+
all_keys = set([k.split('_')[0] for k in inflections.keys()])
|
| 3586 |
+
|
| 3587 |
+
reconstructed_data = {"analysis": {}}
|
| 3588 |
+
|
| 3589 |
+
for pos in all_keys:
|
| 3590 |
+
entry = {
|
| 3591 |
+
"inflections_wiktionary": inflections.get(f"{pos}_wiktionary"),
|
| 3592 |
+
"inflections_pattern": inflections.get(f"{pos}_pattern"),
|
| 3593 |
+
"semantics_combined": {
|
| 3594 |
+
"lemma": lemma,
|
| 3595 |
+
"wiktionary_senses": [s for s in semantics.get(f"{pos}_senses", []) if s.get('source') == 'wiktionary'],
|
| 3596 |
+
"odenet_senses": [s for s in semantics.get(f"{pos}_senses", []) if s.get('source') == 'odenet'],
|
| 3597 |
+
"conceptnet_relations": semantics.get("conceptnet_relations", [])
|
| 3598 |
+
}
|
| 3599 |
+
}
|
| 3600 |
+
reconstructed_data["analysis"][pos] = [entry]
|
| 3601 |
+
|
| 3602 |
+
html += _format_word_analysis_html(reconstructed_data)
|
| 3603 |
+
html += "</details>"
|
| 3604 |
+
|
| 3605 |
+
return html
|
| 3606 |
+
|
| 3607 |
# ============================================================================
|
| 3608 |
# 8. GRADIO UI CREATION
|
| 3609 |
# ============================================================================
|
|
|
|
| 3743 |
|
| 3744 |
def create_combined_tab():
|
| 3745 |
"""Creates the UI for the CONTEXTUAL Comprehensive Analyzer tab."""
|
| 3746 |
+
gr.Markdown("# 🚀 Umfassende Analyse (Kontextuell)")
|
| 3747 |
+
gr.Markdown("Dieses Tool bietet eine tiefe, **lemma-basierte** Analyse *im Kontext*. Es integriert alle Tools und nutzt den **ganzen Satz**, um Bedeutungen nach Relevanz zu sortieren.")
|
| 3748 |
with gr.Column():
|
| 3749 |
text_input = gr.Textbox(
|
| 3750 |
+
label="Deutscher Text",
|
| 3751 |
+
placeholder="z.B., Die schnelle Katze springt über den faulen Hund.",
|
| 3752 |
lines=5
|
| 3753 |
)
|
| 3754 |
top_n_number = gr.Number(
|
| 3755 |
+
label="Limit semantische Bedeutungen pro POS (0 für alle)",
|
| 3756 |
+
value=0, step=1, minimum=0, interactive=True
|
|
|
|
|
|
|
|
|
|
| 3757 |
)
|
| 3758 |
+
analyze_button = gr.Button("Umfassende Analyse starten", variant="primary")
|
| 3759 |
|
|
|
|
| 3760 |
status_output = gr.Markdown(value="", visible=True)
|
| 3761 |
+
# --- NEW: Visual Output ---
|
| 3762 |
+
html_output = gr.HTML(label="Visueller Bericht")
|
| 3763 |
+
json_output = gr.JSON(label="Rohdaten (JSON)")
|
| 3764 |
|
| 3765 |
+
def run_analysis_with_status_visual(text, top_n):
|
|
|
|
| 3766 |
try:
|
| 3767 |
+
status = "🔄 Analyse läuft..."
|
| 3768 |
+
yield status, "", {}
|
| 3769 |
|
| 3770 |
result = comprehensive_german_analysis(text, top_n)
|
| 3771 |
|
| 3772 |
+
# Generate HTML
|
| 3773 |
+
html = _format_comprehensive_html(result)
|
| 3774 |
+
|
| 3775 |
+
status = f"✅ Analyse abgeschlossen! {len(result.get('lemma_deep_dive', {}))} Lemmata analysiert."
|
| 3776 |
+
yield status, html, result
|
| 3777 |
|
| 3778 |
except Exception as e:
|
| 3779 |
+
error_status = f"❌ Fehler: {str(e)}"
|
| 3780 |
+
yield error_status, f"<div style='color:red'>{str(e)}</div>", {"error": str(e), "traceback": traceback.format_exc()}
|
|
|
|
| 3781 |
|
| 3782 |
analyze_button.click(
|
| 3783 |
+
fn=run_analysis_with_status_visual,
|
| 3784 |
inputs=[text_input, top_n_number],
|
| 3785 |
+
outputs=[status_output, html_output, json_output],
|
| 3786 |
api_name="comprehensive_analysis"
|
| 3787 |
)
|
| 3788 |
|
|
|
|
| 3790 |
[["Die Katze schlafen auf dem Tisch.", 3],
|
| 3791 |
["Das ist ein Huas.", 0],
|
| 3792 |
["Ich laufe schnell.", 3],
|
| 3793 |
+
["Der Gärtner pflanzt einen Baum.", 5]],
|
|
|
|
| 3794 |
inputs=[text_input, top_n_number],
|
| 3795 |
+
outputs=[status_output, html_output, json_output],
|
| 3796 |
+
fn=run_analysis_with_status_visual,
|
| 3797 |
cache_examples=False
|
| 3798 |
)
|
| 3799 |
|
| 3800 |
def create_word_encyclopedia_tab():
|
| 3801 |
"""--- UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
|
| 3802 |
+
gr.Markdown("# 📖 Wort-Enzyklopädie (Nicht-Kontextuell)")
|
| 3803 |
+
gr.Markdown("Analysiert ein **einzelnes Wort** auf alle grammatikalischen und semantischen Formen.")
|
| 3804 |
|
| 3805 |
with gr.Column():
|
| 3806 |
word_input = gr.Textbox(
|
| 3807 |
+
label="Einzelnes deutsches Wort",
|
| 3808 |
+
placeholder="z.B., Lauf, See, schnell, heute"
|
| 3809 |
)
|
| 3810 |
|
| 3811 |
with gr.Row():
|
| 3812 |
top_n_number = gr.Number(
|
| 3813 |
+
label="Limit semantische Bedeutungen pro POS (0 für alle)",
|
| 3814 |
+
value=0, step=1, minimum=0, interactive=True
|
|
|
|
|
|
|
|
|
|
| 3815 |
)
|
| 3816 |
|
|
|
|
| 3817 |
engine_radio = gr.Radio(
|
| 3818 |
+
label="Wähle Analyse-Engine (Automatischer Fallback)",
|
| 3819 |
choices=[
|
| 3820 |
+
("Wiktionary (Standard)", "wiktionary"),
|
| 3821 |
+
("DWDSmor (Neu)", "dwdsmor"),
|
| 3822 |
("HanTa (Fallback 2)", "hanta"),
|
| 3823 |
("IWNLP (Fallback 3)", "iwnlp")
|
| 3824 |
],
|
| 3825 |
value="wiktionary",
|
| 3826 |
interactive=True
|
| 3827 |
)
|
|
|
|
| 3828 |
|
| 3829 |
+
analyze_button = gr.Button("Wort analysieren", variant="primary")
|
| 3830 |
|
| 3831 |
+
# --- NEW: Visual Output ---
|
| 3832 |
+
html_output = gr.HTML(label="Visueller Bericht")
|
| 3833 |
+
json_output = gr.JSON(label="Analyse Rohdaten (JSON)")
|
| 3834 |
+
|
| 3835 |
+
def run_word_visual(word, top_n, engine):
|
| 3836 |
+
data = analyze_word_encyclopedia(word, top_n, engine)
|
| 3837 |
+
html = _format_word_analysis_html(data)
|
| 3838 |
+
return html, data
|
| 3839 |
|
| 3840 |
analyze_button.click(
|
| 3841 |
+
fn=run_word_visual,
|
| 3842 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3843 |
+
outputs=[html_output, json_output],
|
| 3844 |
api_name="analyze_word"
|
| 3845 |
)
|
| 3846 |
|
|
|
|
| 3848 |
[["Lauf", 3, "wiktionary"],
|
| 3849 |
["See", 0, "wiktionary"],
|
| 3850 |
["schnell", 3, "wiktionary"],
|
| 3851 |
+
["gebildet", 0, "dwdsmor"]],
|
|
|
|
| 3852 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3853 |
+
outputs=[html_output, json_output],
|
| 3854 |
+
fn=run_word_visual,
|
| 3855 |
cache_examples=False
|
| 3856 |
)
|
| 3857 |
|