Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import sys | |
| from pathlib import Path | |
| import gradio as gr | |
# Put the sibling 'src' directory at the front of the import search path so the
# local package can be imported without being installed.
SRC = Path(__file__).resolve().with_name("src")
if str(SRC) not in sys.path:
    sys.path[:0] = [str(SRC)]
| from etymology_tagger.predict import EtymologyPredictor | |
# Custom stylesheet passed to gr.Blocks(css=CSS) below.
# Sections, in order:
#   .legend / .legend-item / .legend-note / .swatch  - colour legend laid out with CSS Grid
#   .tagged-output                                   - bordered box holding the annotated text
#   .etym-word (+ :hover/:focus)                     - inline, underlined, clickable words; colours
#                                                      come from the --language-color custom property
#                                                      (presumably set per word by the annotated HTML
#                                                      - confirm against EtymologyPredictor.annotate_html)
#   .breakdown-stack / -panel / -placeholder         - per-word detail panels, hidden by default
#                                                      (display: none) until shown from JavaScript
#   .eval-section / .eval-table / .eval-title        - dark-themed metrics table
CSS = """
.legend {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(210px, 1fr));
column-gap: 24px;
row-gap: 8px;
margin: 8px 0 14px;
align-items: center;
}
.legend-item {
display: grid;
grid-template-columns: 14px 1fr;
align-items: center;
column-gap: 8px;
font-size: 13px;
line-height: 1.25;
color: #111827;
}
.legend-note {
margin: -4px 0 12px;
color: #6b7280;
font-size: 12px;
}
.swatch { width: 12px; height: 12px; border-radius: 2px; display: inline-block; }
.tagged-output {
line-height: 1.8;
font-size: 16px;
min-height: 132px;
border: 1px solid #d5d8de;
border-radius: 6px;
padding: 12px;
white-space: pre-wrap;
}
.etym-word {
display: inline !important;
border-bottom: 2px solid color-mix(in srgb, var(--language-color) 42%, transparent);
border-radius: 3px;
cursor: pointer;
padding: 0 1px;
transition: color 120ms ease, background-color 120ms ease, font-weight 120ms ease;
}
.etym-word:hover,
.etym-word:focus {
color: var(--language-color) !important;
background: color-mix(in srgb, var(--language-color) 12%, transparent);
font-weight: 700;
outline: none;
}
.breakdown-stack {
margin-top: 12px;
}
.breakdown-panel {
display: none;
min-height: 160px;
white-space: pre-wrap;
border: 1px solid #d5d8de;
border-radius: 6px;
padding: 12px;
line-height: 1.45;
font-size: 14px;
text-align: left;
}
.breakdown-placeholder {
min-height: 80px;
border: 1px dashed #d5d8de;
border-radius: 6px;
padding: 12px;
font-size: 14px;
}
.eval-section {
margin-top: 32px;
padding-top: 24px;
border-top: 1px solid #e5e7eb;
}
.eval-table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
color: #ffffff;
background: #111827;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.2);
}
.eval-table th {
background: #1f2937;
font-weight: 600;
text-align: left;
padding: 10px 16px;
border-bottom: 1px solid #374151;
color: #ffffff;
}
.eval-table td {
padding: 10px 16px;
border-bottom: 1px solid #1f2937;
color: #f3f4f6;
}
.eval-table tr:last-child td {
border-bottom: none;
}
.eval-title {
font-size: 15px;
font-weight: 600;
margin-bottom: 12px;
color: var(--body-text-color, #111827);
}
"""
# Global predictor instance, created once when this module is imported and
# shared by legend_html(), evaluation_html() and tag_text(). Those functions
# read its `language_colors`, `language_frequencies` and `metadata` attributes
# and call its `annotate_html()` method.
predictor = EtymologyPredictor()
def legend_html() -> str:
    """Build the colour-legend markup for the UI.

    One swatch-plus-label entry is emitted for every language in
    ``predictor.language_colors``. The percentage shown next to each label is
    looked up in ``predictor.language_frequencies`` and defaults to 0.0 when
    the language has no entry there. An explanatory note follows the legend.

    Returns:
        An HTML fragment: a ``.legend`` div followed by a ``.legend-note`` div.
    """
    entries = "".join(
        f"<span class='legend-item'><span class='swatch' style='background:{swatch}'></span>"
        f"<span>{label} ({predictor.language_frequencies.get(label, 0.0):.2f}%)</span></span>"
        for label, swatch in predictor.language_colors.items()
    )
    note = "Labels can overlap. Percentages are based on word types in the training vocabulary."
    return f"<div class='legend'>{entries}</div><div class='legend-note'>{note}</div>"
def evaluation_html() -> str:
    """Render the stored evaluation metrics as an HTML table.

    Reads the ``"evaluation"`` entry of ``predictor.metadata`` and produces one
    table row per prediction head (``source_language`` and
    ``source_mechanism``) with precision, recall and F1 formatted to four
    decimal places. A metric missing from a head's entry is shown as 0.

    Returns:
        The ``.eval-section`` HTML fragment, or an empty string when no
        evaluation data is available.
    """
    metrics_by_head = predictor.metadata.get("evaluation", {})
    if not metrics_by_head:
        return ""

    # (metadata key, label shown in the "Component" column)
    heads = (
        ("source_language", "Source Language"),
        ("source_mechanism", "Entry Mechanism"),
    )
    body_rows = []
    for key, label in heads:
        scores = metrics_by_head.get(key, {})
        cells = "".join(
            f"<td>{scores.get(metric, 0):.4f}</td>"
            for metric in ("precision", "recall", "f1")
        )
        body_rows.append(f"<tr><td>{label}</td>{cells}</tr>")

    header = (
        "<div class='eval-section'>"
        "<div class='eval-title'>Model Performance (Held-out Test Set)</div>"
        "<table class='eval-table'>"
        "<thead><tr><th>Component</th><th>Precision</th><th>Recall</th><th>F1 Score</th></tr></thead>"
    )
    return header + "<tbody>" + "".join(body_rows) + "</tbody></table></div>"
def tag_text(text: str | None) -> str:
    """Gradio handler: turn input text into interactive annotated HTML.

    Args:
        text: The textbox contents. Gradio may deliver ``None`` instead of a
            string (for example when the value is cleared or sent as null via
            the API), so ``None`` is treated the same as blank input.

    Returns:
        For blank or missing input, an ``.etag-result`` wrapper containing an
        empty ``.tagged-output`` box. Otherwise the wrapper containing the
        colour legend followed by ``predictor.annotate_html(text)``.
    """
    # Guard against None before calling .strip(); whitespace-only counts as empty.
    if text is None or not text.strip():
        return "<div class='etag-result'><div class='tagged-output'></div></div>"
    return (
        "<div class='etag-result'>"
        + legend_html()
        + predictor.annotate_html(text)
        + "</div>"
    )
# Front-end script handed to gr.Blocks(js=JS) below.
#
# Gradio treats the `js` string as a single function and calls it once when the
# page loads. A bare `function showPanel(id) {...}` would therefore be run once
# (with `id` undefined) and never become callable from click handlers. The
# load-time function below instead registers `showPanel` on `window`.
#
# showPanel(id) hides every `.breakdown-panel` and the `.breakdown-placeholder`,
# then shows the element whose id was passed. It is presumably invoked from the
# markup produced by predictor.annotate_html when a word is clicked - confirm.
JS = """
() => {
    window.showPanel = function (id) {
        document.querySelectorAll('.breakdown-panel').forEach(p => p.style.display = 'none');
        const placeholder = document.querySelector('.breakdown-placeholder');
        if (placeholder) placeholder.style.display = 'none';
        const panel = document.getElementById(id);
        if (panel) panel.style.display = 'block';
    };
}
"""
# Assemble the Gradio UI: heading, intro text, input box, action button, the
# HTML output area and, last, the static evaluation table.
with gr.Blocks(css=CSS, js=JS, title="English Etymology Tagger") as demo:
    gr.Markdown("# English Etymology Tagger")
    gr.Markdown(
        "Automated etymological analysis using a **Multi-Task Neural Network**. "
        "Type a sentence below and click on any word to see its predicted origin path."
    )

    text = gr.Textbox(
        value="The berserk corgi said 'tycoon' from the jungle as the cosmonaut sought chaos with an avocado.",
        placeholder="Enter English text here...",
        lines=4,
        label="Input Text",
    )
    button = gr.Button("Analyze Etymology", variant="primary")
    output = gr.HTML(label="Interactive Visualization")

    # Performance metrics are rendered once, below the interactive output.
    gr.HTML(evaluation_html())

    # The same handler runs on button click, on Enter in the textbox, and on
    # initial page load (so the default sentence is analysed immediately).
    for _listen in (button.click, text.submit, demo.load):
        _listen(tag_text, inputs=[text], outputs=output)
# Launch the Gradio app only when this file is executed as a script; importing
# the module just builds `demo` without starting it.
if __name__ == "__main__":
    demo.launch()