# Hugging Face Space: ulugbeksalaev/uzmorph (status: Sleeping)
# File size: 7,098 Bytes

import gradio as gr
import json
import sys
from uzmorph import UzMorph

# One analyzer instance, created at import time and shared by analyze_word().
analyzer = UzMorph()

# Dropdown choices: "All" first, then one "CODE: description" label for each
# entry of analyzer.POS.DESCRIPTIONS.
POS_OPTIONS = ["All"]
POS_OPTIONS.extend(
    f"{tag}: {label}" for tag, label in analyzer.POS.DESCRIPTIONS.items()
)

# Result of analyzer.get_features_list(); not referenced in the visible part
# of this file.
FEATURE_COLUMNS = analyzer.get_features_list()

def analyze_word(word, pos_selection):
    """Analyze one word and render the outcome for the UI.

    Args:
        word: Raw text from the input box. It is stripped and lower-cased
            before being handed to the analyzer.
        pos_selection: Dropdown label, either "All" or "CODE: description".
            Only the text before the first colon is used as the POS filter.

    Returns:
        A ``(markdown, json_text)`` tuple. ``json_text`` is the empty string
        when no word was entered or the analyzer returned no results.
    """
    token = (word or "").strip().lower()
    if not token:
        return "Please enter a word.", ""

    # "All" (or an empty selection) means the analyzer is called unfiltered.
    if pos_selection and pos_selection != "All":
        pos_code = pos_selection.partition(":")[0].strip()
    else:
        pos_code = None

    variants = analyzer.analyze(token, pos_filter=pos_code)

    heading = f"## Results for: `{token}`\n"
    if not variants:
        return heading + "\nNo analysis found.", ""

    # Keys that are either shown in the main table or intentionally hidden,
    # so they must not be repeated in the per-variant feature table.
    core_keys = {'word', 'stem', 'lemma', 'pos', 'cse', 'cse_formula', 'note', 'ball'}

    # Collect the markdown in pieces and join once at the end.
    chunks = [heading, f"Found **{len(variants)}** variant(s)\n\n"]

    for rank, entry in enumerate(variants, 1):
        # The first variant in the list is the one flagged as best match.
        marker = " ⭐ (best match)" if rank == 1 else ""
        chunks.append(f"### Variant #{rank}{marker}\n")
        chunks.append("| Field | Value |\n|:---|:---|\n")
        chunks.append(f"| **Word** | `{entry.get('word', '')}` |\n")
        chunks.append(f"| **Stem** | `{entry.get('stem', '')}` |\n")
        chunks.append(f"| **Lemma** | `{entry.get('lemma', '')}` |\n")
        chunks.append(f"| **POS** | **{entry.get('pos', '')}** |\n")

        # Suffix rows appear only when the analyzer filled them in.
        suffix = entry.get('cse')
        if suffix:
            chunks.append(f"| **Suffix (CSE)** | `{suffix}` |\n")
        formula = entry.get('cse_formula')
        if formula:
            chunks.append(f"| **CSE Formula** | `{formula}` |\n")

        # Every remaining non-empty key becomes a row of the feature table.
        feature_rows = [
            f"| {key} | `{value}` |\n"
            for key, value in entry.items()
            if key not in core_keys and value
        ]
        if feature_rows:
            chunks.append("\n**Morphological Features:**\n\n")
            chunks.append("| Feature | Value |\n|:---|:---|\n")
            chunks.extend(feature_rows)

        remark = entry.get('note')
        if remark:
            chunks.append(f"\n*Note: {remark}*\n")
        chunks.append("\n---\n")

    markdown = "".join(chunks)
    # Raw results for the collapsible JSON panel; non-ASCII text is kept as-is.
    json_text = json.dumps(variants, ensure_ascii=False, indent=2)
    return markdown, json_text


# ── Theme ──
# Soft base theme with a teal/slate palette; fonts are requested through
# gr.themes.GoogleFont.
custom_theme = gr.themes.Soft(
    primary_hue="teal",
    secondary_hue="slate",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
)

# ── Layout ──
# Single-page app: header, three tabs (Analyze / POS Tags / About), footer.
# The custom CSS caps the page width and hides the footer element.
with gr.Blocks(
    title="UzMorph — Uzbek Morphological Analyzer",
    theme=custom_theme,
    css=".gradio-container { max-width: 1100px; margin: auto; } footer { display: none !important; }"
) as demo:
    # Header. A Markdown hard line break needs two trailing spaces before the
    # newline, which is why the description line ends in "  \n".
    gr.Markdown(
        "# UzMorph — Uzbek Morphological Analyzer using Complete Set of Endings\n"
        "Analyze Uzbek words using **Complete Set of Endings (CSE)** rules and an extensive lexicon (~122k stems).  \n"
        'Scientific Base: <a href="https://www.scopus.com/pages/publications/85212084325" target="_blank">Scopus Article</a> | '
        'Neural Model Version: <a href="https://huggingface.co/spaces/ulugbeksalaev/uzmorph_nn" target="_blank">UzMorph_NN</a> | '
        'Web: <a href="https://morph.uz" target="_blank">morph.uz</a> | '
        '<a href="https://github.com/UlugbekSalaev/uzmorph" target="_blank">Github</a> | '
        '<a href="https://pypi.org/project/uzmorph/" target="_blank">PyPi</a>'
    )

    with gr.Tabs():
        # ── Tab 1: Analyzer ──
        with gr.TabItem("Analyze"):
            with gr.Row():
                # Left column: inputs and clickable examples.
                with gr.Column(scale=1):
                    word_input = gr.Textbox(
                        label="Enter a word",
                        placeholder="maktabimizda",
                        lines=1
                    )
                    pos_filter = gr.Dropdown(
                        choices=POS_OPTIONS,
                        value="All",
                        label="POS Filter (Optional)"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")

                    # NOTE(review): the POS label in the last example is
                    # hard-coded and presumably has to match an entry of
                    # POS_OPTIONS exactly — verify against
                    # analyzer.POS.DESCRIPTIONS.
                    gr.Examples(
                        examples=[
                            ["ishladik", "All"],
                            ["kitoblarim", "All"],
                            ["bording", "All"],
                            ["yozdi", "All"],
                            ["olma", "VERB: Verb {Fe'l}"],
                        ],
                        inputs=[word_input, pos_filter]
                    )

                # Right column: rendered markdown result.
                with gr.Column(scale=2):
                    result_md = gr.Markdown(label="Results", value="Analysis results will appear here...")

            with gr.Accordion("Structured JSON Result", open=False):
                result_json = gr.Code(label="JSON", language="json")

            # Clicking the button and pressing Enter in the textbox run the
            # same handler with the same inputs/outputs, so register both
            # listeners in one place.
            for register_listener in (analyze_btn.click, word_input.submit):
                register_listener(
                    fn=analyze_word,
                    inputs=[word_input, pos_filter],
                    outputs=[result_md, result_json]
                )

        # ── Tab 2: POS Tags Reference ──
        # Static reference table; it is not generated from the analyzer.
        with gr.TabItem("POS Tags"):
            gr.Markdown("## Supported Part-of-Speech (POS) Tags\n")
            gr.Markdown(
                "| Code | Description | Example |\n|:---|:---|:---|\n"
                "| `NOUN` | Noun | kitob |\n"
                "| `VERB` | Verb | o'qi |\n"
                "| `ADJ` | Adjective | katta |\n"
                "| `ADV` | Adverb | tez |\n"
                "| `PRN` | Pronoun | men |\n"
                "| `NUM` | Numeric | bir |\n"
                "| `MOD` | Modal | kerak |\n"
                "| `CNJ` | Conjunction | va |\n"
                "| `ADP` | Adposition | bilan |\n"
                "| `PRT` | Particle | mi |\n"
                "| `INTJ` | Interjection | oh |\n"
                "| `IMIT` | Imitation | taq-tuq |\n"
                "| `PPN` | Proper Noun | Toshkent |\n"
                "| `AUX` | Auxiliary verb | bo'lmoq |\n"
            )

        # ── Tab 3: Documentation ──
        with gr.TabItem("About"):
            gr.Markdown(
                "## About the Project\n"
                "UzMorph is a rule-based morphological analyzer for the Uzbek language with the following features:\n"
                "- **122K+** stems in the core lexicon.\n"
                "- **Multi-POS** support for disambiguating ambiguous stems.\n"
                "- **CSE (Complete Set of Endings)**: A specialized system for agglutinative languages.\n\n"
                "### For Developers (Python)\n"
                "```bash\n"
                "pip install uzmorph\n"
                "```\n"
                "```python\n"
                "from uzmorph import UzMorph\n"
                "analyzer = UzMorph()\n"
                "results = analyzer.analyze('kitoblarim')\n"
                "```\n\n"
                "### Links\n"
                "- [GitHub Repository](https://github.com/UlugbekSalaev/uzmorph)\n"
                "- [PyPI Project](https://pypi.org/project/uzmorph/)\n"
            )

    # Footer. Two trailing spaces after the author name force a line break so
    # that "Website" is rendered on its own line.
    gr.Markdown(
        "---\n"
        "**Author**: Ulugbek Salaev  \n"
        'Website: <a href="https://morph.uz" target="_blank">morph.uz</a>\n'
    )

# Launch the app only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()