File size: 7,098 Bytes
7cd0758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27b51c3
7cd0758
 
7125483
7cd0758
 
27b51c3
 
7cd0758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2baf296
7cd0758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27b51c3
7cd0758
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27b51c3
7cd0758
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import gradio as gr
import json
import sys
from uzmorph import UzMorph

# Single analyzer instance, created once when the module is imported
analyzer = UzMorph()

# Dropdown choices: "All" first (meaning no POS filtering), then one
# "CODE: description" entry per item of analyzer.POS.DESCRIPTIONS
POS_OPTIONS = ["All"]
POS_OPTIONS.extend(
    f"{code}: {desc}" for code, desc in analyzer.POS.DESCRIPTIONS.items()
)

# Whatever analyzer.get_features_list() reports; kept at module level
FEATURE_COLUMNS = analyzer.get_features_list()

def analyze_word(word, pos_selection):
    """Analyze one word and render the outcome for the UI.

    Args:
        word: Raw text from the input box. It is stripped and lower-cased
            before being handed to the analyzer.
        pos_selection: Selected dropdown entry, either "All" or a
            "CODE: description" string. Only the part before the first
            colon is passed on as the POS filter.

    Returns:
        A ``(markdown, json_text)`` tuple. ``json_text`` is an empty string
        when the input is blank or the analyzer returns nothing.
    """
    cleaned = word.strip() if word else ""
    if not cleaned:
        return "Please enter a word.", ""
    cleaned = cleaned.lower()

    # "All" (or no selection) means the analyzer is called without a filter.
    wants_filter = bool(pos_selection) and pos_selection != "All"
    pos_code = pos_selection.partition(":")[0].strip() if wants_filter else None

    results = analyzer.analyze(cleaned, pos_filter=pos_code)
    if not results:
        return f"## Results for: `{cleaned}`\n\nNo analysis found.", ""

    # Keys that get a dedicated row (or are hidden) and therefore must not be
    # repeated in the generic feature table.
    reserved = {'word', 'stem', 'lemma', 'pos', 'cse', 'cse_formula', 'note', 'ball'}

    # Collect markdown fragments and join once at the end.
    chunks = [
        f"## Results for: `{cleaned}`\n",
        f"Found **{len(results)}** variant(s)\n\n",
    ]

    for rank, variant in enumerate(results, start=1):
        # The first entry of the result list is labelled as the best match.
        badge = " ⭐ (best match)" if rank == 1 else ""
        chunks.append(f"### Variant #{rank}{badge}\n")
        chunks.append("| Field | Value |\n|:---|:---|\n")
        chunks.append(f"| **Word** | `{variant.get('word', '')}` |\n")
        chunks.append(f"| **Stem** | `{variant.get('stem', '')}` |\n")
        chunks.append(f"| **Lemma** | `{variant.get('lemma', '')}` |\n")
        chunks.append(f"| **POS** | **{variant.get('pos', '')}** |\n")

        # Suffix rows appear only when the analyzer supplied a value.
        suffix = variant.get('cse')
        if suffix:
            chunks.append(f"| **Suffix (CSE)** | `{suffix}` |\n")
        formula = variant.get('cse_formula')
        if formula:
            chunks.append(f"| **CSE Formula** | `{formula}` |\n")

        # Every remaining non-empty key becomes a row of the feature table.
        feature_rows = [
            f"| {key} | `{value}` |"
            for key, value in variant.items()
            if key not in reserved and value
        ]
        if feature_rows:
            chunks.append("\n**Morphological Features:**\n\n")
            chunks.append("| Feature | Value |\n|:---|:---|\n")
            chunks.append("\n".join(feature_rows) + "\n")

        remark = variant.get('note')
        if remark:
            chunks.append(f"\n*Note: {remark}*\n")
        chunks.append("\n---\n")

    markdown = "".join(chunks)
    # ensure_ascii=False keeps non-ASCII characters readable in the JSON view.
    return markdown, json.dumps(results, ensure_ascii=False, indent=2)


# ── Theme ──
# Fonts are created first so the theme constructor call stays compact.
_body_font = gr.themes.GoogleFont("Inter")
_code_font = gr.themes.GoogleFont("JetBrains Mono")

# Soft base theme with a teal accent on slate greys.
custom_theme = gr.themes.Soft(
    font_mono=_code_font,
    font=_body_font,
    neutral_hue="slate",
    secondary_hue="slate",
    primary_hue="teal",
)

# Top-level UI definition: a header, three tabs (Analyze / POS Tags / About)
# and a footer. The title and heading use a real em dash; the previous text
# contained a mis-decoded UTF-8 sequence that was displayed verbatim.
with gr.Blocks(
    title="UzMorph — Uzbek Morphological Analyzer",
    theme=custom_theme,
    css=".gradio-container { max-width: 1100px; margin: auto; } footer { display: none !important; }"
) as demo:
    # Page header with project links
    gr.Markdown(
        "# UzMorph — Uzbek Morphological Analyzer using Complete Set of Ending\n"
        "Analyze Uzbek words using **Complete Set of Endings (CSE)** rules and an extensive lexicon (~122k stems).  \n"
        'Scientific Base: <a href="https://www.scopus.com/pages/publications/85212084325" target="_blank">Scopus Article</a> | '
        'Neural Model Version: <a href="https://huggingface.co/spaces/ulugbeksalaev/uzmorph_nn" target="_blank">UzMorph_NN</a> | '
        'Web: <a href="https://morph.uz" target="_blank">morph.uz</a> | '
        '<a href="https://github.com/UlugbekSalaev/uzmorph" target="_blank">Github</a> | '
        '<a href="https://pypi.org/project/uzmorph/" target="_blank">PyPi</a>'
    )

    with gr.Tabs():
        # ── Tab 1: Analyzer ──
        with gr.TabItem("Analyze"):
            with gr.Row():
                # Left column: inputs and clickable examples
                with gr.Column(scale=1):
                    word_input = gr.Textbox(
                        label="Enter a word",
                        placeholder="maktabimizda",
                        lines=1
                    )
                    pos_filter = gr.Dropdown(
                        choices=POS_OPTIONS,
                        value="All",
                        label="POS Filter (Optional)"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")

                    # Each example is a [word, POS dropdown value] pair.
                    # NOTE(review): the last example's POS string must match an
                    # entry of POS_OPTIONS exactly; confirm against the analyzer.
                    gr.Examples(
                        examples=[["ishladik", "All"], ["kitoblarim", "All"], ["bording", "All"], ["yozdi", "All"], ["olma", "VERB: Verb {Fe'l}"]],
                        inputs=[word_input, pos_filter]
                    )

                # Right column: rendered markdown report
                with gr.Column(scale=2):
                    result_md = gr.Markdown(label="Results", value="Analysis results will appear here...")

            # Raw analyzer output, collapsed by default
            with gr.Accordion("Structured JSON Result", open=False):
                result_json = gr.Code(label="JSON", language="json")

            # Both the button click and pressing Enter in the textbox run the
            # same analysis and fill the same two outputs.
            analyze_btn.click(
                fn=analyze_word,
                inputs=[word_input, pos_filter],
                outputs=[result_md, result_json]
            )
            word_input.submit(
                fn=analyze_word,
                inputs=[word_input, pos_filter],
                outputs=[result_md, result_json]
            )

        # ── Tab 2: POS Tags Reference ──
        with gr.TabItem("POS Tags"):
            gr.Markdown("## Supported Part-of-Speech (POS) Tags\n")
            # Static reference table; it is not generated from the analyzer.
            gr.Markdown(
                "| Code | Description | Example |\n|:---|:---|:---|\n" +
                "| `NOUN` | Noun | kitob |\n" +
                "| `VERB` | Verb | o'qi |\n" +
                "| `ADJ` | Adjective | katta |\n" +
                "| `ADV` | Adverb | tez |\n" +
                "| `PRN` | Pronoun | men |\n" +
                "| `NUM` | Numeric | bir |\n" +
                "| `MOD` | Modal | kerak |\n" +
                "| `CNJ` | Conjunction | va |\n" +
                "| `ADP` | Adposition | bilan |\n" +
                "| `PRT` | Particle | mi |\n" +
                "| `INTJ` | Interjection | oh |\n" +
                "| `IMIT` | Imitation | taq-tuq |\n" +
                "| `PPN` | Proper Noun | Toshkent |\n" +
                "| `AUX` | Auxiliary verb | bo'lmoq |\n"
            )

        # ── Tab 3: Documentation ──
        with gr.TabItem("About"):
            gr.Markdown(
                "## About the Project\n"
                "UzMorph is a rule-based morphological analyzer for the Uzbek language with the following features:\n"
                "- **122K+** stems in the core lexicon.\n"
                "- **Multi-POS** support for disambiguating ambiguous stems.\n"
                "- **CSE (Complete Set of Endings)**: A specialized system for agglutinative languages.\n\n"
                "### For Developers (Python)\n"
                "```bash\n"
                "pip install uzmorph\n"
                "```\n"
                "```python\n"
                "from uzmorph import UzMorph\n"
                "analyzer = UzMorph()\n"
                "results = analyzer.analyze('kitoblarim')\n"
                "```\n\n"
                "### Links\n"
                "- [GitHub Repository](https://github.com/UlugbekSalaev/uzmorph)\n"
                "- [PyPI Project](https://pypi.org/project/uzmorph/)\n"
            )

    # Page footer (Gradio's own footer is hidden by the css above)
    gr.Markdown(
        "---\n"
        "**Author**: Ulugbek Salaev \n"
        'Website: <a href="https://morph.uz" target="_blank">morph.uz</a>\n'
    )

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()