"""Gradio front end for the UzMorph Uzbek morphological analyzer.

The module builds a three-tab web UI (Analyze / POS Tags / About) around a
single shared ``UzMorph`` instance and launches it when run as a script.
"""

import json
import sys

import gradio as gr
from uzmorph import UzMorph

# Initialize analyzer (one shared instance, created at import time).
analyzer = UzMorph()

# POS filter options: "All" followed by one "CODE: description" entry per tag.
POS_OPTIONS = ["All"] + [
    f"{code}: {desc}" for code, desc in analyzer.POS.DESCRIPTIONS.items()
]

FEATURE_COLUMNS = analyzer.get_features_list()

# Keys excluded from the "Morphological Features" table: they are either
# shown elsewhere in the report or, like "ball", not shown at all.
_NON_FEATURE_KEYS = frozenset(
    {"word", "stem", "lemma", "pos", "cse", "cse_formula", "note", "ball"}
)


def _render_variant(index, variant):
    """Return the Markdown section describing one analysis variant.

    Args:
        index: 1-based position of the variant in the result list; the
            variant at position 1 is labelled as the best match.
        variant: Mapping of field names to values for this variant.

    Returns:
        A Markdown string ending with a horizontal rule.
    """
    star = " ⭐ (best match)" if index == 1 else ""
    parts = [
        f"### Variant #{index}{star}\n",
        "| Field | Value |\n|:---|:---|\n",
        f"| **Word** | `{variant.get('word', '')}` |\n",
        f"| **Stem** | `{variant.get('stem', '')}` |\n",
        f"| **Lemma** | `{variant.get('lemma', '')}` |\n",
        f"| **POS** | **{variant.get('pos', '')}** |\n",
    ]

    # Suffix rows are optional: only emitted when the value is truthy.
    if variant.get("cse"):
        parts.append(f"| **Suffix (CSE)** | `{variant['cse']}` |\n")
    if variant.get("cse_formula"):
        parts.append(f"| **CSE Formula** | `{variant['cse_formula']}` |\n")

    # Morphological features: every remaining key with a truthy value.
    features = [
        f"| {key} | `{value}` |"
        for key, value in variant.items()
        if key not in _NON_FEATURE_KEYS and value
    ]
    if features:
        parts.append("\n**Morphological Features:**\n\n")
        parts.append("| Feature | Value |\n|:---|:---|\n")
        parts.append("\n".join(features) + "\n")

    if variant.get("note"):
        parts.append(f"\n*Note: {variant['note']}*\n")

    parts.append("\n---\n")
    return "".join(parts)


def analyze_word(word, pos_selection):
    """Analyze a single word and format the result for the UI.

    Args:
        word: Raw text from the input box. It is stripped and lower-cased
            before being passed to the analyzer.
        pos_selection: The selected dropdown entry, either ``"All"`` or a
            ``"CODE: description"`` string; only the part before the first
            colon is used as the POS filter.

    Returns:
        A ``(markdown, json_text)`` tuple. ``json_text`` is an empty string
        when the input is blank or the analyzer returns nothing.
    """
    if not word or not word.strip():
        return "Please enter a word.", ""

    word = word.strip().lower()

    # Extract POS filter
    pos_filter = None
    if pos_selection and pos_selection != "All":
        pos_filter = pos_selection.split(":")[0].strip()

    results = analyzer.analyze(word, pos_filter=pos_filter)
    if not results:
        return f"## Results for: `{word}`\n\nNo analysis found.", ""

    # Build markdown output: collect the pieces and join once instead of
    # growing a string with repeated "+=".
    sections = [
        f"## Results for: `{word}`\n",
        f"Found **{len(results)}** variant(s)\n\n",
    ]
    sections.extend(
        _render_variant(position, variant)
        for position, variant in enumerate(results, 1)
    )

    # JSON output
    json_out = json.dumps(results, ensure_ascii=False, indent=2)
    return "".join(sections), json_out


# ── Theme ──
custom_theme = gr.themes.Soft(
    primary_hue="teal",
    secondary_hue="slate",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
)

with gr.Blocks(
    title="UzMorph — Uzbek Morphological Analyzer",
    theme=custom_theme,
    css=".gradio-container { max-width: 1100px; margin: auto; } footer { display: none !important; }",
) as demo:
    # NOTE(review): the single-quoted fragments below look like they may have
    # carried link markup at some point — confirm against the deployed app.
    gr.Markdown(
        "# UzMorph — Uzbek Morphological Analyzer using Complete Set of Ending\n"
        "Analyze Uzbek words using **Complete Set of Endings (CSE)** rules and an extensive lexicon (~122k stems). \n"
        'Scientific Base: Scopus Article | '
        'Neural Model Version: UzMorph_NN | '
        'Web: morph.uz | '
        'Github | '
        'PyPi'
    )

    with gr.Tabs():
        # ── Tab 1: Analyzer ──
        with gr.TabItem("Analyze"):
            with gr.Row():
                with gr.Column(scale=1):
                    word_input = gr.Textbox(
                        label="Enter a word",
                        placeholder="maktabimizda",
                        lines=1,
                    )
                    pos_filter = gr.Dropdown(
                        choices=POS_OPTIONS,
                        value="All",
                        label="POS Filter (Optional)",
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")
                    gr.Examples(
                        examples=[
                            ["ishladik", "All"],
                            ["kitoblarim", "All"],
                            ["bording", "All"],
                            ["yozdi", "All"],
                            ["olma", "VERB: Verb {Fe'l}"],
                        ],
                        inputs=[word_input, pos_filter],
                    )
                with gr.Column(scale=2):
                    result_md = gr.Markdown(
                        label="Results",
                        value="Analysis results will appear here...",
                    )
                    with gr.Accordion("Structured JSON Result", open=False):
                        result_json = gr.Code(label="JSON", language="json")

            # The button click and pressing Enter in the textbox run the same
            # handler with the same inputs and outputs.
            for register in (analyze_btn.click, word_input.submit):
                register(
                    fn=analyze_word,
                    inputs=[word_input, pos_filter],
                    outputs=[result_md, result_json],
                )

        # ── Tab 2: POS Tags Reference ──
        with gr.TabItem("POS Tags"):
            gr.Markdown("## Supported Part-of-Speech (POS) Tags\n")
            gr.Markdown(
                "| Code | Description | Example |\n|:---|:---|:---|\n"
                "| `NOUN` | Noun | kitob |\n"
                "| `VERB` | Verb | o'qi |\n"
                "| `ADJ` | Adjective | katta |\n"
                "| `ADV` | Adverb | tez |\n"
                "| `PRN` | Pronoun | men |\n"
                "| `NUM` | Numeric | bir |\n"
                "| `MOD` | Modal | kerak |\n"
                "| `CNJ` | Conjunction | va |\n"
                "| `ADP` | Adposition | bilan |\n"
                "| `PRT` | Particle | mi |\n"
                "| `INTJ` | Interjection | oh |\n"
                "| `IMIT` | Imitation | taq-tuq |\n"
                "| `PPN` | Proper Noun | Toshkent |\n"
                "| `AUX` | Auxiliary verb | bo'lmoq |\n"
            )

        # ── Tab 3: Documentation ──
        with gr.TabItem("About"):
            gr.Markdown(
                "## About the Project\n"
                "UzMorph is a rule-based morphological analyzer for the Uzbek language with the following features:\n"
                "- **122K+** stems in the core lexicon.\n"
                "- **Multi-POS** support for disambiguating ambiguous stems.\n"
                "- **CSE (Complete Set of Endings)**: A specialized system for agglutinative languages.\n\n"
                "### For Developers (Python)\n"
                "```bash\n"
                "pip install uzmorph\n"
                "```\n"
                "```python\n"
                "from uzmorph import UzMorph\n"
                "analyzer = UzMorph()\n"
                "results = analyzer.analyze('kitoblarim')\n"
                "```\n\n"
                "### Links\n"
                "- [GitHub Repository](https://github.com/UlugbekSalaev/uzmorph)\n"
                "- [PyPI Project](https://pypi.org/project/uzmorph/)\n"
            )

    gr.Markdown(
        "---\n"
        "**Author**: Ulugbek Salaev \n"
        'Website: morph.uz\n'
    )

if __name__ == "__main__":
    demo.launch()