# uzmorph / app.py
# ulugbeksalaev's picture
# Update app.py
# 7125483 verified
import gradio as gr
import json
import sys
from uzmorph import UzMorph
# Single analyzer instance, created once when the module is imported.
analyzer = UzMorph()

# Choices for the POS dropdown: "All" first, then one "<code>: <description>"
# entry for each item in analyzer.POS.DESCRIPTIONS.
POS_OPTIONS = ["All"]
POS_OPTIONS.extend(
    f"{code}: {desc}" for code, desc in analyzer.POS.DESCRIPTIONS.items()
)

# Result of analyzer.get_features_list(); not referenced by the code visible
# in this file.
FEATURE_COLUMNS = analyzer.get_features_list()
def analyze_word(word, pos_selection):
    """Analyze a single word and render the result for the UI.

    Args:
        word: Raw text from the input box. It is stripped and lower-cased
            before being passed to the analyzer.
        pos_selection: Dropdown value, either "All" or a string of the form
            "<code>: <description>". Only the part before the first colon is
            forwarded to the analyzer as ``pos_filter``.

    Returns:
        A ``(markdown, json_text)`` tuple. ``json_text`` is an empty string
        when the input is blank or the analyzer returns no results.
    """
    if not word or not word.strip():
        return "Please enter a word.", ""
    word = word.strip().lower()

    # Extract the POS code from the dropdown label ("All" means no filter).
    pos_filter = None
    if pos_selection and pos_selection != "All":
        pos_filter = pos_selection.partition(":")[0].strip()

    results = analyzer.analyze(word, pos_filter=pos_filter)
    if not results:
        return f"## Results for: `{word}`\n\nNo analysis found.", ""

    def cell(value):
        # A raw "|" or a line break inside a value would split the Markdown
        # table row, so neutralise both before interpolating into a cell.
        text = str(value).replace("\r", " ").replace("\n", " ")
        return text.replace("|", "\\|")

    # Keys rendered in the main table (or deliberately hidden); everything
    # else in a result is listed under "Morphological Features".
    skip = frozenset(
        {'word', 'stem', 'lemma', 'pos', 'cse', 'cse_formula', 'note', 'ball'}
    )

    # Collect fragments and join once instead of growing a string with "+=".
    parts = [
        f"## Results for: `{word}`\n",
        f"Found **{len(results)}** variant(s)\n\n",
    ]
    for i, r in enumerate(results, 1):
        # The first result is presented to the user as the best match.
        star = " ⭐ (best match)" if i == 1 else ""
        parts.append(f"### Variant #{i}{star}\n")
        parts.append("| Field | Value |\n|:---|:---|\n")
        parts.append(f"| **Word** | `{cell(r.get('word', ''))}` |\n")
        parts.append(f"| **Stem** | `{cell(r.get('stem', ''))}` |\n")
        parts.append(f"| **Lemma** | `{cell(r.get('lemma', ''))}` |\n")
        parts.append(f"| **POS** | **{cell(r.get('pos', ''))}** |\n")
        if r.get('cse'):
            parts.append(f"| **Suffix (CSE)** | `{cell(r['cse'])}` |\n")
        if r.get('cse_formula'):
            parts.append(f"| **CSE Formula** | `{cell(r['cse_formula'])}` |\n")

        # Morphological features: every remaining key with a truthy value.
        features = [
            f"| {cell(k)} | `{cell(v)}` |"
            for k, v in r.items()
            if k not in skip and v
        ]
        if features:
            parts.append("\n**Morphological Features:**\n\n")
            parts.append("| Feature | Value |\n|:---|:---|\n")
            parts.append("\n".join(features) + "\n")

        if r.get('note'):
            parts.append(f"\n*Note: {r['note']}*\n")
        parts.append("\n---\n")

    # JSON output (ensure_ascii=False keeps non-ASCII text readable).
    json_out = json.dumps(results, ensure_ascii=False, indent=2)
    return "".join(parts), json_out
# ── Theme ──
# Soft base theme: teal primary hue, slate secondary/neutral hues, and the
# "Inter" / "JetBrains Mono" fonts wrapped in gr.themes.GoogleFont.
custom_theme = gr.themes.Soft(
    font=gr.themes.GoogleFont("Inter"),
    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
    primary_hue="teal",
    secondary_hue="slate",
    neutral_hue="slate",
)
# Top-level UI: a header, three tabs (Analyze / POS Tags / About) and a footer.
# The page title and heading use a real em dash; the previous text contained
# the mis-decoded byte sequence "β€”" in its place.
with gr.Blocks(
    title="UzMorph — Uzbek Morphological Analyzer",
    theme=custom_theme,
    css=".gradio-container { max-width: 1100px; margin: auto; } footer { display: none !important; }",
) as demo:
    gr.Markdown(
        "# UzMorph — Uzbek Morphological Analyzer using Complete Set of Ending\n"
        "Analyze Uzbek words using **Complete Set of Endings (CSE)** rules and an extensive lexicon (~122k stems). \n"
        'Scientific Base: <a href="https://www.scopus.com/pages/publications/85212084325" target="_blank">Scopus Article</a> | '
        'Neural Model Version: <a href="https://huggingface.co/spaces/ulugbeksalaev/uzmorph_nn" target="_blank">UzMorph_NN</a> | '
        'Web: <a href="https://morph.uz" target="_blank">morph.uz</a> | '
        '<a href="https://github.com/UlugbekSalaev/uzmorph" target="_blank">Github</a> | '
        '<a href="https://pypi.org/project/uzmorph/" target="_blank">PyPi</a>'
    )

    with gr.Tabs():
        # ── Tab 1: Analyzer ──
        with gr.TabItem("Analyze"):
            with gr.Row():
                # Left column: inputs and example queries.
                with gr.Column(scale=1):
                    word_input = gr.Textbox(
                        label="Enter a word",
                        placeholder="maktabimizda",
                        lines=1,
                    )
                    pos_filter = gr.Dropdown(
                        choices=POS_OPTIONS,
                        value="All",
                        label="POS Filter (Optional)",
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")
                    gr.Examples(
                        examples=[
                            ["ishladik", "All"],
                            ["kitoblarim", "All"],
                            ["bording", "All"],
                            ["yozdi", "All"],
                            ["olma", "VERB: Verb {Fe'l}"],
                        ],
                        inputs=[word_input, pos_filter],
                    )
                # Right column: rendered Markdown plus collapsible raw JSON.
                with gr.Column(scale=2):
                    result_md = gr.Markdown(
                        label="Results",
                        value="Analysis results will appear here...",
                    )
                    with gr.Accordion("Structured JSON Result", open=False):
                        result_json = gr.Code(label="JSON", language="json")

            # Clicking the button and pressing Enter in the textbox both run
            # the same handler with the same inputs and outputs.
            for trigger in (analyze_btn.click, word_input.submit):
                trigger(
                    fn=analyze_word,
                    inputs=[word_input, pos_filter],
                    outputs=[result_md, result_json],
                )

        # ── Tab 2: POS Tags Reference ──
        with gr.TabItem("POS Tags"):
            gr.Markdown("## Supported Part-of-Speech (POS) Tags\n")
            gr.Markdown(
                "| Code | Description | Example |\n|:---|:---|:---|\n"
                "| `NOUN` | Noun | kitob |\n"
                "| `VERB` | Verb | o'qi |\n"
                "| `ADJ` | Adjective | katta |\n"
                "| `ADV` | Adverb | tez |\n"
                "| `PRN` | Pronoun | men |\n"
                "| `NUM` | Numeric | bir |\n"
                "| `MOD` | Modal | kerak |\n"
                "| `CNJ` | Conjunction | va |\n"
                "| `ADP` | Adposition | bilan |\n"
                "| `PRT` | Particle | mi |\n"
                "| `INTJ` | Interjection | oh |\n"
                "| `IMIT` | Imitation | taq-tuq |\n"
                "| `PPN` | Proper Noun | Toshkent |\n"
                "| `AUX` | Auxiliary verb | bo'lmoq |\n"
            )

        # ── Tab 3: Documentation ──
        with gr.TabItem("About"):
            gr.Markdown(
                "## About the Project\n"
                "UzMorph is a rule-based morphological analyzer for the Uzbek language with the following features:\n"
                "- **122K+** stems in the core lexicon.\n"
                "- **Multi-POS** support for disambiguating ambiguous stems.\n"
                "- **CSE (Complete Set of Endings)**: A specialized system for agglutinative languages.\n\n"
                "### For Developers (Python)\n"
                "```bash\n"
                "pip install uzmorph\n"
                "```\n"
                "```python\n"
                "from uzmorph import UzMorph\n"
                "analyzer = UzMorph()\n"
                "results = analyzer.analyze('kitoblarim')\n"
                "```\n\n"
                "### Links\n"
                "- [GitHub Repository](https://github.com/UlugbekSalaev/uzmorph)\n"
                "- [PyPI Project](https://pypi.org/project/uzmorph/)\n"
            )

    # Footer shown below the tabs.
    gr.Markdown(
        "---\n"
        "**Author**: Ulugbek Salaev \n"
        'Website: <a href="https://morph.uz" target="_blank">morph.uz</a>\n'
    )
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()