Spaces:

ulugbeksalaev
/

uzmorph

Sleeping

App Files Files Community

uzmorph / app.py

ulugbeksalaev

Update app.py

7125483 verified 21 days ago

raw

history blame contribute delete

7.1 kB

	import gradio as gr
	import json
	import sys
	from uzmorph import UzMorph

	# One analyzer instance is built at import time and shared by analyze_word().
	analyzer = UzMorph()

	# Choices for the POS dropdown: "All" (no filter) followed by one
	# "CODE: description" entry per tag exposed by the analyzer.
	POS_OPTIONS = ["All"]
	POS_OPTIONS.extend(
	    f"{code}: {desc}" for code, desc in analyzer.POS.DESCRIPTIONS.items()
	)

	# Feature list as reported by the analyzer.
	# NOTE(review): not referenced elsewhere in the visible code — confirm it is still needed.
	FEATURE_COLUMNS = analyzer.get_features_list()

	# Keys rendered in the main table (or handled separately) and therefore
	# excluded from the "Morphological Features" table.
	_NON_FEATURE_KEYS = frozenset(
	    {'word', 'stem', 'lemma', 'pos', 'cse', 'cse_formula', 'note', 'ball'}
	)

	# Header + alignment rows shared by both Markdown tables.
	_FIELD_TABLE_HEAD = ("| Field | Value |", "|:---|:---|")
	_FEATURE_TABLE_HEAD = ("| Feature | Value |", "|:---|:---|")


	def _table_row(label, value, code=True):
	    """Return one two-column Markdown table row; *code* wraps the value in backticks."""
	    cell = f"`{value}`" if code else f"{value}"
	    return f"| {label} | {cell} |"


	def _format_variant(index, result):
	    """Render a single analysis result (a dict) as a Markdown section."""
	    star = " ⭐ (best match)" if index == 1 else ""
	    lines = [
	        f"### Variant #{index}{star}",
	        *_FIELD_TABLE_HEAD,
	        _table_row("Word", result.get('word', '')),
	        _table_row("Stem", result.get('stem', '')),
	        _table_row("Lemma", result.get('lemma', '')),
	        _table_row("POS", result.get('pos', ''), code=False),
	    ]

	    # Suffix rows appear only when the analyzer supplied a non-empty value.
	    if result.get('cse'):
	        lines.append(_table_row("Suffix (CSE)", result['cse']))
	    if result.get('cse_formula'):
	        lines.append(_table_row("CSE Formula", result['cse_formula']))

	    # Every remaining non-empty key is shown as a morphological feature.
	    feature_rows = [
	        _table_row(key, value)
	        for key, value in result.items()
	        if key not in _NON_FEATURE_KEYS and value
	    ]
	    if feature_rows:
	        lines += ["", "Morphological Features:", "", *_FEATURE_TABLE_HEAD]
	        lines += feature_rows

	    if result.get('note'):
	        lines += ["", f"Note: {result['note']}"]

	    # Horizontal rule separates consecutive variants.
	    lines += ["", "---"]
	    return "\n".join(lines) + "\n"


	def analyze_word(word, pos_selection):
	    """Analyze one word and format the result for the UI.

	    Args:
	        word: Text typed by the user; may be empty or None.
	        pos_selection: Dropdown value, either "All" or "CODE: description".
	            Only the part before the first ":" is passed on as the filter.

	    Returns:
	        A ``(markdown, json_text)`` tuple. ``json_text`` is an empty string
	        when there is no input or no analysis was found.
	    """
	    if not word or not word.strip():
	        return "Please enter a word.", ""

	    # The analyzer is queried with the trimmed, lower-cased form.
	    word = word.strip().lower()

	    # "All" (or no selection) means no POS restriction.
	    pos_filter = None
	    if pos_selection and pos_selection != "All":
	        pos_filter = pos_selection.split(":")[0].strip()

	    results = analyzer.analyze(word, pos_filter=pos_filter)

	    heading = f"## Results for: `{word}`\n"
	    if not results:
	        return f"{heading}\nNo analysis found.", ""

	    # Plain "|" delimiters are required here: "\|" is an invalid Python
	    # escape and would reach Markdown as an escaped (non-delimiter) pipe.
	    parts = [heading, f"Found {len(results)} variant(s)\n\n"]
	    parts.extend(
	        _format_variant(position, result)
	        for position, result in enumerate(results, 1)
	    )
	    md = "".join(parts)

	    json_out = json.dumps(results, ensure_ascii=False, indent=2)
	    return md, json_out


	# ── Theme ──
	# Soft base theme: teal accent over slate greys, with fonts referenced
	# through gr.themes.GoogleFont.
	custom_theme = gr.themes.Soft(
	    primary_hue="teal", secondary_hue="slate", neutral_hue="slate",
	    font=gr.themes.GoogleFont("Inter"),
	    font_mono=gr.themes.GoogleFont("JetBrains Mono"),
	)

	# Top-level UI: a header, three tabs (Analyze / POS Tags / About) and a footer.
	# All Markdown below uses plain "|" characters; "\|" is an invalid Python
	# escape and, inside a table, would stop the pipe from acting as a delimiter.
	with gr.Blocks(
	    title="UzMorph — Uzbek Morphological Analyzer",
	    theme=custom_theme,
	    css=".gradio-container { max-width: 1100px; margin: auto; } footer { display: none !important; }",
	) as demo:
	    # Header: title, short description, then a row of project links.
	    # The two trailing spaces before "\n" force a Markdown hard line break.
	    gr.Markdown(
	        "# UzMorph — Uzbek Morphological Analyzer using Complete Set of Ending\n"
	        "Analyze Uzbek words using Complete Set of Endings (CSE) rules and an extensive lexicon (~122k stems).  \n"
	        'Scientific Base: <a href="https://www.scopus.com/pages/publications/85212084325" target="_blank">Scopus Article</a> | '
	        'Neural Model Version: <a href="https://huggingface.co/spaces/ulugbeksalaev/uzmorph_nn" target="_blank">UzMorph_NN</a> | '
	        'Web: <a href="https://morph.uz" target="_blank">morph.uz</a> | '
	        '<a href="https://github.com/UlugbekSalaev/uzmorph" target="_blank">Github</a> | '
	        '<a href="https://pypi.org/project/uzmorph/" target="_blank">PyPi</a>'
	    )

	    with gr.Tabs():
	        # ── Tab 1: Analyzer ──
	        with gr.TabItem("Analyze"):
	            with gr.Row():
	                # Left column: inputs and clickable examples.
	                with gr.Column(scale=1):
	                    word_input = gr.Textbox(
	                        label="Enter a word",
	                        placeholder="maktabimizda",
	                        lines=1,
	                    )
	                    pos_filter = gr.Dropdown(
	                        choices=POS_OPTIONS,
	                        value="All",
	                        label="POS Filter (Optional)",
	                    )
	                    analyze_btn = gr.Button("Analyze", variant="primary")

	                    gr.Examples(
	                        examples=[
	                            ["ishladik", "All"],
	                            ["kitoblarim", "All"],
	                            ["bording", "All"],
	                            ["yozdi", "All"],
	                            ["olma", "VERB: Verb {Fe'l}"],
	                        ],
	                        inputs=[word_input, pos_filter],
	                    )

	                # Right column: rendered Markdown plus the raw JSON, collapsed by default.
	                with gr.Column(scale=2):
	                    result_md = gr.Markdown(label="Results", value="Analysis results will appear here...")

	                    with gr.Accordion("Structured JSON Result", open=False):
	                        result_json = gr.Code(label="JSON", language="json")

	            # Both the button click and pressing Enter in the textbox run the analysis.
	            analyze_btn.click(
	                fn=analyze_word,
	                inputs=[word_input, pos_filter],
	                outputs=[result_md, result_json],
	            )
	            word_input.submit(
	                fn=analyze_word,
	                inputs=[word_input, pos_filter],
	                outputs=[result_md, result_json],
	            )

	        # ── Tab 2: POS Tags Reference ──
	        with gr.TabItem("POS Tags"):
	            gr.Markdown("## Supported Part-of-Speech (POS) Tags\n")
	            gr.Markdown(
	                "| Code | Description | Example |\n|:---|:---|:---|\n"
	                "| `NOUN` | Noun | kitob |\n"
	                "| `VERB` | Verb | o'qi |\n"
	                "| `ADJ` | Adjective | katta |\n"
	                "| `ADV` | Adverb | tez |\n"
	                "| `PRN` | Pronoun | men |\n"
	                "| `NUM` | Numeric | bir |\n"
	                "| `MOD` | Modal | kerak |\n"
	                "| `CNJ` | Conjunction | va |\n"
	                "| `ADP` | Adposition | bilan |\n"
	                "| `PRT` | Particle | mi |\n"
	                "| `INTJ` | Interjection | oh |\n"
	                "| `IMIT` | Imitation | taq-tuq |\n"
	                "| `PPN` | Proper Noun | Toshkent |\n"
	                "| `AUX` | Auxiliary verb | bo'lmoq |\n"
	            )

	        # ── Tab 3: Documentation ──
	        with gr.TabItem("About"):
	            gr.Markdown(
	                "## About the Project\n"
	                "UzMorph is a rule-based morphological analyzer for the Uzbek language with the following features:\n"
	                "- 122K+ stems in the core lexicon.\n"
	                "- Multi-POS support for disambiguating ambiguous stems.\n"
	                "- CSE (Complete Set of Endings): A specialized system for agglutinative languages.\n\n"
	                "### For Developers (Python)\n"
	                "```bash\n"
	                "pip install uzmorph\n"
	                "```\n"
	                "```python\n"
	                "from uzmorph import UzMorph\n"
	                "analyzer = UzMorph()\n"
	                "results = analyzer.analyze('kitoblarim')\n"
	                "```\n\n"
	                "### Links\n"
	                "- [GitHub Repository](https://github.com/UlugbekSalaev/uzmorph)\n"
	                "- [PyPI Project](https://pypi.org/project/uzmorph/)\n"
	            )

	    # Footer shown under the tabs; the hard break keeps author and website on separate lines.
	    gr.Markdown(
	        "---\n"
	        "Author: Ulugbek Salaev  \n"
	        'Website: <a href="https://morph.uz" target="_blank">morph.uz</a>\n'
	    )

	# Launch the app only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()