| import gradio as gr | |
| from segmentation import has_myanmar, preprocess, segment | |
def segment_myanmar_only(text: str) -> str:
    """Segment the Myanmar tokens of *text* and pass every other token through.

    The input is first split into tokens by ``preprocess``. Each token for
    which ``has_myanmar`` is true is replaced by ``segment(token)``; all other
    tokens are kept unchanged. The resulting pieces are joined with a single
    space between them.
    """
    pieces = (
        segment(token) if has_myanmar(token) else token
        for token in preprocess(text)
    )
    return " ".join(pieces)
def process_text(text: str, should_preserve_english: bool) -> str:
    """Segment *text* with the segmenter selected by *should_preserve_english*.

    A truthy flag routes the text through ``segment_myanmar_only``; otherwise
    the whole text is handed to ``segment``.
    """
    segmenter = segment_myanmar_only if should_preserve_english else segment
    return segmenter(text)
# Custom stylesheet handed to gr.Blocks below.
# - #col-container: horizontally centered main column, capped at 1200px wide.
# - #input-output-row: input and output side by side, with centered labels;
#   on viewports up to 768px wide the row is stacked vertically instead.
css = """
#col-container {
margin: 0 auto;
max-width: 1200px;
padding: 1rem;
gap: 2.5rem;
}
#col-container h1 {
text-align: center;
margin-bottom: 1rem;
}
#input-output-row {
flex-direction: row;
gap: 1rem;
}
#input-output-row label {
text-align: center;
}
@media (max-width: 768px) {
#input-output-row {
flex-direction: column;
}
}
"""
# Sample inputs offered below the controls; each inner list holds the single
# value that is loaded into the input textbox.
example_inputs = [
    ["အချစ်ဆိုတာလူတွေရှင်သန်ဖို့သဘာဝကပေးတဲ့လက်နက်လား၊ဒါမှမဟုတ်ယဉ်ကျေးမှုအရတီထွင်ထားတဲ့စိတ်ကူးယဉ်မှုသက်သက်လား။"],
    ["iPhone 15 Pro Maxဖုန်းအသစ်ရောင်းရန်ရှိပြီးဈေးနှုန်း$1,199(ကျပ်၂၅သိန်း)နှင့်အလေးချိန်221gရှိပါသည်။"],
    ["Mathematics(သင်္ချာ)ဘာသာရပ်Chapter-5[အခန်း၅]ကိုစာမျက်နှာ{page 45-60}တွင်လေ့လာနိုင်ပါသည်။"],
    [
        (
            "ယနေ့Manchester United(မန်ယူ)အသင်းကLiverpoolကို3-2ဖြင့်အနိုင်ရရှိခဲ့သည်။ဒီပွဲမှာMarcus Rashfordက2ဂိုးသွင်းပြီး"
            "Man of the Match(ပွဲစဉ်အကောင်းဆုံးကစားသမား)ဆုရရှိခဲ့သည်။"
        )
    ],
    [
        (
            "အမှတ်စဉ်REF:2024/MM/001ဖြင့်ရက်စွဲ15-Jan-2024(၂၀၂၄ခုနှစ်ဇန်နဝါရီလ၁၅ရက်)တွင်လျှောက်လွှာ(Application Form)"
            "ကိုတင်သွင်းရမည်ဖြစ်ပါသည်။"
        )
    ],
]

# NOTE(review): the nesting below (controls and examples inside the column,
# event wiring directly under Blocks) is assumed — confirm against the
# intended layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# Myanmar Text Segmentation")

        # Input on one side, segmented result on the other.
        with gr.Row(elem_id="input-output-row", equal_height=True):
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter Myanmar text here...",
                lines=8,
            )
            output_text = gr.Textbox(
                label="Segmented Text",
                lines=8,
            )

        preserve_english = gr.Checkbox(
            label="Preserve English text",
            value=False,
            info="Only segment Myanmar text and add spaces between English/Myanmar boundaries",
        )
        run_button = gr.Button("Segment", variant="primary")

        gr.Examples(examples=example_inputs, inputs=input_text)

    # The button click and the textbox submit event run the same handler
    # with the same inputs and output.
    segment_event = {
        "fn": process_text,
        "inputs": [input_text, preserve_english],
        "outputs": output_text,
    }
    run_button.click(**segment_event)
    input_text.submit(**segment_event)
# Launch the app only when this file is executed directly, so that importing
# the module (e.g. to reuse `demo`) does not start it.
if __name__ == "__main__":
    demo.launch()