Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import time | |
| from diacritizer import Diacritizer, ModelNotFound | |
# Catalogue of the selectable models, keyed first by architecture id and then
# by size id. "display_name" is the label shown for the architecture in the
# UI; "size" and "details" are the human-readable strings used to build the
# info line that is displayed after a run.
MODEL_INFO = {
    "bilstm": {
        "display_name": "BiLSTM",
        "models": {
            "medium": {"size": "4 MB", "details": "Balanced speed and accuracy."},
            "large": {"size": "15.5 MB", "details": "Highest accuracy model."},
        },
    },
    "bigru": {
        "display_name": "BiGRU",
        "models": {
            "medium": {
                "size": "3.8 MB",
                "details": "Slightly faster than BiLSTM with comparable accuracy.",
            },
            "large": {
                "size": "14.9 MB",
                "details": "High accuracy alternative to the BiLSTM model.",
            },
        },
    },
}

# Already-loaded models keyed by "<architecture>/<size>"; filled by get_model().
MODEL_CACHE = {}
def get_model(architecture: str, size: str, progress=gr.Progress()):
    """Return the Diacritizer for ``architecture``/``size``, loading it on first use.

    Loaded models are stored in ``MODEL_CACHE`` under the key
    ``"<architecture>/<size>"`` and reused by later calls. A failed load is
    not cached, so the next call tries again.

    Args:
        architecture: Architecture identifier forwarded to ``Diacritizer``.
        size: Size identifier forwarded to ``Diacritizer``.
        progress: Gradio progress tracker; it is only updated when the model
            actually has to be loaded (cache miss).

    Returns:
        The cached ``Diacritizer`` instance for the requested combination.

    Raises:
        gr.Error: If ``Diacritizer`` raises ``ModelNotFound`` or any other
            exception while being constructed. The original exception is
            attached as the cause.
    """
    model_key = f"{architecture}/{size}"
    if model_key in MODEL_CACHE:
        return MODEL_CACHE[model_key]

    progress(0.5, desc=f"Loading {architecture}/{size} model...")
    try:
        model = Diacritizer(architecture=architecture, size=size)
    except ModelNotFound as e:
        raise gr.Error(
            f"The requested model ({model_key}) was not found on the Hugging Face Hub."
        ) from e
    except Exception as e:
        # Boundary handler: surface any loader failure to the UI as a gr.Error
        # while keeping the original exception chained for the server log.
        raise gr.Error(
            f"An unexpected error occurred while loading the model: {e}"
        ) from e

    MODEL_CACHE[model_key] = model
    return model
def diacritize_text(text: str, architecture: str, size: str, progress=gr.Progress()):
    """Diacritize ``text`` with the selected model and report how long it took.

    Args:
        text: The undiacritized input text.
        architecture: Architecture key into ``MODEL_INFO``.
        size: Size key into ``MODEL_INFO[architecture]["models"]``.
        progress: Gradio progress tracker, updated before loading the model
            and before running it.

    Returns:
        A 3-tuple ``(diacritized_text, inference_time, info_markdown)``.
        ``inference_time`` is formatted as seconds with three decimals
        (e.g. ``"0.042s"``) and covers only the ``diacritize`` call, not model
        loading. For empty or whitespace-only input the tuple is
        ``("", "0.000s", <prompt to enter text>)`` and no model is loaded.

    Raises:
        gr.Error: Propagated from ``get_model`` when the model cannot be loaded.
    """
    if not text or not text.strip():
        return "", "0.000s", "Please enter some text to diacritize."

    progress(0, desc="Loading model...")
    diacritizer = get_model(architecture, size, progress)

    progress(0.8, desc="Diacritizing text...")
    # perf_counter() is monotonic and high-resolution, so the measured
    # duration cannot go negative or jump when the system clock is adjusted.
    started = time.perf_counter()
    diacritized_text = diacritizer.diacritize(text)
    elapsed = time.perf_counter() - started
    inference_time = f"{elapsed:.3f}s"

    # Build the info line from the catalogue entry of the model that was used.
    model_entry = MODEL_INFO[architecture]["models"][size]
    final_info_text = (
        f"**Model:** {architecture}/{size} | **Size:** {model_entry['size']} "
        f"| {model_entry['details']}"
    )
    return diacritized_text, inference_time, final_info_text
def update_available_sizes(architecture: str):
    """Rebuild the size selector after the architecture selection changes.

    Returns a ``gr.Radio`` whose choices are the sizes that ``MODEL_INFO``
    lists for ``architecture``, with the first listed size pre-selected.
    """
    size_options = [*MODEL_INFO[architecture]["models"]]
    first_size = size_options[0]
    # Returning a fresh component from the callback replaces the choices and
    # the current value of the output Radio.
    return gr.Radio(
        label="Model Size",
        info="Select the model size.",
        choices=size_options,
        value=first_size,
    )
# Visual theme for the demo: Soft base theme with custom hues and a Latin plus
# Arabic font pair.
_theme_fonts = (
    gr.themes.GoogleFont("Noto Sans"),
    gr.themes.GoogleFont("Noto Sans Arabic"),
)
theme = gr.themes.Soft(
    primary_hue="zinc",
    secondary_hue="blue",
    neutral_hue="slate",
    font=_theme_fonts,
)
# A slightly off-black page background for dark mode.
theme = theme.set(body_background_fill_dark="#111827")
# Markdown rendered at the top of the page (heading, short pitch, repo link).
DESCRIPTION = (
    "\n"
    "# ⚡ End-to-End Arabic Diacritizer\n"
    "A lightweight and efficient model for automatic Arabic diacritization.\n"
    "Select an architecture and size, enter some text, and see it in action. For more details, visit the\n"
    "[GitHub repository](https://github.com/muhammad-abdelsattar/arabic-diacritizer).\n"
)
# Page layout and event wiring for the demo.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost; confirm that info_display belongs under the selectors
# column rather than inside the selector row.
with gr.Blocks(theme=theme, css=".footer {display: none !important}") as demo:
    gr.Markdown(DESCRIPTION)

    # Initial selection. The size choices and the info line are derived from
    # MODEL_INFO so they cannot drift from the text shown after a run.
    default_arch = "bilstm"
    default_size = "medium"
    default_entry = MODEL_INFO[default_arch]["models"][default_size]

    with gr.Row():
        with gr.Column(scale=3):
            with gr.Row():
                arch_selector = gr.Radio(
                    # (label, value) pairs: show the display name, submit the key.
                    choices=[
                        (info["display_name"], arch)
                        for arch, info in MODEL_INFO.items()
                    ],
                    label="Model Architecture",
                    value=default_arch,
                    info="Select the model architecture.",
                )
                model_selector = gr.Radio(
                    choices=list(MODEL_INFO[default_arch]["models"]),
                    label="Model Size",
                    value=default_size,
                    info="Select the model size.",
                )
            # Same format as the info line produced by diacritize_text().
            info_display = gr.Markdown(
                f"**Model:** {default_arch}/{default_size} | "
                f"**Size:** {default_entry['size']} | {default_entry['details']}",
                elem_id="info-display",
            )
        with gr.Column(scale=1):
            inference_time_output = gr.Textbox(
                label="Inference Time", interactive=False, max_lines=1
            )

    with gr.Row(equal_height=True):
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text (Undiacritized)",
                placeholder="اكتب جملة عربية غير مشكولة هنا...",
                lines=8,
                rtl=True,
            )
        with gr.Column():
            output_text = gr.Textbox(
                label="Output Text (Diacritized)",
                lines=8,
                rtl=True,
                interactive=False,
            )

    submit_button = gr.Button("Diacritize ✨", variant="primary")

    gr.Examples(
        [
            ["أعلنت الشركة عن نتائجها المالية للربع الأول من العام."],
            ["إن مع العسر يسرا."],
            ["هل يمكن للذكاء الاصطناعي أن يكون مبدعا؟"],
            ["كان المتنبي شاعرا عظيما في العصر العباسي."],
        ],
        inputs=input_text,
        label="Examples",
    )

    # Run the model on click; the three outputs match diacritize_text()'s
    # return tuple (text, inference time, info line).
    submit_button.click(
        fn=diacritize_text,
        inputs=[input_text, arch_selector, model_selector],
        outputs=[output_text, inference_time_output, info_display],
    )

    # When architecture changes, update the available sizes
    arch_selector.change(
        fn=update_available_sizes, inputs=arch_selector, outputs=model_selector
    )
if __name__ == "__main__":
    # Warm the cache with the default model before serving, so the first
    # request does not have to wait for it to load.
    default_arch, default_size = "bilstm", "medium"
    print(f"Pre-loading default '{default_arch}/{default_size}' model...")
    get_model(architecture=default_arch, size=default_size)
    print("Default model loaded successfully.")

    demo.launch()