""" Hugging Face Space - Arabic EOU Detection Demo File: app.py This creates an interactive web demo for your model """ import gradio as gr import torch from transformers import AutoTokenizer, AutoModelForSequenceClassification # ============================================================================ # LOAD MODEL # ============================================================================ MODEL_NAME = "LordTenson/Saudi-EOU" # Replace with your model name print("Loading model...") try: tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME) device = "cuda" if torch.cuda.is_available() else "cpu" model.to(device) model.eval() print(f"✅ Model loaded on {device}") except Exception as e: print(f"Error loading model: {e}") print("Falling back to local model...") MODEL_NAME = "./arabert_eou_final" tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME) device = "cpu" model.to(device) model.eval() # ============================================================================ # PREDICTION FUNCTION # ============================================================================ def predict_eou(text, threshold=0.5): """ Predict if text is end-of-utterance Args: text: Arabic text to analyze threshold: Confidence threshold Returns: Prediction result and confidence """ if not text or len(text.strip()) == 0: return "❌ Please enter some text", 0.0, 0.0 # Tokenize inputs = tokenizer( text, return_tensors="pt", truncation=True, max_length=256, padding=True ) # Move to device inputs = {k: v.to(device) for k, v in inputs.items()} # Inference with torch.no_grad(): outputs = model(**inputs) probs = torch.softmax(outputs.logits, dim=1) # Get probabilities not_eou_prob = probs[0][0].item() eou_prob = probs[0][1].item() # Determine result is_eou = eou_prob >= threshold if is_eou: result = f"✅ **END OF TURN** - Speaker has finished" color = "green" else: result = f"⏳ **CONTINUE** - Speaker is still talking" color = "orange" # Return results return result, eou_prob, not_eou_prob # ============================================================================ # GRADIO INTERFACE # ============================================================================ def create_demo(): """Create Gradio interface""" with gr.Blocks(title="Arabic EOU Detection", theme=gr.themes.Soft()) as demo: # Header gr.Markdown(""" # 🎤 Arabic End-of-Utterance Detection This model detects whether a speaker has finished their turn in Arabic conversations. Fine-tuned AraBERT model on Saudi dialect conversations. **Use Case**: Real-time voice agents, conversation systems, live transcription """) with gr.Row(): with gr.Column(scale=2): # Input text_input = gr.Textbox( label="Enter Arabic Text", placeholder="مثال: السلام عليكم كيف حالك", lines=3, rtl=True # Right-to-left for Arabic ) threshold_slider = gr.Slider( minimum=0.0, maximum=1.0, value=0.5, step=0.05, label="Detection Threshold", info="Lower = more sensitive, Higher = less sensitive" ) submit_btn = gr.Button("🔍 Analyze", variant="primary") with gr.Column(scale=1): # Output result_output = gr.Markdown(label="Prediction") with gr.Row(): eou_prob = gr.Number(label="EOU Probability", precision=3) not_eou_prob = gr.Number(label="Not-EOU Probability", precision=3) # Examples gr.Markdown("### 📝 Try These Examples:") gr.Examples( examples=[ ["السلام عليكم كيف حالك", 0.5], ["أنا رايح", 0.5], ["شكراً لك والله", 0.5], ["يعني مثلاً", 0.5], ["تمام فهمت عليك", 0.5], ["أبي أقول لك", 0.5], ["والله ما أدري كيف", 0.5], ["خلاص انتهينا من الموضوع", 0.5], ], inputs=[text_input, threshold_slider], outputs=[result_output, eou_prob, not_eou_prob], fn=predict_eou, cache_examples=False, ) # Model Info with gr.Accordion("ℹ️ Model Information", open=False): gr.Markdown(f""" ### Model Details - **Base Model**: aubmindlab/bert-base-arabertv2 - **Fine-tuned on**: Saudi Arabic dialect conversations - **Accuracy**: 62% - **F1 Score**: 0.62 (balanced) - **Latency**: ~45ms average ### How It Works 1. The model analyzes Arabic text 2. Predicts probability of turn completion 3. If probability > threshold → Turn ends 4. Used in real-time voice agents for natural conversations ### Classes - **EOU (End-of-Utterance)**: Speaker has finished their turn - **Not-EOU**: Speaker is continuing, more words expected ### Links - 🤗 [Model on Hugging Face]({MODEL_NAME}) - 📊 [Dataset](your-dataset-link) - 💻 [GitHub Repository](your-github-link) """) # Connect interface submit_btn.click( fn=predict_eou, inputs=[text_input, threshold_slider], outputs=[result_output, eou_prob, not_eou_prob] ) text_input.submit( fn=predict_eou, inputs=[text_input, threshold_slider], outputs=[result_output, eou_prob, not_eou_prob] ) return demo # ============================================================================ # LAUNCH # ============================================================================ if __name__ == "__main__": demo = create_demo() demo.launch()