Spaces:

oddadmix
/

egyptian-code-swtiching

Running on Zero

App Files Files Community

oddadmix committed on Nov 5, 2025

Commit

7d2baf1

verified ·

1 Parent(s): 6a53468

Create app.py

Browse files

Files changed (1) hide show

app.py +144 -0

app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import gradio as gr
+from unsloth import FastModel, FastLanguageModel
+import torch
+from transformers import Gemma3nProcessor
+import os
+# Global variables for model and processor
+model = None
+processor = None
+def load_model():
+    """Load the model and processor once at startup"""
+    global model, processor
+    print("Loading model...")
+    model, _ = FastModel.from_pretrained(
+        model_name = "oddadmix/gemma-4b-egyptian-code-switching-b4-g2",
+        dtype = None,
+        max_seq_length = 2048,
+        load_in_4bit = True,  # Enable 4bit for GPU memory efficiency
+        full_finetuning = False,
+    )
+    processor = Gemma3nProcessor.from_pretrained("google/gemma-3n-E4B-it")
+    # Set model to inference mode
+    FastLanguageModel.for_inference(model)
+    print("Model loaded successfully!")
+def transcribe_audio(audio_path, max_tokens=128):
+    """Transcribe audio file using the loaded model"""
+    if model is None or processor is None:
+        return "Error: Model not loaded"
+    if audio_path is None:
+        return "Please upload or record an audio file"
+    try:
+        messages = [
+            {
+                "role": "system",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "You are an assistant that transcribes speech accurately.",
+                    }
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "audio", "url": audio_path},
+                    {"type": "text", "text": "Please transcribe this audio."}
+                ]
+            }
+        ]
+        inputs = processor.apply_chat_template(
+            messages,
+            add_generation_prompt=True,
+            tokenize=True,
+            return_dict=True,
+            return_tensors="pt",
+        ).to("cuda")
+        # Generate transcription
+        output = model.generate(
+            **inputs,
+            max_new_tokens=max_tokens,
+            do_sample=False
+        )
+        # Get only the newly generated tokens
+        generated_tokens = output[0][inputs["input_ids"].shape[-1]:]
+        response = processor.decode(generated_tokens, skip_special_tokens=True)
+        return response
+    except Exception as e:
+        return f"Error during transcription: {str(e)}"
+# Load model at startup
+load_model()
+# Create Gradio interface
+with gr.Blocks(title="Egyptian Arabic ASR") as demo:
+    gr.Markdown(
+        """
+        # 🎙️ Egyptian Arabic Speech Recognition
+        Upload an audio file or record your voice to get an automatic transcription.
+        This model is optimized for Egyptian Arabic code-switching.
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            audio_input = gr.Audio(
+                sources=["upload", "microphone"],
+                type="filepath",
+                label="Audio Input"
+            )
+            max_tokens_slider = gr.Slider(
+                minimum=32,
+                maximum=512,
+                value=128,
+                step=32,
+                label="Max Output Tokens"
+            )
+            transcribe_btn = gr.Button("Transcribe", variant="primary")
+        with gr.Column():
+            output_text = gr.Textbox(
+                label="Transcription",
+                placeholder="Your transcription will appear here...",
+                lines=10
+            )
+    gr.Markdown(
+        """
+        ### Tips:
+        - For best results, use clear audio with minimal background noise
+        - The model handles Egyptian Arabic and code-switching with English
+        - Recording length should be reasonable (under 30 seconds recommended)
+        """
+    )
+    # Set up the transcription action
+    transcribe_btn.click(
+        fn=transcribe_audio,
+        inputs=[audio_input, max_tokens_slider],
+        outputs=output_text
+    )
+    # Also allow transcription on audio upload/record
+    audio_input.change(
+        fn=transcribe_audio,
+        inputs=[audio_input, max_tokens_slider],
+        outputs=output_text
+    )
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()