Spaces:

oddadmix
/

egyptian-code-swtiching

Running on Zero

App Files Files Community

oddadmix committed on Nov 7, 2025

Commit

06bbda0

verified ·

1 Parent(s): 1cf51f9

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -166

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import gradio as gr
 from transformers import AutoProcessor, Gemma3nForConditionalGeneration
 import torch
 import os
-import numpy as np
 # Global variables for model and processor
 model = None
@@ -16,6 +15,7 @@ def load_model():
     print("Loading model...")
     model_id = "oddadmix/egyptian-code-switching-b4-g2-merged"
     model = Gemma3nForConditionalGeneration.from_pretrained(
         model_id,
         device_map="auto",
@@ -81,85 +81,6 @@ def transcribe_audio(audio_path, max_tokens=128):
     except Exception as e:
         return f"Error during transcription: {str(e)}"
-@spaces.GPU
-def live_transcribe(audio_stream, max_tokens=128):
-    """Transcribe audio stream in real-time"""
-    if model is None or processor is None:
-        yield "Error: Model not loaded"
-        return
-    if audio_stream is None:
-        yield "Waiting for audio input..."
-        return
-    try:
-        # Extract sample rate and audio data
-        sample_rate, audio_data = audio_stream
-        # Check if we have enough audio data (at least 1 second)
-        if len(audio_data) < sample_rate:
-            yield "Recording... (speak now)"
-            return
-        # Save temporary audio file
-        import tempfile
-        import soundfile as sf
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
-            tmp_path = tmp_file.name
-            sf.write(tmp_path, audio_data, sample_rate)
-        try:
-            messages = [
-                {
-                    "role": "system",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": "You are an assistant that transcribes speech accurately.",
-                        }
-                    ],
-                },
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "audio", "url": tmp_path},
-                        {"type": "text", "text": "Please transcribe this audio."}
-                    ]
-                }
-            ]
-            inputs = processor.apply_chat_template(
-                messages,
-                add_generation_prompt=True,
-                tokenize=True,
-                return_dict=True,
-                return_tensors="pt",
-            ).to(model.device)
-            input_len = inputs["input_ids"].shape[-1]
-            # Generate transcription
-            with torch.inference_mode():
-                generation = model.generate(
-                    **inputs,
-                    max_new_tokens=max_tokens,
-                    do_sample=False
-                )
-                generation = generation[0][input_len:]
-            response = processor.decode(generation, skip_special_tokens=True)
-            yield response
-        finally:
-            # Clean up temporary file
-            if os.path.exists(tmp_path):
-                os.unlink(tmp_path)
-    except Exception as e:
-        yield f"Error during transcription: {str(e)}"
 # Load model at startup
 load_model()
@@ -169,97 +90,33 @@ with gr.Blocks(title="Egyptian Code Switching Audio Transcription") as demo:
         """
         # 🎙️ Egyptian Code Switching Audio Transcription
-        Choose between live transcription or file upload for automatic transcription.
         Specialized for Egyptian Arabic with English code-switching.
         """
     )
-    with gr.Tabs():
-        # Live Transcription Tab
-        with gr.Tab("Live Transcription"):
-            gr.Markdown(
-                """
-                ### 🔴 Live Transcription Mode
-                Click the microphone button below and start speaking. The transcription will update in real-time.
-                """
             )
-            with gr.Row():
-                with gr.Column():
-                    live_audio = gr.Audio(
-                        sources=["microphone"],
-                        type="numpy",
-                        label="Live Audio Input",
-                        streaming=True
-                    )
-                    live_max_tokens = gr.Slider(
-                        minimum=32,
-                        maximum=512,
-                        value=128,
-                        step=32,
-                        label="Max Output Tokens"
-                    )
-                with gr.Column():
-                    live_output = gr.Textbox(
-                        label="Live Transcription",
-                        placeholder="Start speaking and transcription will appear here...",
-                        lines=10,
-                        rtl=True
-                    )
-            # Set up live transcription
-            live_audio.stream(
-                fn=live_transcribe,
-                inputs=[live_audio, live_max_tokens],
-                outputs=live_output
-            )
-        # File Upload Tab
-        with gr.Tab("File Upload"):
-            gr.Markdown(
-                """
-                ### 📁 File Upload Mode
-                Upload an audio file or record your voice to get a transcription.
-                """
             )
-            with gr.Row():
-                with gr.Column():
-                    audio_input = gr.Audio(
-                        sources=["upload", "microphone"],
-                        type="filepath",
-                        label="Audio Input"
-                    )
-                    max_tokens_slider = gr.Slider(
-                        minimum=32,
-                        maximum=512,
-                        value=128,
-                        step=32,
-                        label="Max Output Tokens"
-                    )
-                    transcribe_btn = gr.Button("Transcribe", variant="primary")
-                with gr.Column():
-                    output_text = gr.Textbox(
-                        label="Transcription",
-                        placeholder="Your transcription will appear here...",
-                        lines=10,
-                        rtl=True
-                    )
-            # Set up the transcription action
-            transcribe_btn.click(
-                fn=transcribe_audio,
-                inputs=[audio_input, max_tokens_slider],
-                outputs=output_text
-            )
-            # Also allow transcription on audio upload/record
-            audio_input.change(
-                fn=transcribe_audio,
-                inputs=[audio_input, max_tokens_slider],
-                outputs=output_text
             )
     gr.Markdown(
@@ -267,10 +124,23 @@ with gr.Blocks(title="Egyptian Code Switching Audio Transcription") as demo:
         ### Tips:
         - For best results, use clear audio with minimal background noise
         - The model specializes in Egyptian Arabic with English code-switching
-        - Live mode: Speak in short segments for better results
-        - File mode: Recording length should be reasonable (under 30 seconds recommended)
         """
     )
 # Launch the app
 if __name__ == "__main__":

 from transformers import AutoProcessor, Gemma3nForConditionalGeneration
 import torch
 import os
 # Global variables for model and processor
 model = None
     print("Loading model...")
     model_id = "oddadmix/egyptian-code-switching-b4-g2-merged"
     model = Gemma3nForConditionalGeneration.from_pretrained(
         model_id,
         device_map="auto",
     except Exception as e:
         return f"Error during transcription: {str(e)}"
 # Load model at startup
 load_model()
         """
         # 🎙️ Egyptian Code Switching Audio Transcription
+        Upload an audio file or record your voice to get an automatic transcription.
         Specialized for Egyptian Arabic with English code-switching.
         """
     )
+    with gr.Row():
+        with gr.Column():
+            audio_input = gr.Audio(
+                sources=["upload", "microphone"],
+                type="filepath",
+                label="Audio Input"
             )
+            max_tokens_slider = gr.Slider(
+                minimum=32,
+                maximum=512,
+                value=128,
+                step=32,
+                label="Max Output Tokens"
             )
+            transcribe_btn = gr.Button("Transcribe", variant="primary")
+        with gr.Column():
+            output_text = gr.Textbox(
+                label="Transcription",
+                placeholder="Your transcription will appear here...",
+                lines=10,
+                rtl=True
             )
     gr.Markdown(
         ### Tips:
         - For best results, use clear audio with minimal background noise
         - The model specializes in Egyptian Arabic with English code-switching
+        - Recording length should be reasonable (under 30 seconds recommended)
         """
     )
+    # Set up the transcription action
+    transcribe_btn.click(
+        fn=transcribe_audio,
+        inputs=[audio_input, max_tokens_slider],
+        outputs=output_text
+    )
+    # Also allow transcription on audio upload/record
+    audio_input.change(
+        fn=transcribe_audio,
+        inputs=[audio_input, max_tokens_slider],
+        outputs=output_text
+    )
 # Launch the app
 if __name__ == "__main__":