1morecupofhottea committed on
Commit
ec35154
Β·
1 Parent(s): 962a236

Update design

Browse files
Files changed (1) hide show
  1. app.py +249 -16
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import re
 
4
 
5
  # Load both ASR models
6
  whisper_asr = pipeline("automatic-speech-recognition", model="1morecupofhottea/Whisper-Code-Switching-Kh-En")
@@ -11,28 +12,260 @@ def clean_transcript(text: str) -> str:
11
  return re.sub(r"</?[^>]+>", "", text).strip()
12
 
13
def transcribe(audio, model_choice):
    """Transcribe *audio* with the ASR pipeline selected by *model_choice*.

    audio: filepath string from the gr.Audio component (type="filepath").
    model_choice: "Whisper" routes to whisper_asr; any other value
    (the UI offers "Wav2Vec2") routes to wav2vec_asr.
    Returns the raw transcription text; Wav2Vec2 output is first stripped
    of <tag>-style tokens by clean_transcript.
    """
    if model_choice == "Whisper":
        result = whisper_asr(audio)
        return result["text"]
    else:
        result = wav2vec_asr(audio)
        # Wav2Vec2 output may contain angle-bracket tokens; strip them.
        return clean_transcript(result["text"])
 
 
 
 
 
 
 
20
 
21
# Minimal two-column Gradio UI: audio in + model picker, transcription out.
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ ASR Demo: Choose Your Model")
    gr.Markdown("Upload or record audio, then select which ASR model to use.")

    with gr.Row():
        # type="filepath" means transcribe() receives a path string, not raw audio.
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Input Audio")
        # Choices must match the exact strings transcribe() compares against.
        model_selector = gr.Dropdown(choices=["Whisper", "Wav2Vec2"], value="Whisper", label="Select Model")

    output_text = gr.Textbox(label="Transcription")

    transcribe_button = gr.Button("Transcribe")

    # Wire the button to the ASR function; runs on every click.
    transcribe_button.click(
        fn=transcribe,
        inputs=[audio_input, model_selector],
        outputs=output_text
    )

demo.launch()
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import re
4
+ import time
5
 
6
# Load both ASR models eagerly at import time (the first run downloads the
# model weights from the Hugging Face Hub, so startup can be slow).
# NOTE(review): the matching wav2vec_asr pipeline load is presumably the next
# line, hidden by the diff hunk — verify against the full file.
whisper_asr = pipeline("automatic-speech-recognition", model="1morecupofhottea/Whisper-Code-Switching-Kh-En")
 
12
  return re.sub(r"</?[^>]+>", "", text).strip()
13
 
14
def transcribe(audio, model_choice):
    """Transcribe *audio* with the selected ASR pipeline and format the result.

    Parameters
    ----------
    audio : str | None
        Filepath from the gr.Audio component (type="filepath"); ``None``
        when the user has not recorded/uploaded anything yet.
    model_choice : str
        Dropdown label. Any label containing "Whisper" routes to the
        Whisper pipeline; everything else routes to Wav2Vec2.

    Returns
    -------
    str
        The formatted transcription, or a message starting with "❌"
        describing why transcription did not happen.
    """
    if audio is None:
        return "❌ Please upload or record an audio file first!"

    try:
        # Match on the substring "Whisper" instead of the exact emoji-laden
        # label "🎯 Whisper (Recommended)", so routing survives cosmetic
        # label changes (and the older plain "Whisper" label still works).
        if "Whisper" in model_choice:
            text = whisper_asr(audio)["text"]
        else:
            # Wav2Vec2 output may contain <tag>-style tokens; strip them.
            text = clean_transcript(wav2vec_asr(audio)["text"])
        return f"✅ **Transcription Complete**\n\n{text}"
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the Gradio event handler.
        return f"❌ Error during transcription: {str(e)}"
28
 
29
def clear_all():
    """Reset the UI: drop the audio, restore the default model, wipe the output."""
    default_model = "🎯 Whisper (Recommended)"
    return (None, default_model, "")
 
31
 
32
# Custom CSS for modern styling.
# Injected into gr.Blocks(css=...); class names below must match the
# elem_classes values used when building the interface.
custom_css = """
/* Global Styles */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
}

/* Header Styling */
.header-section {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 20px;
    padding: 30px;
    margin-bottom: 25px;
    box-shadow: 0 10px 30px rgba(0, 0, 0, 0.1);
    backdrop-filter: blur(10px);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

.header-section h1 {
    background: linear-gradient(45deg, #667eea, #764ba2);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 2.5em !important;
    font-weight: 700 !important;
    text-align: center;
    margin-bottom: 15px;
}

.header-section p {
    color: #555;
    font-size: 1.1em;
    text-align: center;
    margin: 0;
    line-height: 1.6;
}

/* Main Content Cards */
.input-card, .output-card {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 15px;
    padding: 25px;
    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.1);
    backdrop-filter: blur(10px);
    border: 1px solid rgba(255, 255, 255, 0.2);
    margin-bottom: 20px;
}

/* Button Styling */
.primary-button {
    background: linear-gradient(45deg, #667eea, #764ba2) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 15px 30px !important;
    font-size: 1.1em !important;
    font-weight: 600 !important;
    color: white !important;
    box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4) !important;
    transition: all 0.3s ease !important;
}

.primary-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 25px rgba(102, 126, 234, 0.6) !important;
}

.secondary-button {
    background: linear-gradient(45deg, #ff6b6b, #ee5a24) !important;
    border: none !important;
    border-radius: 12px !important;
    padding: 12px 25px !important;
    font-size: 1em !important;
    font-weight: 600 !important;
    color: white !important;
    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.4) !important;
}

/* Audio Component Styling */
.audio-component {
    border-radius: 12px !important;
    border: 2px solid #e1e8f7 !important;
    background: #f8faff !important;
}

/* Dropdown Styling */
.dropdown-component {
    border-radius: 12px !important;
    border: 2px solid #e1e8f7 !important;
}

/* Output Text Styling */
.output-text {
    background: #f8faff !important;
    border-radius: 12px !important;
    border: 2px solid #e1e8f7 !important;
    padding: 20px !important;
    font-size: 1.05em !important;
    line-height: 1.6 !important;
}

/* Features Section */
.features-section {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 15px;
    padding: 25px;
    margin-top: 25px;
    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.1);
    backdrop-filter: blur(10px);
    border: 1px solid rgba(255, 255, 255, 0.2);
}

.feature-item {
    margin-bottom: 10px;
    color: #555;
    font-size: 1.05em;
}

/* Responsive Design */
@media (max-width: 768px) {
    .header-section h1 {
        font-size: 2em !important;
    }

    .input-card, .output-card {
        padding: 20px;
        margin-bottom: 15px;
    }
}
"""
161
+
162
# Create the main interface: header, input/output cards, feature list, and
# event wiring. Class names in elem_classes must match selectors in custom_css.
with gr.Blocks(css=custom_css, title="🎙️ CS-ASR | Code-Switching Speech Recognition") as demo:
    # Header Section
    with gr.Column(elem_classes="header-section"):
        gr.HTML("""
            <h1>🎙️ Code-Switching ASR Studio</h1>
            <p>Advanced Speech Recognition for Khmer-English Code-Switching</p>
            <p>✨ Powered by state-of-the-art Whisper and Wav2Vec2 models ✨</p>
        """)

    # Main Content: two equal-width cards side by side.
    with gr.Row():
        # Input Section
        with gr.Column(scale=1, elem_classes="input-card"):
            gr.HTML("<h3 style='color: #667eea; margin-bottom: 20px; font-weight: 600;'>🎡 Audio Input</h3>")

            # type="filepath": transcribe() receives a path string, not raw samples.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or Upload Audio",
                elem_classes="audio-component"
            )

            # NOTE(review): transcribe() branches on this label text, so the
            # choice strings and the routing logic must stay in sync.
            model_selector = gr.Dropdown(
                choices=[
                    "🎯 Whisper (Recommended)",
                    "⚡ Wav2Vec2 (Fast)"
                ],
                value="🎯 Whisper (Recommended)",
                label="🤖 Select AI Model",
                elem_classes="dropdown-component",
                info="Choose the model that best fits your needs"
            )

            # Action Buttons
            with gr.Row():
                transcribe_button = gr.Button(
                    "🚀 Start Transcription",
                    variant="primary",
                    elem_classes="primary-button",
                    scale=2
                )
                clear_button = gr.Button(
                    "🗑️ Clear All",
                    elem_classes="secondary-button",
                    scale=1
                )

        # Output Section
        with gr.Column(scale=1, elem_classes="output-card"):
            gr.HTML("<h3 style='color: #667eea; margin-bottom: 20px; font-weight: 600;'>📝 Transcription Result</h3>")

            # interactive=False: read-only display for transcribe() results.
            output_text = gr.Textbox(
                label="Your Transcription Will Appear Here",
                placeholder="🎀 Upload an audio file and click 'Start Transcription' to see the magic happen!",
                lines=12,
                elem_classes="output-text",
                interactive=False
            )

    # Features Section (static marketing copy).
    with gr.Column(elem_classes="features-section"):
        gr.HTML("""
            <h3 style='color: #667eea; margin-bottom: 20px; font-weight: 600; text-align: center;'>🌟 Key Features</h3>
            <div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px; margin-top: 20px;'>
                <div class='feature-item'>
                    <strong>🎯 Dual Model Support:</strong> Choose between Whisper and Wav2Vec2 for optimal results
                </div>
                <div class='feature-item'>
                    <strong>🌍 Code-Switching Ready:</strong> Seamlessly handles Khmer-English mixed speech
                </div>
                <div class='feature-item'>
                    <strong>🎀 Flexible Input:</strong> Record live or upload existing audio files
                </div>
                <div class='feature-item'>
                    <strong>⚡ Real-time Processing:</strong> Fast and accurate transcription results
                </div>
                <div class='feature-item'>
                    <strong>🎨 Modern Interface:</strong> Beautiful, responsive design for all devices
                </div>
                <div class='feature-item'>
                    <strong>🔧 Easy to Use:</strong> No technical knowledge required - just click and transcribe!
                </div>
            </div>
        """)

    # Event Handlers
    transcribe_button.click(
        fn=transcribe,
        inputs=[audio_input, model_selector],
        outputs=output_text,
        show_progress=True
    )

    # clear_all returns (audio, model, text) in the same order as outputs.
    clear_button.click(
        fn=clear_all,
        outputs=[audio_input, model_selector, output_text]
    )

    # Auto-transcribe when audio is uploaded (optional).
    # NOTE(review): uploading triggers transcription here AND again if the
    # user then clicks the button — the audio is transcribed twice; confirm
    # this duplication is intended.
    audio_input.change(
        fn=lambda audio, model: transcribe(audio, model) if audio is not None else "",
        inputs=[audio_input, model_selector],
        outputs=output_text,
        show_progress=True
    )
268
 
269
# Start the Gradio server only when run as a script (not on import, e.g.
# when a Space runner imports this module).
if __name__ == "__main__":
    demo.launch()