prachi1507 committed on
Commit
3909dfe
·
verified ·
1 Parent(s): cd41c00

create app.py

Browse files
Files changed (1) hide show
  1. app.py +461 -0
app.py ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import whisper
3
+ import tempfile
4
+ import os
5
+ import torch
6
+ from datetime import datetime
7
+ import warnings
8
+ import gc
9
+
10
+ # Suppress warnings
11
+ warnings.filterwarnings("ignore")
12
+
13
+ # Configure Streamlit page
14
+ st.set_page_config(
15
+ page_title="Audio Transcriber & Translator",
16
+ page_icon="🎡",
17
+ layout="centered"
18
+ )
19
+
20
+ # Custom CSS for better UI
21
+ st.markdown("""
22
+ <style>
23
+ .main-header {
24
+ text-align: center;
25
+ padding: 2rem 0;
26
+ background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
27
+ color: white;
28
+ border-radius: 10px;
29
+ margin-bottom: 2rem;
30
+ }
31
+ .result-section {
32
+ background: #f8f9fa;
33
+ padding: 1.5rem;
34
+ border-radius: 10px;
35
+ margin: 1rem 0;
36
+ border-left: 4px solid #667eea;
37
+ }
38
+ .download-section {
39
+ background: #e8f5e8;
40
+ padding: 1.5rem;
41
+ border-radius: 10px;
42
+ margin-top: 1.5rem;
43
+ text-align: center;
44
+ }
45
+ .language-badge {
46
+ background: #667eea;
47
+ color: white;
48
+ padding: 0.5rem 1rem;
49
+ border-radius: 20px;
50
+ font-weight: bold;
51
+ display: inline-block;
52
+ margin-bottom: 1rem;
53
+ }
54
+ .warning-box {
55
+ background: #fff3cd;
56
+ border: 1px solid #ffeaa7;
57
+ padding: 1rem;
58
+ border-radius: 8px;
59
+ margin: 1rem 0;
60
+ }
61
+ </style>
62
+ """, unsafe_allow_html=True)
63
+
64
+ class M2M100Translator:
65
+ def __init__(self):
66
+ self.model_name = "facebook/m2m100_418M"
67
+ self.tokenizer = None
68
+ self.model = None
69
+
70
+ # M2M100 language codes
71
+ self.supported_languages = {
72
+ 'af': 'Afrikaans', 'ar': 'Arabic', 'bg': 'Bulgarian', 'bn': 'Bengali',
73
+ 'ca': 'Catalan', 'cs': 'Czech', 'da': 'Danish', 'de': 'German',
74
+ 'el': 'Greek', 'en': 'English', 'es': 'Spanish', 'et': 'Estonian',
75
+ 'fa': 'Persian', 'fi': 'Finnish', 'fr': 'French', 'gu': 'Gujarati',
76
+ 'he': 'Hebrew', 'hi': 'Hindi', 'hr': 'Croatian', 'hu': 'Hungarian',
77
+ 'id': 'Indonesian', 'it': 'Italian', 'ja': 'Japanese', 'ka': 'Georgian',
78
+ 'kk': 'Kazakh', 'km': 'Khmer', 'kn': 'Kannada', 'ko': 'Korean',
79
+ 'lt': 'Lithuanian', 'lv': 'Latvian', 'mk': 'Macedonian', 'ml': 'Malayalam',
80
+ 'mn': 'Mongolian', 'mr': 'Marathi', 'ms': 'Malay', 'my': 'Myanmar',
81
+ 'ne': 'Nepali', 'nl': 'Dutch', 'no': 'Norwegian', 'pl': 'Polish',
82
+ 'pt': 'Portuguese', 'ro': 'Romanian', 'ru': 'Russian', 'si': 'Sinhala',
83
+ 'sk': 'Slovak', 'sl': 'Slovenian', 'sq': 'Albanian', 'sr': 'Serbian',
84
+ 'sv': 'Swedish', 'sw': 'Swahili', 'ta': 'Tamil', 'te': 'Telugu',
85
+ 'th': 'Thai', 'tl': 'Tagalog', 'tr': 'Turkish', 'uk': 'Ukrainian',
86
+ 'ur': 'Urdu', 'vi': 'Vietnamese', 'zh': 'Chinese'
87
+ }
88
+
89
+ def load_model(self):
90
+ if self.model is None:
91
+ try:
92
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
93
+
94
+ with st.spinner("πŸ”„ Loading M2M100 translation model..."):
95
+ # Load tokenizer and model - simplified for HF Spaces
96
+ self.tokenizer = M2M100Tokenizer.from_pretrained(self.model_name)
97
+ self.model = M2M100ForConditionalGeneration.from_pretrained(
98
+ self.model_name,
99
+ torch_dtype=torch.float32 # Use float32 for CPU compatibility
100
+ )
101
+
102
+ st.success("βœ… Translation model loaded successfully!")
103
+
104
+ except Exception as e:
105
+ st.error(f"❌ Failed to load translation model: {str(e)}")
106
+ st.info("πŸ’‘ Translation will be skipped. You can still get transcripts.")
107
+ return False
108
+ return True
109
+
110
+ def get_language_name(self, lang_code):
111
+ return self.supported_languages.get(lang_code, lang_code.upper())
112
+
113
+ def translate_text(self, text, source_language):
114
+ if not text or not text.strip():
115
+ return {"success": False, "error": "Empty text provided"}
116
+
117
+ # If already English, return as is
118
+ if source_language == 'en':
119
+ return {
120
+ "success": True,
121
+ "original_text": text,
122
+ "translated_text": text,
123
+ "source_language": source_language,
124
+ "note": "Source is already English"
125
+ }
126
+
127
+ # Check if source language is supported
128
+ if source_language not in self.supported_languages:
129
+ return {
130
+ "success": False,
131
+ "error": f"Language '{source_language}' not supported",
132
+ "original_text": text,
133
+ "source_language": source_language
134
+ }
135
+
136
+ if not self.load_model():
137
+ return {
138
+ "success": False,
139
+ "error": "Translation model not available",
140
+ "original_text": text,
141
+ "source_language": source_language
142
+ }
143
+
144
+ try:
145
+ # Set source language
146
+ self.tokenizer.src_lang = source_language
147
+
148
+ # Tokenize input with length limits for HF Spaces
149
+ inputs = self.tokenizer(
150
+ text,
151
+ return_tensors="pt",
152
+ padding=True,
153
+ truncation=True,
154
+ max_length=200 # Reduced for faster processing
155
+ )
156
+
157
+ # Generate translation
158
+ with torch.no_grad():
159
+ generated_tokens = self.model.generate(
160
+ **inputs,
161
+ forced_bos_token_id=self.tokenizer.get_lang_id("en"),
162
+ max_length=250,
163
+ num_beams=2, # Reduced beams for speed
164
+ early_stopping=True,
165
+ do_sample=False
166
+ )
167
+
168
+ # Decode translation
169
+ translated_text = self.tokenizer.batch_decode(
170
+ generated_tokens,
171
+ skip_special_tokens=True
172
+ )[0]
173
+
174
+ # Clear memory
175
+ del inputs, generated_tokens
176
+ gc.collect()
177
+
178
+ return {
179
+ "success": True,
180
+ "original_text": text,
181
+ "translated_text": translated_text.strip(),
182
+ "source_language": source_language,
183
+ "model_used": self.model_name
184
+ }
185
+
186
+ except Exception as e:
187
+ return {
188
+ "success": False,
189
+ "error": str(e),
190
+ "original_text": text,
191
+ "source_language": source_language
192
+ }
193
+
194
+ @st.cache_resource
195
+ def load_whisper_model():
196
+ """Load Whisper model with caching - optimized for HF Spaces"""
197
+ try:
198
+ # Use tiny model for faster loading and processing on HF Spaces
199
+ model = whisper.load_model("tiny")
200
+ return model
201
+ except Exception as e:
202
+ st.error(f"Failed to load Whisper model: {e}")
203
+ return None
204
+
205
+ @st.cache_resource
206
+ def load_translator():
207
+ """Load translator with caching"""
208
+ return M2M100Translator()
209
+
210
+ def transcribe_audio(audio_file):
211
+ """Transcribe uploaded audio file"""
212
+ try:
213
+ # Create temporary file
214
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
215
+ tmp_file.write(audio_file.read())
216
+ tmp_file_path = tmp_file.name
217
+
218
+ model = load_whisper_model()
219
+ if model is None:
220
+ return {"success": False, "error": "Whisper model not available"}
221
+
222
+ # Transcribe with optimized settings for HF Spaces
223
+ result = model.transcribe(
224
+ tmp_file_path,
225
+ fp16=False, # Use fp32 for better compatibility
226
+ task="transcribe"
227
+ )
228
+
229
+ # Clean up
230
+ os.unlink(tmp_file_path)
231
+ gc.collect()
232
+
233
+ return {
234
+ "success": True,
235
+ "transcript": result["text"].strip(),
236
+ "language": result["language"]
237
+ }
238
+
239
+ except Exception as e:
240
+ if 'tmp_file_path' in locals():
241
+ try:
242
+ os.unlink(tmp_file_path)
243
+ except:
244
+ pass
245
+ return {"success": False, "error": str(e)}
246
+
247
+ def main():
248
+ # Header
249
+ st.markdown("""
250
+ <div class="main-header">
251
+ <h1>🎡 Audio Transcriber & Translator</h1>
252
+ <p>Upload audio files and get transcripts with English translation</p>
253
+ <small>Optimized for Hugging Face Spaces</small>
254
+ </div>
255
+ """, unsafe_allow_html=True)
256
+
257
+ # HF Spaces notice
258
+ st.markdown("""
259
+ <div class="warning-box">
260
+ <strong>πŸš€ Hugging Face Spaces Version</strong><br>
261
+ β€’ Using Whisper-tiny for faster processing<br>
262
+ β€’ File limit: 10MB, Duration: 5 minutes<br>
263
+ β€’ Processing may take 1-2 minutes
264
+ </div>
265
+ """, unsafe_allow_html=True)
266
+
267
+ # Show system info in sidebar
268
+ with st.sidebar:
269
+ st.header("πŸ”§ System Info")
270
+ st.info("Running on Hugging Face Spaces")
271
+ st.info(f"PyTorch: {torch.__version__}")
272
+ st.warning("Using CPU (optimized for HF Spaces)")
273
+
274
+ st.header("🌍 Models")
275
+ st.info("β€’ Whisper: tiny (fast)")
276
+ st.info("β€’ Translation: M2M100-418M")
277
+
278
+ with st.expander("πŸ’‘ Tips"):
279
+ st.caption("β€’ Use shorter audio files (< 5 min)")
280
+ st.caption("β€’ MP3/WAV work best")
281
+ st.caption("β€’ Clear speech gives better results")
282
+ st.caption("β€’ Processing takes 1-2 minutes")
283
+
284
+ # File uploader with restrictions for HF Spaces
285
+ uploaded_file = st.file_uploader(
286
+ "🎡 Choose an audio file",
287
+ type=['mp3', 'wav', 'mp4', 'm4a'],
288
+ help="Supported: MP3, WAV, MP4, M4A | Max: 10MB, 5 minutes"
289
+ )
290
+
291
+ if uploaded_file is not None:
292
+ # File size check
293
+ file_size_mb = uploaded_file.size / (1024 * 1024)
294
+
295
+ if file_size_mb > 10:
296
+ st.error("❌ File too large! Please use files under 10MB for optimal performance on HF Spaces.")
297
+ return
298
+
299
+ st.success(f"πŸ“ **{uploaded_file.name}** ({file_size_mb:.2f} MB)")
300
+
301
+ # Processing options
302
+ col1, col2 = st.columns(2)
303
+ with col1:
304
+ transcribe_only = st.checkbox("Transcribe only (faster)", value=False)
305
+ with col2:
306
+ if st.button("🧹 Clear Cache", help="Clear models from memory"):
307
+ st.cache_resource.clear()
308
+ st.success("Cache cleared!")
309
+
310
+ # Process button
311
+ if st.button("πŸš€ Process Audio", type="primary", use_container_width=True):
312
+ start_time = datetime.now()
313
+
314
+ # Step 1: Transcription
315
+ with st.spinner("🎀 Transcribing audio... (this may take 1-2 minutes)"):
316
+ transcription_result = transcribe_audio(uploaded_file)
317
+
318
+ if transcription_result["success"]:
319
+ transcript = transcription_result["transcript"]
320
+ detected_language = transcription_result["language"]
321
+
322
+ # Get language name
323
+ translator = load_translator()
324
+ language_name = translator.get_language_name(detected_language)
325
+
326
+ # Display transcription results
327
+ st.markdown("""
328
+ <div class="result-section">
329
+ <h3>πŸ“ Transcription Results</h3>
330
+ </div>
331
+ """, unsafe_allow_html=True)
332
+
333
+ # Language badge
334
+ st.markdown(f"""
335
+ <div class="language-badge">
336
+ 🌍 Detected: {language_name} ({detected_language})
337
+ </div>
338
+ """, unsafe_allow_html=True)
339
+
340
+ # Transcript
341
+ st.text_area(
342
+ "Original Transcript",
343
+ transcript,
344
+ height=150,
345
+ key="transcript"
346
+ )
347
+
348
+ # Step 2: Translation (if requested)
349
+ if not transcribe_only and detected_language != 'en':
350
+ with st.spinner("🌍 Translating to English..."):
351
+ translation_result = translator.translate_text(transcript, detected_language)
352
+
353
+ if translation_result["success"]:
354
+ translated_text = translation_result["translated_text"]
355
+
356
+ st.markdown("""
357
+ <div class="result-section">
358
+ <h3>🌍 English Translation</h3>
359
+ </div>
360
+ """, unsafe_allow_html=True)
361
+
362
+ st.text_area(
363
+ "English Translation",
364
+ translated_text,
365
+ height=150,
366
+ key="translation"
367
+ )
368
+
369
+ # Download section
370
+ st.markdown("""
371
+ <div class="download-section">
372
+ <h4>πŸ“₯ Download Results</h4>
373
+ </div>
374
+ """, unsafe_allow_html=True)
375
+
376
+ # Prepare download content
377
+ full_content = f"""Audio Transcription & Translation
378
+ {'='*60}
379
+ File: {uploaded_file.name}
380
+ Size: {file_size_mb:.2f} MB
381
+ Detected Language: {language_name} ({detected_language})
382
+ Processing Time: {(datetime.now() - start_time).total_seconds():.1f} seconds
383
+ Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
384
+ {'='*60}
385
+
386
+ ORIGINAL TRANSCRIPT ({language_name}):
387
+ {transcript}
388
+
389
+ ENGLISH TRANSLATION:
390
+ {translated_text}
391
+
392
+ {'='*60}
393
+ Processed with Whisper (tiny) + M2M100 on Hugging Face Spaces
394
+ """
395
+
396
+ st.download_button(
397
+ "πŸ“„ Download Complete Results",
398
+ full_content,
399
+ file_name=f"{os.path.splitext(uploaded_file.name)[0]}_results.txt",
400
+ mime="text/plain",
401
+ use_container_width=True
402
+ )
403
+
404
+ else:
405
+ st.error(f"❌ Translation failed: {translation_result['error']}")
406
+ # Still offer transcript download
407
+ transcript_content = f"""Audio Transcription
408
+ {'='*50}
409
+ File: {uploaded_file.name}
410
+ Language: {language_name} ({detected_language})
411
+ Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
412
+ {'='*50}
413
+
414
+ {transcript}
415
+ """
416
+ st.download_button(
417
+ "πŸ“„ Download Transcript",
418
+ transcript_content,
419
+ file_name=f"{os.path.splitext(uploaded_file.name)[0]}_transcript.txt",
420
+ mime="text/plain"
421
+ )
422
+
423
+ elif transcribe_only or detected_language == 'en':
424
+ # Transcript only
425
+ transcript_content = f"""Audio Transcription
426
+ {'='*50}
427
+ File: {uploaded_file.name}
428
+ Language: {language_name} ({detected_language})
429
+ Processing Time: {(datetime.now() - start_time).total_seconds():.1f} seconds
430
+ Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
431
+ {'='*50}
432
+
433
+ {transcript}
434
+ """
435
+ st.download_button(
436
+ "πŸ“„ Download Transcript",
437
+ transcript_content,
438
+ file_name=f"{os.path.splitext(uploaded_file.name)[0]}_transcript.txt",
439
+ mime="text/plain",
440
+ use_container_width=True
441
+ )
442
+
443
+ # Show processing time
444
+ processing_time = (datetime.now() - start_time).total_seconds()
445
+ st.success(f"βœ… Processing completed in {processing_time:.1f} seconds")
446
+
447
+ else:
448
+ st.error(f"❌ Transcription failed: {transcription_result['error']}")
449
+ st.info("πŸ’‘ Try with a different audio file or format")
450
+
451
+ # Footer
452
+ st.markdown("---")
453
+ st.markdown("""
454
+ <div style="text-align: center; color: #666; padding: 1rem;">
455
+ <p>🎡 Powered by OpenAI Whisper & Facebook M2M100</p>
456
+ <p>Running on Hugging Face Spaces πŸ€—</p>
457
+ </div>
458
+ """, unsafe_allow_html=True)
459
+
460
+ if __name__ == "__main__":
461
+ main()