🎵 Audio Transcriber & Translator
Upload audio files and get transcripts with English translation
Optimized for Hugging Face Spaces

The full Streamlit app (`app.py`) is below. Note that the source cuts off partway through `main()`, so everything after the header markup in that function is a minimal reconstruction of the upload, transcribe, and translate flow; emoji in the UI strings are plausible restorations of characters mangled in transit.

```python
import streamlit as st
import whisper
import tempfile
import os
import torch
from datetime import datetime
import warnings
import gc

# Suppress warnings
warnings.filterwarnings("ignore")

# Configure Streamlit page
st.set_page_config(
    page_title="Audio Transcriber & Translator",
    page_icon="🎵",
    layout="centered"
)


class M2M100Translator:
    def __init__(self):
        self.model_name = "facebook/m2m100_418M"
        self.tokenizer = None
        self.model = None
        # M2M100 language codes
        self.supported_languages = {
            'af': 'Afrikaans', 'ar': 'Arabic', 'bg': 'Bulgarian', 'bn': 'Bengali',
            'ca': 'Catalan', 'cs': 'Czech', 'da': 'Danish', 'de': 'German',
            'el': 'Greek', 'en': 'English', 'es': 'Spanish', 'et': 'Estonian',
            'fa': 'Persian', 'fi': 'Finnish', 'fr': 'French', 'gu': 'Gujarati',
            'he': 'Hebrew', 'hi': 'Hindi', 'hr': 'Croatian', 'hu': 'Hungarian',
            'id': 'Indonesian', 'it': 'Italian', 'ja': 'Japanese', 'ka': 'Georgian',
            'kk': 'Kazakh', 'km': 'Khmer', 'kn': 'Kannada', 'ko': 'Korean',
            'lt': 'Lithuanian', 'lv': 'Latvian', 'mk': 'Macedonian', 'ml': 'Malayalam',
            'mn': 'Mongolian', 'mr': 'Marathi', 'ms': 'Malay', 'my': 'Myanmar',
            'ne': 'Nepali', 'nl': 'Dutch', 'no': 'Norwegian', 'pl': 'Polish',
            'pt': 'Portuguese', 'ro': 'Romanian', 'ru': 'Russian', 'si': 'Sinhala',
            'sk': 'Slovak', 'sl': 'Slovenian', 'sq': 'Albanian', 'sr': 'Serbian',
            'sv': 'Swedish', 'sw': 'Swahili', 'ta': 'Tamil', 'te': 'Telugu',
            'th': 'Thai', 'tl': 'Tagalog', 'tr': 'Turkish', 'uk': 'Ukrainian',
            'ur': 'Urdu', 'vi': 'Vietnamese', 'zh': 'Chinese'
        }

    def load_model(self):
        if self.model is None:
            try:
                from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
                with st.spinner("🔄 Loading M2M100 translation model..."):
                    # Load tokenizer and model - simplified for HF Spaces
                    self.tokenizer = M2M100Tokenizer.from_pretrained(self.model_name)
                    self.model = M2M100ForConditionalGeneration.from_pretrained(
                        self.model_name,
                        torch_dtype=torch.float32  # Use float32 for CPU compatibility
                    )
                st.success("✅ Translation model loaded successfully!")
            except Exception as e:
                st.error(f"❌ Failed to load translation model: {str(e)}")
                st.info("💡 Translation will be skipped. You can still get transcripts.")
                return False
        return True

    def get_language_name(self, lang_code):
        return self.supported_languages.get(lang_code, lang_code.upper())

    def translate_text(self, text, source_language):
        if not text or not text.strip():
            return {"success": False, "error": "Empty text provided"}

        # If already English, return as is
        if source_language == 'en':
            return {
                "success": True,
                "original_text": text,
                "translated_text": text,
                "source_language": source_language,
                "note": "Source is already English"
            }

        # Check if source language is supported
        if source_language not in self.supported_languages:
            return {
                "success": False,
                "error": f"Language '{source_language}' not supported",
                "original_text": text,
                "source_language": source_language
            }

        if not self.load_model():
            return {
                "success": False,
                "error": "Translation model not available",
                "original_text": text,
                "source_language": source_language
            }

        try:
            # Set source language
            self.tokenizer.src_lang = source_language

            # Tokenize input with length limits for HF Spaces
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=200  # Reduced for faster processing
            )

            # Generate translation
            with torch.no_grad():
                generated_tokens = self.model.generate(
                    **inputs,
                    forced_bos_token_id=self.tokenizer.get_lang_id("en"),
                    max_length=250,
                    num_beams=2,  # Reduced beams for speed
                    early_stopping=True,
                    do_sample=False
                )

            # Decode translation
            translated_text = self.tokenizer.batch_decode(
                generated_tokens,
                skip_special_tokens=True
            )[0]

            # Clear memory
            del inputs, generated_tokens
            gc.collect()

            return {
                "success": True,
                "original_text": text,
                "translated_text": translated_text.strip(),
                "source_language": source_language,
                "model_used": self.model_name
            }
        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "original_text": text,
                "source_language": source_language
            }


@st.cache_resource
def load_whisper_model():
    """Load Whisper model with caching - optimized for HF Spaces."""
    try:
        # Use tiny model for faster loading and processing on HF Spaces
        return whisper.load_model("tiny")
    except Exception as e:
        st.error(f"Failed to load Whisper model: {e}")
        return None


@st.cache_resource
def load_translator():
    """Load translator with caching."""
    return M2M100Translator()


def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file."""
    tmp_file_path = None
    try:
        # Write the upload to a temporary file so Whisper can read it from disk
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            tmp_file.write(audio_file.read())
            tmp_file_path = tmp_file.name

        model = load_whisper_model()
        if model is None:
            return {"success": False, "error": "Whisper model not available"}

        # Transcribe with optimized settings for HF Spaces
        result = model.transcribe(
            tmp_file_path,
            fp16=False,  # Use fp32 for better compatibility
            task="transcribe"
        )

        # Clean up
        os.unlink(tmp_file_path)
        gc.collect()

        return {
            "success": True,
            "transcript": result["text"].strip(),
            "language": result["language"]
        }
    except Exception as e:
        if tmp_file_path is not None:
            try:
                os.unlink(tmp_file_path)
            except OSError:
                pass
        return {"success": False, "error": str(e)}


def main():
    # Header
    st.markdown("""
    <div style="text-align: center;">
        <h1>🎵 Audio Transcriber & Translator</h1>
        <p>Upload audio files and get transcripts with English translation</p>
        <p><em>Optimized for Hugging Face Spaces</em></p>
    </div>
    """, unsafe_allow_html=True)

    # NOTE: the source cuts off inside main(); the rest of this function is a
    # minimal reconstruction of the upload -> transcribe -> translate flow.
    audio_file = st.file_uploader(
        "Upload an audio file",
        type=["wav", "mp3", "m4a", "ogg", "flac"]
    )

    if audio_file is not None:
        st.audio(audio_file)
        if st.button("Transcribe & Translate"):
            audio_file.seek(0)  # rewind after st.audio has read the buffer
            with st.spinner("Transcribing audio..."):
                result = transcribe_audio(audio_file)

            if not result["success"]:
                st.error(f"Transcription failed: {result['error']}")
            else:
                translator = load_translator()
                detected = result["language"]
                st.subheader(f"Transcript ({translator.get_language_name(detected)})")
                st.write(result["transcript"])
                st.download_button(
                    "Download transcript",
                    data=result["transcript"],
                    file_name=f"transcript_{datetime.now():%Y%m%d_%H%M%S}.txt"
                )

                if detected != "en":
                    with st.spinner("Translating to English..."):
                        translation = translator.translate_text(result["transcript"], detected)
                    if translation["success"]:
                        st.subheader("English Translation")
                        st.write(translation["translated_text"])
                    else:
                        st.warning(f"Translation unavailable: {translation['error']}")
                else:
                    st.info("Audio is already in English; no translation needed.")

    # Footer
    st.markdown("""
    <div style="text-align: center;">
        <p>🎵 Powered by OpenAI Whisper & Facebook M2M100</p>
        <p>Running on Hugging Face Spaces 🤗</p>
    </div>
    """, unsafe_allow_html=True)


if __name__ == "__main__":
    main()
```
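To try the transcription step on its own, outside Streamlit, here is a minimal sketch. It assumes the `openai-whisper` package is installed and `ffmpeg` is available on the PATH; `audio.wav` is a placeholder path, not a file shipped with the app.

```python
# Standalone Whisper sketch: same "tiny" checkpoint the app loads.
# Assumes openai-whisper is installed and ffmpeg is on PATH;
# "audio.wav" is a placeholder path.
import whisper

model = whisper.load_model("tiny")
result = model.transcribe("audio.wav", fp16=False)

print(result["language"])       # detected ISO 639-1 code, e.g. "fr"
print(result["text"].strip())   # the transcript itself
```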
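The translation step can likewise be exercised in isolation. Below is a sketch mirroring the app's `translate_text()` path, assuming `transformers`, `sentencepiece`, and `torch` are installed (the 418M checkpoint downloads on first use); the French sample sentence is just an illustration.

```python
# Standalone M2M100 sketch mirroring the app's translate_text() path.
import torch
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

model_name = "facebook/m2m100_418M"
tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

def translate_to_english(text: str, source_language: str) -> str:
    """Translate text from source_language (ISO 639-1 code) into English."""
    tokenizer.src_lang = source_language
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=200)
    with torch.no_grad():
        tokens = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.get_lang_id("en"),  # force English output
            max_length=250,
            num_beams=2,
        )
    return tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]

print(translate_to_english("Bonjour tout le monde", "fr"))  # expect "Hello everyone" or similar
```

On a Space, these dependencies (`streamlit`, `openai-whisper`, `torch`, `transformers`, `sentencepiece`) would typically be pinned in `requirements.txt`, with the system-level `ffmpeg` listed in `packages.txt`.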
🎵 Powered by OpenAI Whisper & Facebook M2M100
Running on Hugging Face Spaces 🤗