Spaces:

Artificial-superintelligence
/

TranslateAi

Paused

App Files Files Community

Artificial-superintelligence committed on Oct 17, 2024

Commit

13dbd66

verified ·

1 Parent(s): 904c22d

Create app.py

Browse files

Files changed (1) hide show

app.py +277 -0

app.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import streamlit as st
+import moviepy.editor as mp
+import speech_recognition as sr
+from deep_translator import GoogleTranslator
+import tempfile
+import os
+from pydub import AudioSegment
+import torch
+from TTS.api import TTS
+import pyttsx3
+import numpy as np
+from scipy.io import wavfile
+import soundfile as sf
+class EnhancedVideoTranslator:
+    def __init__(self):
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Initialize Coqui TTS
+        try:
+            self.tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(self.device)
+        except:
+            # Fallback to a smaller model if XTTS fails
+            self.tts = TTS(model_name="tts_models/multilingual/multi-dataset/bark").to(self.device)
+        # Initialize pyttsx3 as backup
+        self.pyttsx3_engine = pyttsx3.init()
+    def extract_audio(self, video_path):
+        video = mp.VideoFileClip(video_path)
+        audio = video.audio
+        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+        audio.write_audiofile(temp_audio.name)
+        return temp_audio.name
+    def enhance_audio(self, audio_path):
+        audio = AudioSegment.from_wav(audio_path)
+        # Noise reduction and enhancement
+        enhanced = audio.high_pass_filter(80)
+        enhanced = enhanced.low_pass_filter(7500)
+        enhanced = enhanced.normalize()
+        # Apply compression
+        enhanced = enhanced.compress_dynamic_range()
+        enhanced_path = audio_path.replace('.wav', '_enhanced.wav')
+        enhanced.export(enhanced_path, format="wav")
+        return enhanced_path
+    def speech_to_text(self, audio_path):
+        recognizer = sr.Recognizer()
+        with sr.AudioFile(audio_path) as source:
+            recognizer.adjust_for_ambient_noise(source)
+            audio = recognizer.record(source)
+        try:
+            # Try multiple language detection
+            text = recognizer.recognize_google(audio)
+            return text
+        except Exception as e:
+            return str(e)
+    def translate_text(self, text, target_lang):
+        translator = GoogleTranslator(source='auto', target=target_lang)
+        return translator.translate(text)
+    def text_to_speech_coqui(self, text, lang):
+        try:
+            temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+            # Use language-specific voices if available
+            if lang == 'ta':
+                speaker = "tamil_female"
+            elif lang == 'hi':
+                speaker = "hindi_female"
+            else:
+                speaker = None
+            self.tts.tts_to_file(
+                text=text,
+                file_path=temp_audio.name,
+                speaker=speaker,
+                language=lang
+            )
+            return temp_audio.name
+        except Exception as e:
+            print(f"Coqui TTS failed: {e}")
+            return self.text_to_speech_pyttsx3(text, lang)
+    def text_to_speech_pyttsx3(self, text, lang):
+        try:
+            temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+            # Configure pyttsx3
+            engine = self.pyttsx3_engine
+            # Set language properties
+            if lang == 'ta':
+                engine.setProperty('voice', 'tamil')
+            elif lang == 'hi':
+                engine.setProperty('voice', 'hindi')
+            # Adjust voice properties
+            engine.setProperty('rate', 150)    # Speed
+            engine.setProperty('volume', 0.9)  # Volume
+            # Save to file
+            engine.save_to_file(text, temp_audio.name)
+            engine.runAndWait()
+            return temp_audio.name
+        except Exception as e:
+            print(f"pyttsx3 TTS failed: {e}")
+            return None
+    def improve_audio_quality(self, audio_path):
+        # Load audio
+        audio, sr = sf.read(audio_path)
+        # Apply basic audio improvements
+        audio = audio * 1.5  # Increase volume
+        audio = np.clip(audio, -1, 1)  # Prevent clipping
+        # Save improved audio
+        improved_path = audio_path.replace('.wav', '_improved.wav')
+        sf.write(improved_path, audio, sr)
+        return improved_path
+def main():
+    st.title("Enhanced AI Video Translator")
+    st.write("Free and Open Source Video Translation with Realistic TTS")
+    LANGUAGES = {
+        'English': 'en',
+        'Tamil': 'ta',
+        'Hindi': 'hi',
+        'Telugu': 'te',
+        'Malayalam': 'ml',
+        'Kannada': 'kn',
+        'Spanish': 'es',
+        'French': 'fr',
+        'German': 'de',
+        'Japanese': 'ja',
+        'Chinese': 'zh',
+        'Korean': 'ko'
+    }
+    translator = EnhancedVideoTranslator()
+    # Improved UI
+    st.markdown("""
+    <style>
+    .stButton>button {
+        background-color: #4CAF50;
+        color: white;
+        padding: 10px 24px;
+        border-radius: 5px;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+    video_file = st.file_uploader(
+        "Upload your video",
+        type=['mp4', 'avi', 'mov'],
+        help="Supported formats: MP4, AVI, MOV"
+    )
+    if video_file:
+        st.video(video_file)
+        col1, col2 = st.columns(2)
+        with col1:
+            target_language = st.selectbox(
+                "Target Language",
+                list(LANGUAGES.keys())
+            )
+        with col2:
+            tts_engine = st.selectbox(
+                "TTS Engine",
+                ["Coqui TTS", "pyttsx3"]
+            )
+        # Advanced options
+        with st.expander("Advanced Settings"):
+            quality_enhancement = st.checkbox("Enable Audio Enhancement", True)
+            speed = st.slider("Speech Speed", 0.5, 2.0, 1.0, 0.1)
+            volume = st.slider("Volume", 0.0, 2.0, 1.0, 0.1)
+        if st.button("Translate Video"):
+            try:
+                progress_bar = st.progress(0)
+                status = st.empty()
+                # Process video
+                temp_video = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+                temp_video.write(video_file.read())
+                # Extract audio
+                status.text("Extracting audio...")
+                progress_bar.progress(20)
+                audio_path = translator.extract_audio(temp_video.name)
+                if quality_enhancement:
+                    audio_path = translator.enhance_audio(audio_path)
+                # Speech to text
+                status.text("Converting speech to text...")
+                progress_bar.progress(40)
+                original_text = translator.speech_to_text(audio_path)
+                # Translate
+                status.text("Translating...")
+                progress_bar.progress(60)
+                translated_text = translator.translate_text(
+                    original_text,
+                    LANGUAGES[target_language]
+                )
+                # Text to speech
+                status.text("Generating speech...")
+                progress_bar.progress(80)
+                if tts_engine == "Coqui TTS":
+                    translated_audio = translator.text_to_speech_coqui(
+                        translated_text,
+                        LANGUAGES[target_language]
+                    )
+                else:
+                    translated_audio = translator.text_to_speech_pyttsx3(
+                        translated_text,
+                        LANGUAGES[target_language]
+                    )
+                if quality_enhancement:
+                    translated_audio = translator.improve_audio_quality(translated_audio)
+                # Create final video
+                status.text("Creating final video...")
+                progress_bar.progress(90)
+                output_path = "translated_video.mp4"
+                video = mp.VideoFileClip(temp_video.name)
+                audio = mp.AudioFileClip(translated_audio)
+                final_video = video.set_audio(audio)
+                final_video.write_videofile(output_path)
+                progress_bar.progress(100)
+                status.text("Complete!")
+                # Display results
+                st.success("Translation completed!")
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.subheader("Original Text")
+                    st.write(original_text)
+                with col2:
+                    st.subheader("Translated Text")
+                    st.write(translated_text)
+                st.subheader("Translated Video")
+                st.video(output_path)
+                # Cleanup
+                for file in [temp_video.name, audio_path, translated_audio, output_path]:
+                    if os.path.exists(file):
+                        os.unlink(file)
+            except Exception as e:
+                st.error(f"An error occurred: {str(e)}")
+if __name__ == "__main__":  # entry point: start the UI only when this file is executed directly, not when imported
+    main()