Spaces:

Mohamed7733
/

Video_translator

Build error

App Files Files Community

Mohamed7733 committed on Apr 9, 2025

Commit

ae52466

verified ·

1 Parent(s): 77480d2

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -40

app.py CHANGED Viewed

@@ -1,53 +1,125 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
-import numpy as np
-import scipy.io.wavfile as wav
-# Use a TTS model like 'espnet/kan-bayashi_ljspeech_tts'
-model_name = "espnet/kan-bayashi_ljspeech_tts"  # Change to a valid TTS model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 # Initialize FastAPI app
 app = FastAPI()
-# Function to convert text to speech
-def text_to_speech(text: str):
-    # Convert text to model format
-    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
-    # Generate the speech
-    with torch.no_grad():
-        output = model.generate(**inputs)
-    # Convert the output to a numpy array (audio waveform)
-    waveform = output.numpy().squeeze()
-    # Normalize the audio to make it clearer
-    waveform = waveform / np.max(np.abs(waveform))  # Normalize to range [-1, 1]
-    # Save the audio to a file
-    file_path = "/tmp/output.wav"
-    wav.write(file_path, 16000, (waveform * 32767).astype(np.int16))  # Convert to 16-bit PCM
-    return file_path
-# Define request model
-class TextRequest(BaseModel):
-    text: str
-# API endpoint to convert text to speech
-@app.post("/text_to_speech/")
-async def convert_text_to_speech(request: TextRequest):
-    text = request.text
-    # Generate speech from text
-    file_path = text_to_speech(text)
-    # Return the audio file as response
-    with open(file_path, "rb") as f:
-        audio_data = f.read()
-    return {"audio": audio_data}

+import os
+import tempfile
+import subprocess
+from fastapi import FastAPI, UploadFile, File
+import whisper
+from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 import torch
+from datetime import timedelta
+from deep_translator import GoogleTranslator
+import ffmpeg
 # Initialize FastAPI app
 app = FastAPI()
+def format_time(seconds: float) -> str:
+    """Format a time offset as an SRT timestamp (``HH:MM:SS,mmm``).
+
+    Args:
+        seconds: Offset from the start of the media, in seconds. Negative
+            values are clamped to zero because SRT cannot express them.
+
+    Returns:
+        The timestamp string. Hours are zero-padded to at least two digits
+        and keep growing past 24 instead of wrapping around.
+    """
+    td = timedelta(seconds=max(seconds, 0))
+    # Work from the whole duration in milliseconds: td.seconds alone excludes
+    # the days component and would silently wrap at 24 hours.
+    total_ms = td // timedelta(milliseconds=1)
+    hours, remainder = divmod(total_ms, 3_600_000)
+    minutes, remainder = divmod(remainder, 60_000)
+    secs, milliseconds = divmod(remainder, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
+def extract_audio(video_path):
+    """Extract the audio track of a video into a temporary WAV file.
+
+    Args:
+        video_path: Path of the input video file.
+
+    Returns:
+        Path of the newly created WAV file. The caller owns the file and
+        should delete it once it is no longer needed.
+
+    Raises:
+        Whatever the ffmpeg wrapper raises when the ffmpeg process fails.
+    """
+    # A unique file per call keeps concurrent requests from overwriting each
+    # other's audio (a fixed name in the shared temp dir would collide).
+    fd, audio_path = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
+    os.close(fd)
+    try:
+        # mkstemp already created the file, so ffmpeg must be told to
+        # overwrite it; without that it refuses to write to an existing path.
+        ffmpeg.input(video_path).output(audio_path, format='wav').run(overwrite_output=True)
+    except Exception:
+        # Do not leave an empty placeholder behind when extraction fails.
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+        raise
+    return audio_path
+# Cache of loaded Whisper models keyed by model name, so the weights are read
+# from disk once per process instead of once per request.
+_WHISPER_MODELS = {}
+def _get_whisper_model(name="base"):
+    """Return the Whisper model called *name*, loading it on first use only."""
+    if name not in _WHISPER_MODELS:
+        _WHISPER_MODELS[name] = whisper.load_model(name)
+    return _WHISPER_MODELS[name]
+def transcribe_audio(audio_path):
+    """Transcribe an audio file with the Whisper "base" model.
+
+    Args:
+        audio_path: Path of the audio file to transcribe.
+
+    Returns:
+        The ``"segments"`` entry of the Whisper result, or an empty list when
+        loading the model or transcribing fails (the error is printed).
+    """
+    try:
+        model = _get_whisper_model("base")
+        result = model.transcribe(audio_path)
+        return result["segments"]
+    except Exception as e:
+        # Deliberately best-effort: a failed transcription yields no segments
+        # rather than aborting the whole request.
+        print(f"Error using whisper model: {e}")
+        return []
+def translate_text(text, source='en', target='ar'):
+    """Translate *text* between two languages with GoogleTranslator.
+
+    Args:
+        text: The text to translate.
+        source: Source language code; defaults to English.
+        target: Target language code; defaults to Arabic.
+
+    Returns:
+        The translated text. Empty or whitespace-only input is returned
+        unchanged (``None`` becomes ``""``) without contacting the service,
+        and the original text is returned if the translator yields nothing.
+    """
+    # Nothing to translate: skip the network round trip entirely.
+    if not text or not text.strip():
+        return text or ""
+    translator = GoogleTranslator(source=source, target=target)
+    translated = translator.translate(text)
+    # NOTE(review): the translator appears able to return an empty/None result
+    # for some inputs; fall back to the source text so a caption is never lost.
+    return translated if translated else text
+def create_srt(segments, output_path):
+    """Write subtitle segments to an SRT file.
+
+    Args:
+        segments: Iterable of segments, each either a mapping with
+            ``start``/``end``/``text``/``translation`` keys or an object with
+            attributes of the same names. ``translation`` is optional.
+        output_path: Destination path of the SRT file.
+
+    Each cue shows the translation when one is present and falls back to the
+    source text otherwise. Segments with no caption text are skipped, and cues
+    are numbered consecutively from 1 over the ones actually written.
+    """
+    # UTF-8 with BOM, kept for compatibility with players that need the marker.
+    with open(output_path, 'w', encoding='utf-8-sig') as srt_file:
+        index = 0
+        for segment in segments:
+            if hasattr(segment, 'get'):  # Mapping-style segment
+                start_time = segment.get('start', 0)
+                end_time = segment.get('end', 0)
+                text = segment.get('text', '')
+                translation = segment.get('translation')
+            else:  # Attribute-style segment
+                start_time = segment.start
+                end_time = segment.end
+                text = segment.text
+                translation = getattr(segment, 'translation', None)
+            # Same fallback for both segment shapes: prefer the translation,
+            # otherwise show the original text.
+            caption = (translation or text or '').strip()
+            if not caption:
+                continue
+            index += 1
+            srt_file.write(f"{index}\n")
+            srt_file.write(f"{format_time(start_time)} --> {format_time(end_time)}\n")
+            srt_file.write(f"{caption}\n\n")
+def _escape_filter_value(value):
+    """Escape *value* for use as an option value inside an ffmpeg filtergraph."""
+    # Level 1: the filter's own option parser treats \ ' and : specially.
+    for ch in ("\\", "'", ":"):
+        value = value.replace(ch, "\\" + ch)
+    # Level 2: the filtergraph parser additionally treats , ; [ ] specially.
+    for ch in ("\\", "'", ",", ";", "[", "]"):
+        value = value.replace(ch, "\\" + ch)
+    return value
+def burn_subtitles(video_path, srt_path, output_path):
+    """Burn an SRT subtitle file into a video with ffmpeg.
+
+    Args:
+        video_path: Path of the input video.
+        srt_path: Path of the UTF-8 SRT file to render.
+        output_path: Path of the video to write (overwritten if present).
+
+    Returns:
+        ``output_path`` on success, or ``None`` when ffmpeg exits with an
+        error (the error is printed).
+    """
+    font_path = "/usr/share/fonts/truetype/Amiri-Regular.ttf"  # Path to Amiri font
+    # force_style's FontName takes a font family name, not a file path; the
+    # directory holding the font file is handed to the filter via fontsdir.
+    fonts_dir = os.path.dirname(font_path)
+    style = (
+        "FontName=Amiri,FontSize=24,PrimaryColour=&HFFFFFF,"
+        "OutlineColour=&H000000,BorderStyle=3,Alignment=2,Encoding=1"
+    )
+    # Paths are escaped rather than merely quoted so characters such as ' or :
+    # in a file name cannot break (or inject into) the filtergraph.
+    subtitle_filter = (
+        f"subtitles={_escape_filter_value(srt_path)}"
+        f":fontsdir={_escape_filter_value(fonts_dir)}"
+        f":force_style='{style}'"
+    )
+    # No -sub_charenc here: it is a decoder (input) option that ffmpeg rejects
+    # on an output, and the subtitles filter already reads the file as UTF-8.
+    cmd = [
+        'ffmpeg', '-y',
+        '-i', video_path,
+        '-vf', subtitle_filter,
+        '-c:v', 'libx264', '-crf', '18',
+        '-c:a', 'copy',
+        output_path
+    ]
+    try:
+        subprocess.run(cmd, check=True)
+        return output_path
+    except subprocess.CalledProcessError as e:
+        print(f"FFmpeg error: {e}")
+        return None
+def process_video(video_path):
+    """Run the full subtitle pipeline on one video.
+
+    Steps: extract the audio, transcribe it, translate every segment, write
+    an SRT file and burn the subtitles into a copy of the video.
+
+    Args:
+        video_path: Path of the input video.
+
+    Returns:
+        A ``(result_path, srt_path)`` tuple: the subtitled video (``None``
+        when burning failed) and the generated SRT file, both placed in the
+        system temp directory and named after the input file.
+    """
+    temp_dir = tempfile.gettempdir()
+    file_name = os.path.splitext(os.path.basename(video_path))[0]
+    audio_path = extract_audio(video_path)
+    try:
+        segments = transcribe_audio(audio_path)
+    finally:
+        # The audio is only an intermediate artefact; do not let it pile up.
+        try:
+            os.remove(audio_path)
+        except OSError:
+            pass
+    translated_segments = []
+    for segment in segments:
+        # Segments may be plain dicts or attribute-style objects. Copy the
+        # fields into a fresh dict instead of setting an attribute on the
+        # segment, which raises AttributeError for dicts.
+        if hasattr(segment, 'get'):
+            start = segment.get('start', 0)
+            end = segment.get('end', 0)
+            text = segment.get('text', '')
+        else:
+            start = segment.start
+            end = segment.end
+            text = segment.text
+        text = (text or '').strip()
+        if not text:
+            continue  # Nothing to show for this segment
+        translated_segments.append({
+            'start': start,
+            'end': end,
+            'text': text,
+            # Keep the source text if the translator returns nothing.
+            'translation': translate_text(text) or text,
+        })
+    srt_path = os.path.join(temp_dir, f"{file_name}.srt")
+    create_srt(translated_segments, srt_path)
+    output_path = os.path.join(temp_dir, f"{file_name}_translated.mp4")
+    result_path = burn_subtitles(video_path, srt_path, output_path)
+    return result_path, srt_path
+# API endpoint to process video
+@app.post("/process_video/")
+async def process_video_endpoint(file: UploadFile = File(...)):
+    """Accept an uploaded video and produce translated, burned-in subtitles.
+
+    Args:
+        file: The uploaded video.
+
+    Returns:
+        A dict with ``video_url`` (path of the subtitled video, or ``None``
+        when burning failed) and ``srt_url`` (path of the generated SRT).
+        Both values are paths on the server's file system.
+    """
+    # The client controls the file name: keep only its final component so a
+    # name like "../../etc/x" cannot escape the upload directory.
+    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
+    if safe_name in ("", ".", ".."):
+        safe_name = "upload.mp4"
+    # A private directory per request avoids collisions between uploads that
+    # happen to share a file name.
+    upload_dir = tempfile.mkdtemp(prefix="video_upload_")
+    file_path = os.path.join(upload_dir, safe_name)
+    with open(file_path, "wb") as f:
+        # Copy in 1 MiB chunks instead of holding the whole video in memory.
+        while True:
+            chunk = await file.read(1024 * 1024)
+            if not chunk:
+                break
+            f.write(chunk)
+    # NOTE(review): process_video is blocking and runs on the event loop here;
+    # consider moving it to a worker thread.
+    result_path, srt_path = process_video(file_path)
+    return {"video_url": result_path, "srt_url": srt_path}