Upload 8 files

Browse files

Files changed (8) hide show

extract_audio.py +18 -0
main.py +156 -0
moderator.py +29 -0
requirements.txt +16 -0
shorts_generator.py +105 -0
subtitles.py +67 -0
transcript_detect.py +47 -0
translation.py +48 -0

extract_audio.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from moviepy.editor import VideoFileClip
class VideoHelper(object):
    """Helper for pulling the audio track out of a video file."""

    def extract_audio(self, video_path, audio_path):
        """Write the audio track of ``video_path`` to ``audio_path``.

        Args:
            video_path: Path of the video file to read.
            audio_path: Path of the audio file to create.

        Raises:
            ValueError: If the video has no audio track.
        """
        video = VideoFileClip(video_path)
        try:
            audio = video.audio
            if audio is None:
                # Without this check the failure surfaces as an opaque
                # AttributeError on None.
                raise ValueError(f"No audio track found in {video_path}")
            audio.write_audiofile(audio_path)
        finally:
            # Close the clip even when writing fails so it is not leaked.
            video.close()

main.py ADDED Viewed

	@@ -0,0 +1,156 @@

+import json
+from altair import value
+from matplotlib.streamplot import OutOfBounds
+from sympy import substitution, viete
+from extract_audio import VideoHelper
+from helpers.srt_generator import SRTGenerator
+from moderator import DetoxifyModerator
+from shorts_generator import ShortsGenerator
+from subtitles import SubtitlesRenderer
+from transcript_detect import *
+from translation import *
+import gradio as gr
+from dotenv import load_dotenv
def translate_segments(segments,translator: TranslationModel,from_lang,to_lang):
    """Return new segment dicts whose ``text`` has been translated.

    Each input segment must provide ``text``, ``start``, ``end`` and ``id``;
    the timing and id values are copied through unchanged and only the text
    is passed to ``translator.translate_text``.
    """
    return [
        {
            'text': translator.translate_text(item['text'], from_lang, to_lang),
            'start': item['start'],
            'end': item['end'],
            'id': item['id'],
        }
        for item in segments
    ]
def main(file,translate_to_lang):
    """Run the whole content pipeline on one video.

    Steps: extract the audio, transcribe it, translate the transcript, score
    the transcript for toxicity, render subtitles onto the video and cut up
    to three short clips.

    Side effects: writes ``extracted_audio.mp3``, ``transcript.txt``,
    ``translation.txt``, ``segments.json``, ``subtitled_video.mp4`` and
    ``subtitles.srt`` in the current working directory.

    Args:
        file: Path of the input video.
        translate_to_lang: A key of ``supported_languages``.

    Returns:
        A 7-tuple ``(transcript_text, translated_text, moderation_dataframe,
        subtitled_video_path, short_1, short_2, short_3)``. Short slots for
        which no clip was produced are ``None``.

    Raises:
        ValueError: If no video was supplied or the language is unsupported.
    """
    # Validate up front so a bad request fails before the expensive steps run.
    if not file:
        raise ValueError('No video file was provided')
    if translate_to_lang not in supported_languages:
        raise ValueError(f'Unsupported target language: {translate_to_lang!r}')
    target_language = supported_languages[translate_to_lang]

    #Extracting the audio from video
    video_file_path = file
    audio_file_path = 'extracted_audio.mp3'
    video_helper = VideoHelper()
    print('Extracting audio from video...')
    video_helper.extract_audio(video_file_path, audio_file_path)

    whisper_model = WhisperModel('base')
    print('Transcriping audio file....')
    transcription = whisper_model.transcribe_audio(audio_file_path)
    print('Generating transctipt text...')
    transcript_text = whisper_model.get_text(transcription)
    print('Detecting audio language....')
    detected_language = whisper_model.get_detected_language(transcription)
    print('Generating transcript segments...')
    transcript_segments = whisper_model.get_segments(transcription)

    # Write the transcription to a text file
    print('Writing transcript into text file...')
    transcript_file_path = "transcript.txt"
    with open(transcript_file_path, "w",encoding="utf-8") as transcript_file:
        transcript_file.write(transcript_text)

    # Translate transcript
    if detected_language == target_language:
        # Same source and target language: nothing to translate, and asking
        # for a same-language translation model would most likely fail.
        print(f'Transcript is already in {target_language}; skipping translation.')
        transalted_text = transcript_text
    else:
        translation_model = TranslationModel()
        print(f'Translating transcript text from {detected_language} to {target_language}...')
        transalted_text = translation_model.translate_text(transcript_text,detected_language,target_language)

    # Write the translation to a text file
    print('Writing translation text file...')
    translation_file_path = "translation.txt"
    with open(translation_file_path, "w",encoding="utf-8") as translation_file:
        translation_file.write(transalted_text)

    print('Writing transcsript segments and translated segments to json file...')
    segments_file_path = "segments.json"
    with open(segments_file_path, "w",encoding="utf-8") as segments_file:
        json.dump(transcript_segments, segments_file,ensure_ascii=False)

    #Run Moderator to detect toxicity
    print('Analyzing and detecing toxicity levels...')
    detoxify_moderator = DetoxifyModerator()
    result = detoxify_moderator.detect_toxicity(transcript_text)
    df = detoxify_moderator.format_results(result)

    #Render subtitles on video (reads the segments file written above)
    renderer = SubtitlesRenderer()
    output_file_path = 'subtitled_video.mp4'
    subtitled_video = renderer.add_subtitles(video_file_path,segments_file_path,output_file_path)

    # Generate short videos from video
    output_srt_file = 'subtitles.srt'
    print('Generating SRT file...')
    SRTGenerator.generate_srt(transcript_segments,output_srt_file)
    shorts_generator = ShortsGenerator()
    print('Generating shorts from important scenes...')
    selected_scenes = shorts_generator.execute(output_srt_file)
    shorts_path_list = shorts_generator.extract_video_scenes( video_file_path, shorts_generator.extract_scenes(selected_scenes.content))

    # Exactly three short slots are returned; unused slots are None rather
    # than "" so they are not mistaken for a file path.
    shorts = list(shorts_path_list[:3])
    shorts += [None] * (3 - len(shorts))
    return transcript_text, transalted_text, df, subtitled_video, shorts[0], shorts[1], shorts[2]
def interface_function(file,translate_to_lang,with_transcript=False,with_translations=False,with_subtitles=False,with_shorts=False):
    """Adapter between the UI inputs and ``main``.

    The four ``with_*`` flags are accepted so the signature matches the UI
    inputs, but they are not consulted: the full pipeline always runs.
    """
    pipeline_outputs = main(file, translate_to_lang)
    return pipeline_outputs
# Display name of each selectable target language -> the language code that
# is handed to the translation step.
supported_languages = dict(
    Spanish="es",
    French="fr",
    German="de",
    Russian="ru",
    Arabic="ar",
    Hindi="hi",
)
if __name__ == '__main__':
    # Load environment variables from .env file
    load_dotenv()

    # Input widgets, in the positional order interface_function expects.
    input_components = [
        gr.Video(label='Content Video'),
        gr.Dropdown(list(supported_languages.keys()), label="Target Language"),
        gr.Checkbox(label="Generate Transcript"),
        gr.Checkbox(label="Translate Transcript"),
        gr.Checkbox(label="Generate Subtitles"),
        gr.Checkbox(label="Generate Shorts"),
    ]

    # Output widgets: four fixed results followed by three short-video slots.
    output_components = [
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Translation"),
        gr.DataFrame(label="Moderation Results"),
        gr.Video(label='Output Video with Subtitles'),
    ]
    for i in range(3):
        output_components.append(gr.Video(label=f"Short {i+1}"))

    demo = gr.Interface(
        fn=interface_function,
        inputs=input_components,
        outputs=output_components,
        title="Rosetta AI",
        description="Content Creation Customization",
    )
    demo.launch()

moderator.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from pprint import pprint
+from detoxify import Detoxify
+import pandas as pd
class DetoxifyModerator(object):
    """Scores text for toxicity with a Detoxify model and formats the scores."""

    def __init__(self, model_name='original'):
        """Remember which Detoxify model to use; it is loaded on first use.

        Args:
            model_name: Name passed to ``Detoxify``. Defaults to 'original'.
        """
        self.model_name = model_name
        self._model = None

    def detect_toxicity(self,text):
        """Return the Detoxify prediction for ``text``.

        The model is built once per instance and reused, instead of being
        re-created on every call.
        """
        if self._model is None:
            self._model = Detoxify(self.model_name)
        return self._model.predict(text)

    def format_results(self,results):
        """Convert a ``{category: score}`` mapping into a two-column DataFrame.

        Args:
            results: Mapping of category name to a numeric score, where 1.0
                is rendered as 100%.

        Returns:
            A DataFrame with columns "Category" and "Percentage"; the
            percentage is a string with two decimals, e.g. "12.34%".
        """
        rows = [(category, f"{score:.2%}") for category, score in results.items()]
        return pd.DataFrame(rows, columns=["Category", "Percentage"])
if __name__ == '__main__':
    # Manual smoke test: score a sample sentence and print the report.
    detoxify_moderator = DetoxifyModerator()
    result = detoxify_moderator.detect_toxicity('To let the user select the target language for translation, you can add a dropdown menu in the Gradio interface. This will allow users to choose the target language before processing the video. Here\'s how you can modify the script to include this feature')
    # DetoxifyModerator has no get_toxicity_report method, so calling it
    # raised AttributeError; format_results is the reporting helper it has.
    report = detoxify_moderator.format_results(result)
    pprint(report)

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+openai
+torch
+torchvision
+torchaudio
+openai-whisper
+transformers
+sentencepiece
+sacremoses
+pydub
+moviepy
+gradio
+detoxify
+ffmpeg-python
+opencv-python
+pysrt
+python-dotenv

shorts_generator.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import pysrt
+from openai import OpenAI
+import os
+import re
+import subprocess
class ShortsGenerator(object):
    """Selects important scenes from subtitles and cuts them out of a video.

    Scene selection is delegated to an OpenAI chat completion; the clips are
    cut with the ``ffmpeg`` command-line tool.
    """

    # Matches lines such as
    # "1. Some description - Start time: 00:00:39, End time: 00:01:17".
    _SCENE_PATTERN = re.compile(
        r'(?P<scene>\d+)\. (?P<description>.*?) - Start time: (?P<start>\d{2}:\d{2}:\d{2}), End time: (?P<end>\d{2}:\d{2}:\d{2})'
    )

    def read_srt(self,file_path):
        """Open the SRT file at ``file_path`` with pysrt and return the result."""
        return pysrt.open(file_path)

    def extract_text(self,subtitles):
        """Join the ``text`` of every subtitle into one space-separated string."""
        return ' '.join(subtitle.text for subtitle in subtitles).strip()

    def get_important_scenes(self,text):
        """Ask the chat model to list important scenes found in ``text``.

        The API key is read from the ``OPEN_AI_API_KEY`` environment variable.

        Returns:
            The message object of the first completion choice; its
            ``content`` attribute holds the model's answer.
        """
        client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful videos editing assistant."},
                {"role": "user", "content": "Identify the important scenes from the following subtitles text return that by start times and end time,videos should be at less 30s and maximum 2 min with format like this \"1. Arrival of Raymond Reddington at the FBI office - Start time: 00:00:39, End time: 00:01:17\":\n" + text}
            ],
            max_tokens=1500
        )
        return response.choices[0].message

    def execute(self,srt_file_path):
        """Read an SRT file and return the model's important-scenes message."""
        subtitles = self.read_srt(srt_file_path)
        text = self.extract_text(subtitles)
        return self.get_important_scenes(text)

    def extract_scenes(self,input_text):
        """Parse the model's answer into a list of scene dicts.

        Args:
            input_text: Text containing lines in the format requested in the
                prompt. ``None`` or an empty string yields an empty list.

        Returns:
            A list of dicts with the string keys ``scene``, ``description``,
            ``start`` and ``end`` (times as ``HH:MM:SS``).
        """
        if not input_text:
            # A missing or empty answer simply means no scenes.
            return []
        return [match.groupdict() for match in self._SCENE_PATTERN.finditer(input_text)]

    @staticmethod
    def _safe_filename(description, max_length=100):
        """Turn free-form text into a file name without path separators."""
        cleaned = re.sub(r'[^\w\- ]+', '_', description).strip(' _')
        return cleaned[:max_length] or 'scene'

    def extract_video_scenes(self,video_file, scenes, output_dir="output/"):
        """Cut each scene out of ``video_file`` into its own MP4 file.

        Args:
            video_file: Path of the source video.
            scenes: Iterable of dicts with ``start``, ``end`` and
                ``description`` keys, as produced by ``extract_scenes``.
            output_dir: Directory for the clips; created when missing.

        Returns:
            Paths of the clips that ffmpeg produced successfully.
        """
        shorts_files_path_list = []
        os.makedirs(output_dir, exist_ok=True)
        for scene in scenes:
            description = scene['description']
            # The description is model-generated text, so it is sanitised
            # before being used as a file name.
            output_filename = os.path.join(output_dir, f"{self._safe_filename(description)}.mp4")
            cmd = [
                'ffmpeg',
                '-y',  # Overwrite output file if exists; placed before the output it applies to
                '-i', video_file,
                '-ss', scene['start'],
                '-to', scene['end'],
                '-c:v', 'libx264',
                '-c:a', 'aac',
                '-strict', 'experimental',
                '-b:a', '192k',
                output_filename,
            ]
            completed = subprocess.run(cmd, capture_output=True)
            if completed.returncode != 0:
                # Report the failure and do not return a path for a clip
                # that was not written.
                error_tail = completed.stderr.decode(errors='replace')[-500:]
                print(f"ffmpeg failed for scene '{description}': {error_tail}")
                continue
            shorts_files_path_list.append(output_filename)
        return shorts_files_path_list
if __name__ == "__main__":
    # Manual run against local sample files.
    sample_srt = 's1.srt'
    sample_video = '1.mp4'
    generator = ShortsGenerator()
    scenes_message = generator.execute(sample_srt)
    parsed_scenes = generator.extract_scenes(scenes_message.content)
    print("Important Scenes:\n", parsed_scenes)
    generator.extract_video_scenes(sample_video, generator.extract_scenes(scenes_message.content))
    print("Well Done")

subtitles.py ADDED Viewed

	@@ -0,0 +1,67 @@

+from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
+import json
class SubtitlesRenderer(object):
    """Draws timed subtitles onto a video."""

    def add_subtitles(self,video_file, subtitle_file, output_file):
        """Render the subtitles in ``subtitle_file`` onto ``video_file``.

        Args:
            video_file: Path of the source video.
            subtitle_file: Path of a JSON file holding a list of objects with
                ``text``, ``start`` and ``end`` keys.
            output_file: Path of the video to write.

        Returns:
            ``output_file``.
        """
        # Load subtitle data first so a bad path fails before the video opens.
        with open(subtitle_file, 'r', encoding='utf-8') as f:
            subtitles = json.load(f)

        video = VideoFileClip(video_file)
        text_clips_list = []
        final_clip = None
        try:
            # Leave 20px of padding on each side of the caption box.
            max_width = video.size[0] - 40
            for subtitle in subtitles:
                text = subtitle['text']
                start_time = subtitle['start']
                duration = subtitle['end'] - start_time
                if not text.strip() or duration <= 0:
                    # Nothing to draw, or no time span to draw it in.
                    continue
                txt_clip = TextClip(text, fontsize=28, color='white', font='Arial', method='caption',size=(max_width, None),stroke_color='black',
                                    stroke_width= 0.5, bg_color='black',)
                txt_clip = txt_clip.set_duration(duration)
                txt_clip = txt_clip.set_position(('center', 'bottom'))
                text_clips_list.append(txt_clip.set_start(start_time))

            # Composite all TextClips onto the video
            final_clip = CompositeVideoClip([video] + text_clips_list)
            final_clip.write_videofile(output_file, codec='libx264', fps=video.fps, audio_codec='aac',
                                       ffmpeg_params=["-vf", "format=yuv420p"])  # Add this for compatibility
        finally:
            # Close every clip even when rendering fails.
            for clip in text_clips_list:
                clip.close()
            if final_clip is not None:
                final_clip.close()
            video.close()
        return output_file
if __name__ == '__main__':
    # Manual run against local sample files.
    SubtitlesRenderer().add_subtitles(
        'video.mp4',
        'segments.json',
        'output_video_with_subtitles.mp4',
    )

transcript_detect.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import whisper
class WhisperModel(object):
    """Thin wrapper around a loaded Whisper model."""

    def __init__(self,model_type):
        """Load the Whisper model named ``model_type``.

        Args:
            model_type: Model name passed to ``whisper.load_model``. A falsy
                value falls back to "base".
        """
        # The argument used to be ignored and "base" was always loaded.
        self.model = whisper.load_model(model_type or "base")

    def transcribe_audio(self,file_path):
        """Transcribe the audio file and return the model's result.

        Raises:
            Exception: If transcription fails; the original error is chained.
        """
        try:
            return self.model.transcribe(file_path)
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error trnascribe audio file {e}') from e

    def get_text(self,transcription):
        """Return the ``text`` entry of a transcription result."""
        return transcription['text']

    def get_detected_language(self,transcription):
        """Return the ``language`` entry of a transcription result."""
        return transcription['language']

    def get_segments(self,transcription):
        """Return the segments reduced to ``text``, ``start``, ``end``, ``id``."""
        return [
            {
                "text": segment['text'],
                "start": segment['start'],
                "end": segment['end'],
                "id": segment['id'],
            }
            for segment in transcription['segments']
        ]

    def detect_language(self,file_path):
        """Detect the spoken language of an audio file.

        Returns:
            The key with the highest probability reported by the model.

        Raises:
            Exception: If detection fails; the original error is chained.
        """
        try:
            audio = whisper.load_audio(file_path)
            audio = whisper.pad_or_trim(audio)
            # make log-Mel spectrogram and move to the same device as the model
            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
            # detect the spoken language
            _, probs = self.model.detect_language(mel)
            best_language = max(probs, key=probs.get)
            print(f"Detected language: {best_language}")
            return best_language
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error detecting language {e}') from e

translation.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from transformers import MarianMTModel, MarianTokenizer
class TranslationModel(object):
    """Translates text with Helsinki-NLP MarianMT models."""

    def __init__(self):
        # (model, tokenizer) pairs keyed by model name, so each language
        # pair is loaded once instead of once per chunk.
        self._loaded = {}

    def _load(self, src_lang, tgt_lang):
        """Return the cached (model, tokenizer) pair for a language pair."""
        model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
        if model_name not in self._loaded:
            self._loaded[model_name] = (
                MarianMTModel.from_pretrained(model_name),
                MarianTokenizer.from_pretrained(model_name),
            )
        return self._loaded[model_name]

    def translate_chunk(self,chunk, src_lang, tgt_lang):
        """Translate one chunk of text.

        Args:
            chunk: Text to translate; it is truncated to 512 tokens.
            src_lang: Source language code used in the model name.
            tgt_lang: Target language code used in the model name.

        Raises:
            Exception: If loading or translating fails; the cause is chained.
        """
        try:
            model, tokenizer = self._load(src_lang, tgt_lang)
            inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
            translated_tokens = model.generate(**inputs)
            return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
        except Exception as e:
            print(e)
            raise Exception(f"Error translating text {e}") from e

    def translate_text(self,text, src_lang, tgt_lang):
        """Translate ``text`` chunk by chunk and join the results with spaces.

        Empty or whitespace-only text yields an empty string without loading
        any model.
        """
        max_length = 512
        chunks = self.split_text(text, max_length)
        return ' '.join(self.translate_chunk(chunk, src_lang, tgt_lang) for chunk in chunks)

    def split_text(self,text, max_length):
        """Split ``text`` at ". " boundaries into chunks of about ``max_length`` characters.

        The original punctuation is kept: no period is added after the last
        sentence. A single sentence longer than ``max_length`` becomes its
        own chunk, and no empty chunks are produced.

        Returns:
            A list of stripped chunk strings; ``[]`` for empty or blank text.
        """
        if not text or not text.strip():
            return []
        pieces = text.split('. ')
        # Put back the delimiter that split() removed, except after the last piece.
        sentences = [piece + '. ' for piece in pieces[:-1]] + [pieces[-1]]
        chunks = []
        current_chunk = ""
        for sentence in sentences:
            if current_chunk and len(current_chunk) + len(sentence.rstrip()) > max_length:
                chunks.append(current_chunk.strip())
                current_chunk = sentence
            else:
                current_chunk += sentence
        if current_chunk.strip():
            chunks.append(current_chunk.strip())
        return chunks