adeeb-khoja commited on
Commit
87e8f23
·
verified ·
1 Parent(s): 077cdfa

Upload 8 files

Browse files
Files changed (8) hide show
  1. extract_audio.py +18 -0
  2. main.py +156 -0
  3. moderator.py +29 -0
  4. requirements.txt +16 -0
  5. shorts_generator.py +105 -0
  6. subtitles.py +67 -0
  7. transcript_detect.py +47 -0
  8. translation.py +48 -0
extract_audio.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy.editor import VideoFileClip
2
+
3
+
4
class VideoHelper(object):
    """Small helper around MoviePy for pulling the audio track out of a video."""

    def extract_audio(self, video_path, audio_path):
        """Write the audio track of a video to its own file.

        Args:
            video_path: Path of the video file to read.
            audio_path: Path of the audio file to create.

        Raises:
            ValueError: If the video has no audio track.
        """
        video = VideoFileClip(video_path)
        try:
            audio = video.audio
            if audio is None:
                # Without this guard the failure is an opaque AttributeError
                # on None when write_audiofile is called.
                raise ValueError(f"No audio track found in video: {video_path}")

            audio.write_audiofile(audio_path)
        finally:
            # Always release the clip, even when the export fails.
            video.close()
17
+
18
+
main.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json
3
+
4
+ from altair import value
5
+ from matplotlib.streamplot import OutOfBounds
6
+ from sympy import substitution, viete
7
+ from extract_audio import VideoHelper
8
+ from helpers.srt_generator import SRTGenerator
9
+ from moderator import DetoxifyModerator
10
+ from shorts_generator import ShortsGenerator
11
+ from subtitles import SubtitlesRenderer
12
+ from transcript_detect import *
13
+ from translation import *
14
+ import gradio as gr
15
+ from dotenv import load_dotenv
16
+
17
+
18
+
19
def translate_segments(segments,translator: TranslationModel,from_lang,to_lang):
    """Translate the text of every segment, keeping its timing and id.

    Each input segment is read through its 'text', 'start', 'end' and 'id'
    keys; the returned list holds new dicts with the same four keys, where
    'text' is the result of ``translator.translate_text``.
    """
    return [
        {
            'text': translator.translate_text(item['text'], from_lang, to_lang),
            'start': item['start'],
            'end': item['end'],
            'id': item['id'],
        }
        for item in segments
    ]
26
+
27
+
28
def main(file,translate_to_lang):
    """Run the full content pipeline on one video.

    Steps: extract audio, transcribe it, translate the transcript, score the
    transcript for toxicity, burn subtitles into the video and cut short
    clips from the scenes selected by ``ShortsGenerator``.

    Side effects: writes 'extracted_audio.mp3', 'transcript.txt',
    'translation.txt', 'segments.json', 'subtitled_video.mp4' and
    'subtitles.srt' in the working directory.

    Args:
        file: Path of the input video.
        translate_to_lang: A key of ``supported_languages`` (e.g. "Spanish").

    Returns:
        A 7-tuple: transcript text, translated text, moderation DataFrame,
        path of the subtitled video, and three short-clip paths (``None``
        for every slot that has no clip).

    Raises:
        ValueError: If ``translate_to_lang`` is not a supported language.
    """
    # Fail early with a readable message instead of a bare KeyError after the
    # expensive transcription step (e.g. when no dropdown value was picked).
    if translate_to_lang not in supported_languages:
        raise ValueError(
            f"Unsupported target language: {translate_to_lang!r}. "
            f"Choose one of: {', '.join(supported_languages)}"
        )
    target_language = supported_languages[translate_to_lang]

    #Extracting the audio from video
    video_file_path = file
    audio_file_path = 'extracted_audio.mp3'
    video_helper = VideoHelper()
    print('Extracting audio from video...')
    video_helper.extract_audio(video_file_path, audio_file_path)

    whisper_model = WhisperModel('base')

    print('Transcriping audio file....')
    transcription = whisper_model.transcribe_audio(audio_file_path)

    print('Generating transctipt text...')
    transcript_text = whisper_model.get_text(transcription)

    print('Detecting audio language....')
    detected_language = whisper_model.get_detected_language(transcription)

    print('Generating transcript segments...')
    transcript_segments = whisper_model.get_segments(transcription)

    # Write the transcription to a text file
    print('Writing transcript into text file...')
    transcript_file_path = "transcript.txt"
    with open(transcript_file_path, "w", encoding="utf-8") as transcript_file:
        transcript_file.write(transcript_text)

    # Translate transcript
    if detected_language == target_language:
        # The translation model name is built from the language pair, and a
        # same-language pair does not identify a translation model.
        print(f'Transcript is already in {target_language}; skipping translation.')
        transalted_text = transcript_text
    else:
        translation_model = TranslationModel()
        print(f'Translating transcript text from {detected_language} to {target_language}...')
        transalted_text = translation_model.translate_text(transcript_text,detected_language,target_language)

    # Write the translation to a text file
    print('Writing translation text file...')
    translation_file_path = "translation.txt"
    with open(translation_file_path, "w", encoding="utf-8") as translation_file:
        translation_file.write(transalted_text)

    print('Writing transcsript segments and translated segments to json file...')
    segments_file_path = "segments.json"
    with open(segments_file_path, "w", encoding="utf-8") as segments_file:
        json.dump(transcript_segments, segments_file, ensure_ascii=False)

    #Run Moderator to detect toxicity
    print('Analyzing and detecing toxicity levels...')
    detoxify_moderator = DetoxifyModerator()
    result = detoxify_moderator.detect_toxicity(transcript_text)
    df = detoxify_moderator.format_results(result)

    #Render subtitles on video (reads the segments JSON written above)
    renderer = SubtitlesRenderer()
    output_file_path = 'subtitled_video.mp4'
    subtitled_video = renderer.add_subtitles(video_file_path,segments_file_path,output_file_path)

    # Generate short videos from video
    output_srt_file = 'subtitles.srt'
    print('Generating SRT file...')
    SRTGenerator.generate_srt(transcript_segments,output_srt_file)
    shorts_generator = ShortsGenerator()
    print('Generating shorts from important scenes...')
    selected_scenes = shorts_generator.execute(output_srt_file)
    shorts_path_list = shorts_generator.extract_video_scenes( video_file_path, shorts_generator.extract_scenes(selected_scenes.content))

    # The UI has exactly three short-video slots: keep the first three clips
    # and pad with None (an empty string is not a valid video path).
    return_shorts_list = list(shorts_path_list[:3])
    return_shorts_list += [None] * (3 - len(return_shorts_list))

    return transcript_text, transalted_text, df, subtitled_video, return_shorts_list[0], return_shorts_list[1], return_shorts_list[2]
114
+
115
+
116
+
117
def interface_function(file,translate_to_lang,with_transcript=False,with_translations=False,with_subtitles=False,with_shorts=False):
    """Gradio callback: run the pipeline for the uploaded video.

    The four ``with_*`` flags are accepted but not consulted; every output
    is always produced by ``main``.
    """
    pipeline_outputs = main(file, translate_to_lang)
    return pipeline_outputs
120
+
121
# Display name shown in the UI -> language code passed on as the translation
# target. Insertion order is the order of the dropdown entries.
supported_languages = dict(
    Spanish="es",
    French="fr",
    German="de",
    Russian="ru",
    Arabic="ar",
    Hindi="hi",
)
129
+
130
if __name__ == '__main__':
    # Load environment variables from .env file
    load_dotenv()

    # Inputs: the video, the target language, then four option checkboxes.
    language_choices = list(supported_languages.keys())
    inputs = [
        gr.Video(label='Content Video'),
        gr.Dropdown(language_choices, label="Target Language"),
    ]
    for checkbox_label in ("Generate Transcript", "Translate Transcript",
                           "Generate Subtitles", "Generate Shorts"):
        inputs.append(gr.Checkbox(label=checkbox_label))

    # Outputs: two text boxes, the moderation table, the subtitled video and
    # three slots for generated shorts.
    outputs = [
        gr.Textbox(label="Transcript"),
        gr.Textbox(label="Translation"),
        gr.DataFrame(label="Moderation Results"),
        gr.Video(label='Output Video with Subtitles'),
    ]
    short_outputs = []
    for i in range(3):
        short_outputs.append(gr.Video(label=f"Short {i+1}"))
    outputs.extend(short_outputs)

    demo = gr.Interface(
        fn=interface_function,
        inputs=inputs,
        outputs=outputs,
        title="Rosetta AI",
        description="Content Creation Customization",
    )

    demo.launch()
156
+
moderator.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pprint import pprint
2
+ from detoxify import Detoxify
3
+ import pandas as pd
4
+
5
class DetoxifyModerator(object):
    """Scores text for toxicity with the Detoxify 'original' model."""

    def detect_toxicity(self,text):
        """Return the Detoxify prediction for ``text``.

        The model is created on first use and then reused, so repeated calls
        on the same moderator do not reload it.
        """
        model = getattr(self, "_model", None)
        if model is None:
            model = self._model = Detoxify('original')
        return model.predict(text)

    def get_toxicity_report(self, toxicity_result):
        """Return a new dict with every score expressed as a percentage.

        Each value is multiplied by 100 and rounded to two decimals; the
        input mapping is left untouched.
        """
        return {
            category: round(float(score) * 100, 2)
            for category, score in toxicity_result.items()
        }

    def format_results(self,results):
        """Convert a category -> score mapping to a two-column DataFrame.

        Columns are "Category" and "Percentage"; the latter holds the score
        formatted as a percentage string with two decimals (e.g. "12.34%").
        """
        df = pd.DataFrame(list(results.items()), columns=["Category", "Percentage"])
        df["Percentage"] = df["Percentage"].apply(lambda x: f"{x:.2%}")  # Format as percentage
        return df
22
+
23
if __name__ == '__main__':
    # Manual smoke run: score a sample sentence and print the formatted table.
    detoxify_moderator = DetoxifyModerator()
    result = detoxify_moderator.detect_toxicity('To let the user select the target language for translation, you can add a dropdown menu in the Gradio interface. This will allow users to choose the target language before processing the video. Here\'s how you can modify the script to include this feature')
    # format_results is the formatter the class actually defines; the
    # previously called get_toxicity_report is not an active method.
    report = detoxify_moderator.format_results(result)
    print(report)
28
+
29
+
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ torch
3
+ torchvision
4
+ torchaudio
5
+ openai-whisper
6
+ transformers
7
+ sentencepiece
8
+ sacremoses
9
+ pydub
10
+ moviepy
11
+ gradio
12
+ detoxify
13
+ ffmpeg-python
14
+ opencv-python
15
+ pysrt
16
+ python-dotenv
shorts_generator.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pysrt
3
+ from openai import OpenAI
4
+ import os
5
+ import re
6
+ import subprocess
7
+
8
+
9
+
10
+
11
+
12
class ShortsGenerator(object):
    """Picks important scenes from subtitles via OpenAI and cuts them with ffmpeg."""

    def read_srt(self,file_path):
        """Open an SRT file with pysrt and return the parsed subtitles."""
        return pysrt.open(file_path)

    def extract_text(self,subtitles):
        """Join the ``text`` of every subtitle into one space-separated string."""
        return ' '.join(subtitle.text for subtitle in subtitles).strip()

    def get_important_scenes(self,text):
        """Ask the chat model to list important scenes in the subtitle text.

        The API key is read from the OPEN_AI_API_KEY environment variable.

        Returns:
            The first choice's message object; its ``content`` holds the
            numbered scene list requested by the prompt.
        """
        client = OpenAI(api_key=os.getenv('OPEN_AI_API_KEY'))
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful videos editing assistant."},
                {"role": "user", "content": "Identify the important scenes from the following subtitles text return that by start times and end time,videos should be at less 30s and maximum 2 min with format like this \"1. Arrival of Raymond Reddington at the FBI office - Start time: 00:00:39, End time: 00:01:17\":\n" + text}
            ],
            max_tokens=1500
        )
        return response.choices[0].message

    def execute(self,srt_file_path):
        """Read an SRT file and return the model's important-scenes message."""
        subtitles = self.read_srt(srt_file_path)
        text = self.extract_text(subtitles)
        return self.get_important_scenes(text)

    def extract_scenes(self,input_text):
        """Parse the numbered scene list produced by the model.

        Returns:
            One dict per matched line with the string keys 'scene',
            'description', 'start' and 'end' (times as HH:MM:SS). Lines that
            do not match the expected format are ignored.
        """
        pattern = r'(?P<scene>\d+)\. (?P<description>.*?) - Start time: (?P<start>\d{2}:\d{2}:\d{2}), End time: (?P<end>\d{2}:\d{2}:\d{2})'
        return [match.groupdict() for match in re.finditer(pattern, input_text)]

    @staticmethod
    def _safe_filename(description):
        """Turn a free-text scene description into a safe file name stem."""
        # The description is model output: strip path separators and other
        # characters that are invalid or dangerous in a file name.
        cleaned = re.sub(r'[^\w\- ]+', '_', description).strip(' _')
        return cleaned or 'scene'

    def extract_video_scenes(self,video_file, scenes):
        """Cut every scene out of ``video_file`` into 'output/<description>.mp4'.

        Args:
            video_file: Path of the source video.
            scenes: Iterable of dicts with 'start', 'end' and 'description'.

        Returns:
            Paths of the clips ffmpeg produced successfully; scenes whose
            extraction failed are reported on stdout and left out.
        """
        shorts_files_path_list = []

        output_dir = "output/"
        os.makedirs(output_dir, exist_ok=True)

        for scene in scenes:
            start_time = scene['start']
            end_time = scene['end']
            stem = self._safe_filename(scene['description'])
            output_filename = os.path.join(output_dir, f"{stem}.mp4")

            cmd = [
                'ffmpeg',
                '-y',  # Overwrite output file if exists
                '-i', video_file,
                '-ss', start_time,
                '-to', end_time,
                '-c:v', 'libx264',
                '-c:a', 'aac',
                '-strict', 'experimental',
                '-b:a', '192k',
                output_filename,
            ]

            completed = subprocess.run(cmd, capture_output=True)
            if completed.returncode != 0:
                # Do not hand back a path to a file that was never written.
                error_text = completed.stderr.decode('utf-8', errors='replace')
                print(f"ffmpeg failed for scene '{stem}': {error_text[-500:]}")
                continue

            shorts_files_path_list.append(output_filename)

        return shorts_files_path_list
94
+
95
+
96
+
97
+
98
if __name__ == "__main__":
    # Manual run against local sample files.
    srt_file_path = 's1.srt'
    path_video = '1.mp4'

    shorts_generator = ShortsGenerator()
    important_scenes = shorts_generator.execute(srt_file_path)

    # Show the parsed scenes, then cut them out of the video.
    print("Important Scenes:\n", shorts_generator.extract_scenes(important_scenes.content))
    parsed_scenes = shorts_generator.extract_scenes(important_scenes.content)
    shorts_generator.extract_video_scenes(path_video, parsed_scenes)
    print("Well Done")
subtitles.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
2
+ import json
3
+
4
+
5
class SubtitlesRenderer(object):
    """Burns subtitles from a JSON segments file into a video."""

    def add_subtitles(self,video_file, subtitle_file, output_file):
        """Render every subtitle segment onto the video and write the result.

        Args:
            video_file: Path of the source video.
            subtitle_file: Path of a JSON file holding a list of objects
                with 'text', 'start' and 'end' keys (times are subtracted
                and passed to MoviePy as clip start/duration).
            output_file: Path of the video file to write.

        Returns:
            ``output_file``.
        """
        # Load subtitle data from JSON
        with open(subtitle_file, 'r', encoding='utf-8') as f:
            subtitles = json.load(f)

        video = VideoFileClip(video_file)
        text_clips_list = []
        final_clip = None
        try:
            # Leave some padding on both sides of the frame.
            max_width = video.size[0] - 40

            for subtitle in subtitles:
                text = subtitle['text']
                start_time = subtitle['start']
                end_time = subtitle['end']

                # Nothing to draw for blank text or a non-positive duration.
                if not text.strip() or end_time <= start_time:
                    continue

                txt_clip = TextClip(text, fontsize=28, color='white', font='Arial', method='caption',size=(max_width, None),stroke_color='black',
                                    stroke_width= 0.5, bg_color='black',)
                txt_clip = txt_clip.set_duration(end_time - start_time)
                txt_clip = txt_clip.set_position(('center', 'bottom'))
                text_clips_list.append(txt_clip.set_start(start_time))

            # Composite all TextClips onto the video
            final_clip = CompositeVideoClip([video] + text_clips_list)

            final_clip.write_videofile(output_file, codec='libx264', fps=video.fps, audio_codec='aac',
                                       ffmpeg_params=["-vf", "format=yuv420p"])  # Add this for compatibility
        finally:
            # Release clip resources whether or not rendering succeeded.
            for clip in text_clips_list:
                clip.close()
            if final_clip is not None:
                final_clip.close()
            video.close()

        return output_file
51
+ # def compose_text(self,frame, t, text_clips):
52
+ # # Select the appropriate TextClips for the current time t
53
+ # current_clips = [text_clip for text_clip in text_clips if text_clip.start < t < text_clip.end]
54
+
55
+ # # Composite the selected TextClips onto the frame
56
+ # for clip in current_clips:
57
+ # frame = frame.blit(clip.get_frame(t - clip.start), clip.pos)
58
+ # return frame
59
+
60
if __name__ == '__main__':
    # Manual run against local sample files.
    video_file, subtitle_file, output_file = (
        'video.mp4',
        'segments.json',
        'output_video_with_subtitles.mp4',
    )

    renderer = SubtitlesRenderer()
    renderer.add_subtitles(video_file, subtitle_file, output_file)
67
+
transcript_detect.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+
3
class WhisperModel(object):
    """Thin wrapper around an openai-whisper model."""

    def __init__(self,model_type="base"):
        """Load the Whisper model named by ``model_type`` (default "base")."""
        # Honour the requested model instead of always loading "base".
        self.model = whisper.load_model(model_type)

    def transcribe_audio(self,file_path):
        """Transcribe an audio file and return Whisper's result dict.

        Raises:
            Exception: If transcription fails; the original error is chained.
        """
        try:
            return self.model.transcribe(file_path)
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error trnascribe audio file {e}') from e

    def get_text(self,transcription):
        """Return the 'text' entry of a transcription result."""
        return transcription['text']

    def get_detected_language(self,transcription):
        """Return the 'language' entry of a transcription result."""
        return transcription['language']

    def get_segments(self,transcription):
        """Return the segments reduced to their text, start, end and id."""
        return [
            {
                "text": segment['text'],
                "start": segment['start'],
                "end": segment['end'],
                "id": segment['id'],
            }
            for segment in transcription['segments']
        ]

    def detect_language(self,file_path):
        """Detect the spoken language of an audio file.

        Returns:
            The key with the highest probability among those reported by
            the model's ``detect_language``.

        Raises:
            Exception: If detection fails; the original error is chained.
        """
        try:
            audio = whisper.load_audio(file_path)
            audio = whisper.pad_or_trim(audio)
            # make log-Mel spectrogram and move to the same device as the model
            mel = whisper.log_mel_spectrogram(audio).to(self.model.device)
            # detect the spoken language
            _, probs = self.model.detect_language(mel)
            detected = max(probs, key=probs.get)
            print(f"Detected language: {detected}")
            return detected
        except Exception as e:
            print(f"Error {e}")
            raise Exception(f'Error detecting language {e}') from e
46
+
47
+
translation.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MarianMTModel, MarianTokenizer
2
+
3
class TranslationModel(object):
    """Translates text with Helsinki-NLP MarianMT models."""

    def __init__(self):
        # (src_lang, tgt_lang) -> (model, tokenizer), filled on first use.
        self._models = {}

    def _load_model(self, src_lang, tgt_lang):
        """Return the cached (model, tokenizer) pair for a language pair."""
        cache = self.__dict__.setdefault("_models", {})
        key = (src_lang, tgt_lang)
        if key not in cache:
            model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
            model = MarianMTModel.from_pretrained(model_name)
            tokenizer = MarianTokenizer.from_pretrained(model_name)
            cache[key] = (model, tokenizer)
        return cache[key]

    def translate_chunk(self,chunk, src_lang, tgt_lang):
        """Translate one chunk of text.

        Input longer than 512 tokens is truncated by the tokenizer.

        Raises:
            Exception: If loading the model or translating fails; the
                original error is chained.
        """
        try:
            # Loading once per language pair instead of once per chunk.
            model, tokenizer = self._load_model(src_lang, tgt_lang)

            inputs = tokenizer(chunk, return_tensors="pt", padding=True, truncation=True, max_length=512)
            translated_tokens = model.generate(**inputs)
            return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

        except Exception as e:
            print(e)
            raise Exception(f"Error translating text {e}") from e

    def translate_text(self,text, src_lang, tgt_lang):
        """Translate ``text`` chunk by chunk and join the results with spaces.

        When source and target language are the same the text is returned
        unchanged, since no translation is needed.
        """
        if src_lang == tgt_lang:
            return text

        max_length = 512
        chunks = self.split_text(text, max_length)
        return ' '.join(self.translate_chunk(chunk, src_lang, tgt_lang) for chunk in chunks)

    def split_text(self,text, max_length):
        """Split text on '. ' into chunks of at most ``max_length`` characters.

        Sentences are never cut: a single sentence longer than
        ``max_length`` becomes its own (over-long) chunk. Empty or
        whitespace-only text yields an empty list.
        """
        pieces = text.split('. ')
        # split() removed the period of every sentence but the last one;
        # put it back without adding one to the final sentence.
        sentences = [piece + '.' for piece in pieces[:-1]] + [pieces[-1]]

        chunks = []
        current_chunk = ""
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue

            candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
            if current_chunk and len(candidate) > max_length:
                chunks.append(current_chunk)
                current_chunk = sentence
            else:
                current_chunk = candidate

        if current_chunk:
            chunks.append(current_chunk)

        return chunks