Spaces:
Running
Running
| import os | |
| import gradio as gr | |
| import moviepy.editor as mpe | |
| import mimetypes | |
| from pydub import AudioSegment | |
| import google.generativeai as genai | |
| from PIL import Image, ImageColor | |
# ──────────────────────────────────────────────────────────
# ImageMagick setup: tell MoviePy's TextClip which `convert` binary to use,
# then rewrite the ImageMagick 6 policy file in place so every rights="none"
# entry becomes rights="read|write".
mpe.TextClip.imagemagick_binary = "/usr/bin/convert"
os.system(
    "sed -i 's/rights=\"none\"/rights=\"read|write\"/g' /etc/ImageMagick-6/policy.xml"
)

# Gemini API configuration: the key and the model name are read from the
# environment variables "gemini" and "mod" respectively.
GEMINI_API_KEY = os.environ.get("gemini")
MODEL_NAME = os.environ.get("mod")
genai.configure(api_key=GEMINI_API_KEY)
# Fonts offered in the UI: display key -> path of the .ttf file.
# The first three entries are absolute system paths, the rest are relative
# paths under "fonts/". Insertion order is the order shown in the dropdown.
AVAILABLE_FONTS = {
    key: f"{directory}/{filename}"
    for key, directory, filename in (
        ("DejaVuSans-Bold", "/usr/share/fonts/truetype/dejavu", "DejaVuSans-Bold.ttf"),
        ("DejaVuSans", "/usr/share/fonts/truetype/dejavu", "DejaVuSans.ttf"),
        ("LiberationSerif-Bold", "/usr/share/fonts/truetype/liberation", "LiberationSerif-Bold.ttf"),
        ("BadScript-Regular", "fonts/Bad_Script", "BadScript-Regular.ttf"),
        ("Gidole-Regular", "fonts/Gidole", "Gidole-Regular.ttf"),
        ("GreatVibes-Regular", "fonts/Great_Vibes", "GreatVibes-Regular.ttf"),
        ("OpenSans-Variable", "fonts/Open_Sans", "OpenSans-VariableFont_wdth,wght.ttf"),
        ("OpenSans-Italic-Variable", "fonts/Open_Sans", "OpenSans-Italic-VariableFont_wdth,wght.ttf"),
        ("Roboto-Variable", "fonts/Roboto", "Roboto-VariableFont_wdth,wght.ttf"),
        ("Roboto-Italic-Variable", "fonts/Roboto", "Roboto-Italic-VariableFont_wdth,wght.ttf"),
        ("SourceCodePro-Variable", "fonts/Source_Code_Pro", "SourceCodePro-VariableFont_wght.ttf"),
        ("SourceCodePro-Italic-Variable", "fonts/Source_Code_Pro", "SourceCodePro-Italic-VariableFont_wght.ttf"),
        ("Tektur-Variable", "fonts/Tektur", "Tektur-VariableFont_wdth,wght.ttf"),
        ("Ponomar-Regular", "fonts/Ponomar", "Ponomar-Regular.ttf"),
    )
}
# ──────────────────────────────────────────────────────────
| # Utility: CSS color to RGB tuple | |
def hex_to_rgb(color_str):
    """Convert a CSS-style color string to an ``(R, G, B)`` tuple of ints.

    Accepted forms:
      * hex with a leading ``#``: ``#RGB``, ``#RGBA``, ``#RRGGBB``, ``#RRGGBBAA``
      * bare hex without ``#``: ``RRGGBB`` / ``RRGGBBAA``
      * ``rgb(r, g, b)`` / ``rgba(r, g, b, a)`` with integer or fractional
        channel values
      * anything else ``PIL.ImageColor.getrgb`` understands (color names,
        ``hsl()``, percentages, ...)

    Any alpha component is dropped so the result always has exactly three
    channels, which is what this file passes to ``on_color(color=...)``.

    Raises:
        ValueError: if the string is not a recognizable color.
    """
    import re  # function-scope import keeps this block self-contained

    value = color_str.strip()

    # Hex notation. Short 3/4-digit forms are only accepted with '#', so a
    # bare word made of hex letters is not mistaken for a color.
    digits = value[1:] if value.startswith('#') else value
    allowed_lengths = (3, 4, 6, 8) if value.startswith('#') else (6, 8)
    if len(digits) in allowed_lengths and re.fullmatch(r'[0-9a-fA-F]+', digits):
        if len(digits) in (3, 4):
            digits = ''.join(ch * 2 for ch in digits)
        return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

    # rgb()/rgba() with plain numeric channels (possibly fractional).
    match = re.fullmatch(r'rgba?\(([^)]*)\)', value, flags=re.IGNORECASE)
    if match:
        parts = [part.strip() for part in match.group(1).split(',')]
        if len(parts) >= 3:
            try:
                return tuple(
                    max(0, min(255, int(round(float(part)))))
                    for part in parts[:3]
                )
            except ValueError:
                # e.g. percentage channels: let Pillow parse those below.
                pass

    # Named colors, hsl(), percentages, ...; strip alpha if Pillow returns it.
    return tuple(ImageColor.getrgb(value))[:3]
# ──────────────────────────────────────────────────────────
| # Transcription & SRT helpers | |
def transcribe_audio(audio_file):
    """Send an audio file to the configured Gemini model and return its text.

    The MIME type is guessed from the file name; when it cannot be guessed a
    fixed user-facing message is returned instead of a transcript. The prompt
    is read from the environment variable "p". If the model answers with
    empty text, a second fixed user-facing message is returned.
    """
    kind = mimetypes.guess_type(audio_file)[0]
    if not kind:
        return "ΠΠ΅ΠΌΠ°Π³ΡΡΠΌΠ° Π²ΡΠ·Π½Π°ΡΡΡΡ ΡΡΠΏ ΡΠ°ΠΉΠ»Π°."

    # Raw bytes are sent inline together with their MIME type.
    with open(audio_file, "rb") as handle:
        payload = {"mime_type": kind, "data": handle.read()}

    instruction = os.getenv("p")
    response = genai.GenerativeModel(MODEL_NAME).generate_content(
        [instruction, payload]
    )

    text = response.text.strip()
    if text:
        return text
    return "ΠΠ΅ Π°ΡΡΡΠΌΠ°Π»Π°ΡΡ ΡΡΠ°Π½ΡΠΊΡΡΠ±Π°Π²Π°ΡΡ."
def fix_subtitles_format(transcript):
    """Ask the configured Gemini model to normalize the transcript's timecodes.

    The transcript is appended to a fixed instruction (which mentions the
    HH:MM:SS,mmm format) and sent to the model. The stripped reply is
    returned; if the reply is empty the original transcript is returned
    unchanged.
    """
    request = f"ΠΠ΅ Π·ΠΌΡΠ½ΡΠΉ ΡΡΠΊΡΡΡ, Π²ΡΠΏΡΠ°Π²Ρ ΡΠΎΠ»ΡΠΊΡ ΡΠ°ΡΠΌΠ°Ρ HH:MM:SS,mmm:\n{transcript}"
    reply = genai.GenerativeModel(MODEL_NAME).generate_content(request)
    cleaned = reply.text.strip()
    return cleaned if cleaned else transcript
def create_srt(transcript):
    """Write the transcript to ``subtitles.srt`` and return ``(text, path)``.

    The file is created (or overwritten) in the current working directory
    using UTF-8. The returned text is the transcript exactly as given.
    """
    srt_path = "subtitles.srt"
    with open(srt_path, mode="w", encoding="utf-8") as srt_file_handle:
        srt_file_handle.write(transcript)
    result = (transcript, srt_path)
    return result
def extract_audio_from_video(video_path):
    """Export the audio of a media file as ``extracted_audio.mp3``.

    Returns:
        ``(output_path, None)`` on success, or ``(None, error_message)`` when
        loading or exporting raises any exception.
    """
    target = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_path).export(target, format="mp3")
    except Exception as exc:
        # Errors are reported to the caller as text rather than raised.
        return None, str(exc)
    return target, None
def process_video(video_path):
    """Run the video -> audio -> transcript -> SRT pipeline.

    Returns:
        ``(error_message, None)`` if audio extraction reported an error,
        otherwise the ``(text, path)`` pair produced by ``create_srt``.
    """
    audio_path, failure = extract_audio_from_video(video_path)
    if failure:
        return failure, None
    # Transcribe, let the model normalize the timecodes, then write the file.
    return create_srt(fix_subtitles_format(transcribe_audio(audio_path)))
# ──────────────────────────────────────────────────────────
| # SRT parsing | |
def srt_time_to_sec(time_str):
    """Convert an SRT-style timestamp to seconds.

    Accepts ``HH:MM:SS,mmm`` and, more leniently, ``MM:SS,mmm`` and
    ``SS,mmm``. The fraction separator may be ``,`` or ``.`` and the
    fraction may be omitted. The fraction is read as a decimal fraction of
    a second (``,5`` means 500 ms) and truncated to millisecond precision.

    Raises:
        ValueError: if the string has more than three ``:``-separated fields
            or any field is not an integer.
    """
    stamp = time_str.strip().replace('.', ',')
    clock, _, fraction = stamp.partition(',')

    fields = clock.split(':')
    if len(fields) > 3:
        raise ValueError(f"invalid timestamp: {time_str!r}")
    # Left-pad missing hour/minute fields so "MM:SS" and "SS" are accepted.
    fields = ['0'] * (3 - len(fields)) + fields
    hours, minutes, seconds = (int(field) for field in fields)

    # Normalize the fraction to exactly three digits (milliseconds).
    millis = int(fraction.strip().ljust(3, '0')[:3])
    return hours * 3600 + minutes * 60 + seconds + millis / 1000
def parse_srt_from_text(srt_text):
    """Parse SRT text into a list of ``{'start', 'end', 'text'}`` dicts.

    ``start`` and ``end`` are seconds as returned by ``srt_time_to_sec``;
    ``text`` is the cue's text lines joined with single spaces.

    Tolerates Windows/Mac line endings, separator lines that contain only
    whitespace, and cues that lack the leading index line. Cues with no
    timing line, no text, or unparsable timestamps are skipped.
    """
    import re  # function-scope import keeps this block self-contained

    subs = []
    normalized = (srt_text or "").replace('\r\n', '\n').replace('\r', '\n').strip()

    for block in re.split(r'\n[ \t]*\n', normalized):
        lines = [line.strip() for line in block.splitlines() if line.strip()]

        # The timing line is the first one containing the '-->' arrow; it is
        # normally the second line but may be the first if the index is missing.
        timing_at = next(
            (idx for idx, line in enumerate(lines) if '-->' in line), None
        )
        if timing_at is None:
            continue

        text = ' '.join(lines[timing_at + 1:])
        if not text:
            continue

        start_str, _, end_str = lines[timing_at].partition('-->')
        try:
            start = srt_time_to_sec(start_str)
            end = srt_time_to_sec(end_str)
        except ValueError:
            # Malformed timestamp: drop this cue, keep the rest.
            continue

        subs.append({'start': start, 'end': end, 'text': text})
    return subs
# ──────────────────────────────────────────────────────────
| # Animated TextClip creator | |
def create_animated_text_clip(
    text, duration, font, fontsize, color,
    stroke_color, stroke_width,
    position_type, custom_x, custom_y,
    animation, video_width, video_height,
    bg_color=None, bg_opacity=1.0
):
    """Build a positioned, optionally animated MoviePy text clip.

    Args:
        text: Text to render.
        duration: Clip duration in seconds.
        font, fontsize, color, stroke_color, stroke_width: Passed straight to
            ``mpe.TextClip``.
        position_type: ``'bottom'``, ``'top'`` or ``'center'`` (horizontally
            centered presets); any other value uses ``custom_x``/``custom_y``.
        custom_x: Horizontal center of the text for custom positioning.
        custom_y: Top edge of the text for custom positioning.
        animation: ``'fade'``, ``'slide'`` or ``'zoom'`` (case-insensitive);
            anything else, including ``None``, means no animation.
        video_width, video_height: Frame size used to compute the position.
        bg_color: Optional CSS color for a padded background box.
        bg_opacity: Opacity of that box.

    Returns:
        The text clip with duration and position set.
    """
    txt = mpe.TextClip(
        text, font=font, fontsize=fontsize,
        color=color, stroke_color=stroke_color,
        stroke_width=stroke_width
    )

    if bg_color:
        # Put the text on a colored box with a little padding around it.
        pad_x, pad_y = 10, 5
        txt = txt.on_color(
            size=(txt.w + 2 * pad_x, txt.h + 2 * pad_y),
            color=hex_to_rgb(bg_color), pos=(pad_x, pad_y),
            col_opacity=bg_opacity
        )
    txt = txt.set_duration(duration)

    # Resting position of the clip's top-left corner.
    centered_x = (video_width - txt.w) / 2
    if position_type == 'bottom':
        x, y = centered_x, video_height - txt.h - 20 - fontsize
    elif position_type == 'top':
        x, y = centered_x, 20 + fontsize
    elif position_type == 'center':
        x, y = centered_x, (video_height - txt.h) / 2
    else:
        x, y = custom_x - txt.w / 2, custom_y

    # `animation` may be None; a zero-length clip cannot be animated and
    # would otherwise divide by zero below.
    anim = (animation or 'none').lower()
    ramp = min(0.5, duration / 2) if duration and duration > 0 else 0

    if anim == 'slide' and ramp > 0:
        # Slide in from beyond the left edge and settle at x after `ramp`
        # seconds. This position function must be the clip's final position;
        # setting a static position afterwards would cancel the animation.
        clip_w = txt.w
        return txt.set_position(
            lambda t: (-clip_w + (x + clip_w) * min(t / ramp, 1), y)
        )

    if anim == 'fade' and ramp > 0:
        # NOTE(review): fadein/fadeout fade the clip's colors; for overlays
        # MoviePy also offers crossfadein/crossfadeout — confirm which is wanted.
        txt = txt.fadein(ramp).fadeout(ramp)
    elif anim == 'zoom' and ramp > 0:
        # Grow from 50% to 100% of the size over the clip's duration.
        txt = txt.resize(lambda t: 0.5 + 0.5 * min(t / duration, 1))

    return txt.set_position((x, y))
# ──────────────────────────────────────────────────────────
| # Extract first frame for preview | |
def extract_first_frame(video_path):
    """Return the frame at t=0 of a video, or ``None`` on any failure.

    The frame is whatever ``VideoFileClip.get_frame(0)`` returns
    (presumably an H x W x 3 array — other code in this file unpacks
    ``frame.shape`` that way). The clip is always released, including when
    reading the frame fails.
    """
    clip = None
    try:
        clip = mpe.VideoFileClip(video_path)
        return clip.get_frame(0)
    except Exception:
        # Best-effort preview: an unreadable video simply yields no frame.
        return None
    finally:
        if clip is not None:
            try:
                clip.close()
            except Exception:
                # A failing close must not mask the result computed above.
                pass
# Single-frame preview video (fixed 'bgo' -> 'bg_opacity')
def create_single_frame_video(
    frame, text, font, fontsize, color,
    stroke_color, stroke_width, position_type,
    custom_x, custom_y, animation,
    bg_color=None, bg_opacity=1.0
):
    """Render a one-second preview of styled text over a single frame.

    Args:
        frame: Image array with shape ``(height, width, channels)``, or
            ``None``.
        text: Text to overlay.
        font: Either a key of ``AVAILABLE_FONTS`` or a font name/path; keys
            are resolved to their file path, as ``apply_subtitles`` does.
        fontsize, color, stroke_color, stroke_width, position_type,
        custom_x, custom_y, animation, bg_color, bg_opacity: Forwarded to
            ``create_animated_text_clip``.

    Returns:
        ``'preview_video.mp4'`` (written at 24 fps, no audio), or ``None``
        when ``frame`` is ``None``.
    """
    if frame is None:
        return None

    font_path = AVAILABLE_FONTS.get(font, font)
    height, width, _ = frame.shape

    base = mpe.ImageClip(frame).set_duration(1.0)
    txt = create_animated_text_clip(
        text, 1.0, font_path, fontsize, color,
        stroke_color, stroke_width,
        position_type, custom_x, custom_y,
        animation, width, height,
        bg_color=bg_color, bg_opacity=bg_opacity
    )
    final_clip = mpe.CompositeVideoClip([base, txt])

    path = 'preview_video.mp4'
    try:
        final_clip.write_videofile(
            path, fps=24, codec='libx264',
            audio=False, verbose=False, logger=None
        )
    finally:
        # Release clip resources even if encoding fails.
        final_clip.close()
    return path
# ──────────────────────────────────────────────────────────
| # Apply subtitles to full video | |
def apply_subtitles(
    video_path, srt_text, font_key, fontsize, color,
    stroke_color, stroke_width, position_type,
    custom_x, custom_y, animation, quality,
    bg_color=None, bg_opacity=1.0
):
    """Burn styled subtitles into a video and return the output path.

    Each cue parsed from ``srt_text`` becomes its own text clip built by
    ``create_animated_text_clip``, so the position, animation and background
    settings are honored. This also avoids ``mpe.SubtitlesClip``, which the
    ``moviepy.editor`` namespace does not provide.

    Args:
        video_path: Path of the source video.
        srt_text: Subtitles in SRT format.
        font_key: Key of ``AVAILABLE_FONTS``, or a font name/path used as-is.
        fontsize, color, stroke_color, stroke_width: Text styling.
        position_type, custom_x, custom_y: Placement (see
            ``create_animated_text_clip``).
        animation: Animation name (see ``create_animated_text_clip``).
        quality: ``'low'``, ``'medium'`` or ``'high'`` (mapped to an x264
            preset), or any x264 preset name, which is passed through.
        bg_color, bg_opacity: Optional background box behind the text.

    Returns:
        ``'video_with_subs.mp4'``.
    """
    font_path = AVAILABLE_FONTS.get(font_key, font_key)

    # 'low' and 'high' are not x264 preset names; translate the UI labels.
    # Slower presets compress better at the cost of encoding time.
    preset_by_quality = {'low': 'ultrafast', 'medium': 'medium', 'high': 'slow'}
    preset = preset_by_quality.get(quality, quality)

    output_path = "video_with_subs.mp4"
    clip = mpe.VideoFileClip(video_path)
    try:
        overlays = []
        for cue in parse_srt_from_text(srt_text):
            # Clamp each cue to the video's time range; skip empty ones.
            start = max(0, cue['start'])
            end = cue['end']
            if clip.duration is not None:
                end = min(end, clip.duration)
            if end <= start or not cue['text'].strip():
                continue

            txt = create_animated_text_clip(
                cue['text'], end - start, font_path, fontsize, color,
                stroke_color, stroke_width,
                position_type, custom_x, custom_y,
                animation, clip.w, clip.h,
                bg_color=bg_color, bg_opacity=bg_opacity
            )
            overlays.append(txt.set_start(start))

        final = mpe.CompositeVideoClip([clip, *overlays])
        final.write_videofile(
            output_path,
            codec='libx264', audio_codec='aac',
            preset=preset
        )
    finally:
        # Release the source video's reader even if encoding fails.
        clip.close()
    return output_path
# ──────────────────────────────────────────────────────────
| # Gradio interface | |
# UI layout: components appear in the order they are created below.
with gr.Blocks() as demo:
    gr.Markdown('# ΠΡΡΠ°ΠΌΠ°ΡΡΡΠ½Π°Π΅ ΡΡΠ²Π°ΡΡΠ½Π½Π΅ Ρ Π½Π°ΠΊΠ»Π°Π΄Π°Π½Π½Π΅ ΡΡΠ±ΡΡΡΡΠ°Ρ')

    # Video upload & preview
    video_input = gr.Video(label='ΠΠ°Π³ΡΡΠ·ΡΡΠ΅ Π²ΡΠ΄ΡΠ°')
    first_frame = gr.State()
    preview_output = gr.Video(label="ΠΡΡΠ²'Ρ", visible=False)

    def on_video_load(video_path):
        """Store the first frame and toggle input/preview visibility.

        With no video: clear the stored frame, show the input, hide the
        preview. With a video: store its first frame, hide the input and
        show the preview.
        NOTE(review): nothing visible here ever sends a video to
        `preview_output` — confirm the intended preview wiring.
        """
        if not video_path:
            return None, gr.update(visible=True), gr.update(visible=False)
        frame = extract_first_frame(video_path)
        return frame, gr.update(visible=False), gr.update(visible=True)

    video_input.change(
        on_video_load,
        inputs=video_input,
        outputs=[first_frame, video_input, preview_output]
    )

    # Transcribe button
    gr.Markdown('## Π‘ΡΠ²Π°ΡΡΡΡ ΡΡΠ±ΡΡΡΡΡ Π· Π°ΡΠ΄ΡΡ')
    subs_btn = gr.Button('Π‘ΡΠ²Π°ΡΡΡΡ ΡΡΠ±ΡΡΡΡΡ')
    subs_box = gr.Textbox(label='Π’ΡΠΊΡΡ ΡΡΠ±ΡΡΡΡΠ°Ρ', lines=10)
    srt_file = gr.File(label='Π€Π°ΠΉΠ» SRT')
    subs_btn.click(
        process_video,
        inputs=video_input,
        outputs=[subs_box, srt_file]
    )

    # Styling & applying subtitles
    # Slider's third positional parameter is `value`, so the step must be
    # given as `step=`; passing it positionally together with `value=`
    # supplies `value` twice.
    gr.Markdown('## ΠΠ°Π»Π°Π΄ΠΊΠ° ΡΡΡΠ»Ρ Ρ Π½Π°ΠΊΠ»Π°Π΄Π°Π½Π½Π΅ ΡΡΠ±ΡΡΡΡΠ°Ρ')
    font_dd = gr.Dropdown(list(AVAILABLE_FONTS.keys()),
                          value='DejaVuSans-Bold', label='Π¨ΡΡΡΡ')
    size_sl = gr.Slider(10, 100, step=1, value=40, label='ΠΠ°ΠΌΠ΅Ρ ΡΡΡΡΡΠ°')
    color_txt = gr.ColorPicker(value='#FFFFFF', label='ΠΠΎΠ»Π΅Ρ ΡΡΠΊΡΡΡ')
    color_st = gr.ColorPicker(value='#000000', label='ΠΠΎΠ»Π΅Ρ Π°Π±Π²ΠΎΠ΄ΠΊΡ')
    width_sl = gr.Slider(0, 10, step=1, value=2, label='Π’Π°ΡΡΡΡΠ½Ρ Π°Π±Π²ΠΎΠ΄ΠΊΡ')
    pos_dd = gr.Dropdown(['bottom', 'top', 'center', 'custom'],
                         value='bottom', label='ΠΠ°Π·ΡΡΡΡ')
    anim_dd = gr.Dropdown(['None', 'fade', 'slide', 'zoom'],
                          value='None', label='ΠΠ½ΡΠΌΠ°ΡΡΡ')
    x_sl = gr.Slider(0, 1920, step=1, value=100, label='X')
    y_sl = gr.Slider(0, 1080, step=1, value=100, label='Y')
    bg_color = gr.ColorPicker(value='#000000', label='ΠΠΎΠ»Π΅Ρ ΡΠΎΠ½Ρ')
    bg_opac = gr.Slider(0.0, 1.0, step=0.1, value=0.5, label='ΠΡΠ°Π·ΡΡΡΡΠ°ΡΡΡ ΡΠΎΠ½Ρ')
    exp_dd = gr.Dropdown(['low', 'medium', 'high', 'veryslow'],
                         value='medium', label='Π―ΠΊΠ°ΡΡΡ ΠΊΠ°Π΄Π·ΡΡΠΎΡΠΊΡ')
    out_vid = gr.Video(label='ΠΡΠ½ΡΠΊ')
    apply_btn = gr.Button('ΠΠ°ΠΊΠ»Π°ΡΡΡ ΡΡΠ±ΡΡΡΡΡ')

    # The order of `inputs` matches apply_subtitles' positional parameters.
    apply_btn.click(
        apply_subtitles,
        inputs=[
            video_input, subs_box, font_dd, size_sl,
            color_txt, color_st, width_sl, pos_dd,
            x_sl, y_sl, anim_dd, exp_dd,
            bg_color, bg_opac
        ],
        outputs=out_vid
    )

demo.launch(debug=True)