# CaptionBE / app_new.py
# Hosting-page residue kept for provenance: uploaded by archivartaunik,
# commit 8ae53c8 ("Rename app.py to app_new.py").
import os
import gradio as gr
import moviepy.editor as mpe
import mimetypes
from pydub import AudioSegment
import google.generativeai as genai
from PIL import Image, ImageColor
# ——————————————————————————————————————————————————————————
# ImageMagick setup.
# Point TextClip at the system `convert` binary, then rewrite the ImageMagick 6
# policy file in place so every rule with rights="none" becomes
# rights="read|write".
# NOTE(review): MoviePy 1.x normally reads the binary path from moviepy.config
# (IMAGEMAGICK_BINARY); confirm this attribute assignment has any effect.
mpe.TextClip.imagemagick_binary = "/usr/bin/convert"
os.system(
    "sed -i 's/rights=\"none\"/rights=\"read|write\"/g' "
    "/etc/ImageMagick-6/policy.xml"
)
# Gemini API configuration.
# Both the API key and the model name come from environment variables
# ("gemini" and "mod"); either may be None when the variable is unset.
GEMINI_API_KEY = os.environ.get("gemini")
MODEL_NAME = os.environ.get("mod")
genai.configure(api_key=GEMINI_API_KEY)
# Fonts offered in the UI, keyed by display name.
# System fonts use absolute paths; the rest are paths relative to the working
# directory (a local "fonts/" folder).
_SYSTEM_FONTS = {
    "DejaVuSans-Bold": "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    "DejaVuSans": "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
    "LiberationSerif-Bold": "/usr/share/fonts/truetype/liberation/LiberationSerif-Bold.ttf",
}
_BUNDLED_FONTS = {
    "BadScript-Regular": "fonts/Bad_Script/BadScript-Regular.ttf",
    "Gidole-Regular": "fonts/Gidole/Gidole-Regular.ttf",
    "GreatVibes-Regular": "fonts/Great_Vibes/GreatVibes-Regular.ttf",
    "OpenSans-Variable": "fonts/Open_Sans/OpenSans-VariableFont_wdth,wght.ttf",
    "OpenSans-Italic-Variable": "fonts/Open_Sans/OpenSans-Italic-VariableFont_wdth,wght.ttf",
    "Roboto-Variable": "fonts/Roboto/Roboto-VariableFont_wdth,wght.ttf",
    "Roboto-Italic-Variable": "fonts/Roboto/Roboto-Italic-VariableFont_wdth,wght.ttf",
    "SourceCodePro-Variable": "fonts/Source_Code_Pro/SourceCodePro-VariableFont_wght.ttf",
    "SourceCodePro-Italic-Variable": "fonts/Source_Code_Pro/SourceCodePro-Italic-VariableFont_wght.ttf",
    "Tektur-Variable": "fonts/Tektur/Tektur-VariableFont_wdth,wght.ttf",
    "Ponomar-Regular": "fonts/Ponomar/Ponomar-Regular.ttf",
}
# Merge order keeps the system fonts first, so the dropdown order is unchanged.
AVAILABLE_FONTS = {**_SYSTEM_FONTS, **_BUNDLED_FONTS}
# ——————————————————————————————————————————————————————————
# Utility: CSS color to RGB tuple
def hex_to_rgb(color_str):
    """Convert a CSS-style color string to an ``(r, g, b)`` tuple of ints.

    Pillow's ``ImageColor.getrgb`` is tried first. If it rejects the string,
    two fallbacks are attempted: functional ``rgb(...)``/``rgba(...)`` notation
    with fractional channel values, and plain 3- or 6-digit hex with an
    optional leading ``#``.

    The alpha channel, when present, is always dropped so the result can be
    used directly as an RGB fill color.

    Args:
        color_str: Color specification such as ``"#1a2b3c"``, ``"red"`` or
            ``"rgba(12.5, 40, 255, 1)"``.

    Returns:
        A 3-tuple of ints.

    Raises:
        ValueError: If none of the supported notations match.
    """
    try:
        # getrgb returns 4 components for colors that carry alpha; keep RGB only.
        return tuple(ImageColor.getrgb(color_str))[:3]
    except ValueError:
        pass

    text = str(color_str).strip()
    lowered = text.lower()

    # NOTE(review): some color pickers emit rgba() strings with fractional
    # channels, which getrgb rejects — confirm against the Gradio version used.
    if lowered.startswith(("rgb(", "rgba(")) and lowered.endswith(")"):
        channels = lowered[lowered.index("(") + 1:-1].split(",")
        if len(channels) in (3, 4):
            return tuple(
                max(0, min(255, int(round(float(ch)))))
                for ch in channels[:3]
            )

    digits = text.lstrip('#')
    if len(digits) == 3:
        # Short form: each digit is doubled (#abc -> #aabbcc).
        digits = ''.join(ch * 2 for ch in digits)
    if len(digits) < 6:
        raise ValueError(f"unrecognized color: {color_str!r}")
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))
# ——————————————————————————————————————————————————————————
# Transcription & SRT helpers
def transcribe_audio(audio_file):
    """Send an audio file to the configured Gemini model and return its text.

    The MIME type is guessed from the file name; if it cannot be determined a
    fixed message string is returned instead of calling the model. The prompt
    is read from the ``p`` environment variable. If the model's reply is empty
    after stripping, a fixed failure message is returned.

    NOTE(review): the two message literals look mis-encoded in this copy of
    the file; they are reproduced unchanged.
    """
    mime_type = mimetypes.guess_type(audio_file)[0]
    if not mime_type:
        return "НСмагчыма Π²Ρ‹Π·Π½Π°Ρ‡Ρ‹Ρ†ΡŒ Ρ‚Ρ‹ΠΏ Ρ„Π°ΠΉΠ»Π°."

    # The whole file is read into memory and passed inline with the prompt.
    with open(audio_file, "rb") as stream:
        payload = {"mime_type": mime_type, "data": stream.read()}

    instructions = os.getenv("p")
    reply = genai.GenerativeModel(MODEL_NAME).generate_content(
        [instructions, payload]
    )
    transcript = reply.text.strip()
    if transcript:
        return transcript
    return "НС атрымалася Ρ‚Ρ€Π°Π½ΡΠΊΡ€Ρ‹Π±Π°Π²Π°Ρ†ΡŒ."
def fix_subtitles_format(transcript):
    """Ask the Gemini model to normalize timestamp formatting in a transcript.

    The transcript is appended to a fixed instruction prompt. The stripped
    reply is returned; if it is empty, the original transcript is returned.

    NOTE(review): the prompt literal looks mis-encoded in this copy of the
    file; it is reproduced unchanged.
    """
    request = f"НС змяняй тэксты, Π²Ρ‹ΠΏΡ€Π°Π²Ρ– Ρ‚ΠΎΠ»ΡŒΠΊΡ– Ρ„Π°Ρ€ΠΌΠ°Ρ‚ HH:MM:SS,mmm:\n{transcript}"
    reply = genai.GenerativeModel(MODEL_NAME).generate_content(request)
    cleaned = reply.text.strip()
    return cleaned if cleaned else transcript
def create_srt(transcript):
    """Write the transcript to ``subtitles.srt`` (UTF-8) in the working directory.

    Returns:
        A ``(transcript, path)`` pair: the text unchanged and the file path.
    """
    srt_path = "subtitles.srt"
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(transcript)
    result = (transcript, srt_path)
    return result
def extract_audio_from_video(video_path):
    """Export the audio of a media file to ``extracted_audio.mp3``.

    Returns:
        ``(path, None)`` on success, or ``(None, message)`` where ``message``
        is the string form of whatever exception was raised.
    """
    target = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_path).export(target, format="mp3")
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return None, str(exc)
    return target, None
def process_video(video_path):
    """Run the full pipeline: extract audio, transcribe, fix format, save SRT.

    Returns:
        ``(subtitle_text, srt_path)`` on success. If audio extraction reports
        an error, ``(error_message, None)`` is returned instead.
    """
    audio_path, err = extract_audio_from_video(video_path)
    if not err:
        raw_transcript = transcribe_audio(audio_path)
        return create_srt(fix_subtitles_format(raw_transcript))
    return err, None
# ——————————————————————————————————————————————————————————
# SRT parsing
def srt_time_to_sec(time_str):
    """Convert an SRT-style timestamp to seconds.

    Accepted forms (surrounding whitespace is ignored, ``.`` and ``,`` are
    interchangeable as the fraction separator):

    * ``HH:MM:SS,mmm`` — the standard SRT form
    * ``HH:MM:SS`` — no fraction
    * ``MM:SS,mmm`` / ``SS,mmm`` — missing leading fields are treated as zero

    The fraction is read as a decimal fraction of a second, so ``,5`` means
    500 ms; digits beyond the third are truncated. Anything after the first
    whitespace-separated token (e.g. cue position settings) is ignored.

    Args:
        time_str: The timestamp text.

    Returns:
        Seconds as a float.

    Raises:
        ValueError: If the text is empty or not a recognizable timestamp.
    """
    tokens = time_str.strip().split()
    if not tokens:
        raise ValueError("empty timestamp")
    stamp = tokens[0].replace('.', ',')

    clock, _, fraction = stamp.partition(',')
    fields = clock.split(':')
    if len(fields) > 3:
        raise ValueError(f"invalid timestamp: {time_str!r}")
    # Left-pad so that "MM:SS" and "SS" are read with zero hours/minutes.
    fields = ['0'] * (3 - len(fields)) + fields
    hours, minutes, seconds = (int(field) for field in fields)

    # Normalize the fraction to exactly three digits (milliseconds).
    millis = int(fraction.ljust(3, '0')[:3]) if fraction else 0
    return hours * 3600 + minutes * 60 + seconds + millis / 1000
def parse_srt_from_text(srt_text):
    """Parse SRT text into a list of cue dictionaries.

    Cues are separated by one or more blank (or whitespace-only) lines; any
    of ``\\n``, ``\\r\\n`` or ``\\r`` is accepted as a line ending. Within a
    cue the timing line is located by its ``-->`` marker, so a missing
    counter line does not break parsing. Lines starting with a Markdown code
    fence are ignored. Cues with no text or an unparsable timing line are
    skipped.

    Args:
        srt_text: The subtitle text; ``None`` or empty yields an empty list.

    Returns:
        A list of ``{'start': float, 'end': float, 'text': str}`` dicts in
        input order, with multi-line cue text joined by single spaces.
    """
    subs = []
    if not srt_text:
        return subs

    # Group consecutive non-blank lines into cue blocks.
    blocks, current = [], []
    for raw_line in srt_text.replace('\r\n', '\n').replace('\r', '\n').split('\n'):
        line = raw_line.strip()
        if line.startswith('```'):
            # Code-fence markers are not subtitle content.
            continue
        if line:
            current.append(line)
        elif current:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    for lines in blocks:
        timing_idx = next(
            (i for i, line in enumerate(lines) if '-->' in line), None
        )
        if timing_idx is None:
            continue
        text = ' '.join(lines[timing_idx + 1:]).strip()
        if not text:
            continue
        try:
            start_str, end_str = lines[timing_idx].split('-->')
            # Drop anything after the end timestamp (e.g. position settings).
            end_token = end_str.split()[0]
            start = srt_time_to_sec(start_str)
            end = srt_time_to_sec(end_token)
        except (ValueError, IndexError):
            # Malformed timing line: skip this cue, keep the rest.
            continue
        subs.append({'start': start, 'end': end, 'text': text})
    return subs
# ——————————————————————————————————————————————————————————
# Animated TextClip creator
def create_animated_text_clip(
    text, duration, font, fontsize, color,
    stroke_color, stroke_width,
    position_type, custom_x, custom_y,
    animation, video_width, video_height,
    bg_color=None, bg_opacity=1.0
):
    """Build a positioned, optionally animated text clip for one subtitle.

    Args:
        text: The subtitle text to render.
        duration: How long the clip lasts, in seconds.
        font, fontsize, color, stroke_color, stroke_width: Passed through to
            ``mpe.TextClip``.
        position_type: ``'bottom'``, ``'top'`` or ``'center'`` for horizontally
            centred presets; any other value uses the custom coordinates.
        custom_x: Horizontal centre of the text for custom positioning.
        custom_y: Top edge of the text for custom positioning.
        animation: ``'fade'``, ``'slide'`` or ``'zoom'`` (case-insensitive);
            anything else, including ``None``, means no animation.
        video_width, video_height: Frame size used to compute preset positions.
        bg_color: Optional background color string; falsy means no background.
        bg_opacity: Opacity of the background box.

    Returns:
        The configured clip with duration and position set.
    """
    txt = mpe.TextClip(
        text, font=font, fontsize=fontsize,
        color=color, stroke_color=stroke_color,
        stroke_width=stroke_width
    )
    if bg_color:
        # Only RGB is usable as a fill color; drop any alpha component.
        bg_col = tuple(hex_to_rgb(bg_color))[:3]
        pad_x, pad_y = 10, 5
        txt = txt.on_color(
            size=(txt.w + 2 * pad_x, txt.h + 2 * pad_y),
            color=bg_col, pos=(pad_x, pad_y),
            col_opacity=bg_opacity
        )
    txt = txt.set_duration(duration)

    # Resting position (top-left corner of the clip).
    clip_w, clip_h = txt.w, txt.h
    if position_type == 'bottom':
        x = (video_width - clip_w) / 2
        y = video_height - clip_h - 20 - fontsize
    elif position_type == 'top':
        x = (video_width - clip_w) / 2
        y = 20 + fontsize
    elif position_type == 'center':
        x = (video_width - clip_w) / 2
        y = (video_height - clip_h) / 2
    else:
        x = custom_x - clip_w / 2
        y = custom_y

    anim = (animation or 'none').lower()
    # Transition length: at most 0.5 s, never more than half the clip.
    ramp = min(0.5, duration / 2)

    if anim == 'slide' and ramp > 0:
        # Slide in from just off the left edge to the resting position.
        # Returned directly so the static position below does not replace
        # the time-dependent one.
        start_x = -clip_w
        return txt.set_position(
            lambda t: (start_x + (x - start_x) * min(t / ramp, 1), y)
        )
    if anim == 'fade' and ramp > 0:
        txt = txt.fadein(ramp).fadeout(ramp)
    elif anim == 'zoom' and duration > 0:
        # Grow from 50% to 100% size over the clip's lifetime.
        txt = txt.resize(lambda t: 0.5 + 0.5 * min(t / duration, 1))
    return txt.set_position((x, y))
# ——————————————————————————————————————————————————————————
# Extract first frame for preview
def extract_first_frame(video_path):
    """Return the frame at t=0 of a video, or ``None`` if it cannot be read.

    Args:
        video_path: Path to the video file; a falsy value yields ``None``.

    Returns:
        Whatever ``VideoFileClip.get_frame(0)`` returns, or ``None`` on any
        failure while opening or decoding the file.
    """
    if not video_path:
        return None
    try:
        clip = mpe.VideoFileClip(video_path)
        try:
            return clip.get_frame(0)
        finally:
            # Release the reader even when decoding the frame fails.
            clip.close()
    except Exception:
        # Best-effort preview: any failure just means "no frame available".
        return None
# Single-frame preview video (fixed 'bgo' → 'bg_opacity')
def create_single_frame_video(
    frame, text, font, fontsize, color,
    stroke_color, stroke_width, position_type,
    custom_x, custom_y, animation,
    bg_color=None, bg_opacity=1.0
):
    """Render a one-second preview of styled text over a single still frame.

    Args:
        frame: Image array whose first two dimensions are (height, width);
            ``None`` short-circuits and returns ``None``.
        text: Sample text to draw.
        font: Either a key of ``AVAILABLE_FONTS`` or a font name/path; keys
            are resolved to their file path, anything else is used as given.
        fontsize, color, stroke_color, stroke_width, position_type,
        custom_x, custom_y, animation, bg_color, bg_opacity: Styling options
            forwarded to ``create_animated_text_clip``.

    Returns:
        The path ``'preview_video.mp4'`` of the written file, or ``None`` when
        no frame was supplied.
    """
    if frame is None:
        return None

    height, width = frame.shape[:2]
    # Accept the same font keys the full-video path accepts.
    font_path = AVAILABLE_FONTS.get(font, font)

    base = mpe.ImageClip(frame).set_duration(1.0)
    txt = create_animated_text_clip(
        text, 1.0, font_path, fontsize, color,
        stroke_color, stroke_width,
        position_type, custom_x, custom_y,
        animation, width, height,
        bg_color=bg_color, bg_opacity=bg_opacity
    )
    final_clip = mpe.CompositeVideoClip([base, txt])
    path = 'preview_video.mp4'
    try:
        final_clip.write_videofile(
            path, fps=24, codec='libx264',
            audio=False, verbose=False, logger=None
        )
    finally:
        # Release clip resources whether or not encoding succeeded.
        final_clip.close()
        base.close()
    return path
# ——————————————————————————————————————————————————————————
# Apply subtitles to full video
def apply_subtitles(
    video_path, srt_text, font_key, fontsize, color,
    stroke_color, stroke_width, position_type,
    custom_x, custom_y, animation, quality,
    bg_color=None, bg_opacity=1.0
):
    """Burn the given SRT subtitles into a video and write the result.

    Each cue from ``srt_text`` is rendered with ``create_animated_text_clip``
    so that the position, animation and background options are honoured, then
    composited over the source video at the cue's start time. The SRT text is
    parsed in memory; no temporary subtitle file is written.

    Args:
        video_path: Path of the source video.
        srt_text: Subtitles in SRT format.
        font_key: Key of ``AVAILABLE_FONTS``; unknown values are used as-is.
        fontsize, color, stroke_color, stroke_width: Text styling.
        position_type, custom_x, custom_y: Placement options.
        animation: Animation name.
        quality: Encoder speed/quality choice. ``'low'`` and ``'high'`` are
            translated to the x264 presets ``'ultrafast'`` and ``'slow'``;
            other values are passed through as the preset name.
        bg_color, bg_opacity: Optional background box behind the text.

    Returns:
        The output path ``"video_with_subs.mp4"``.
    """
    font_path = AVAILABLE_FONTS.get(font_key, font_key)
    # 'low'/'high' are UI labels, not encoder preset names.
    preset = {'low': 'ultrafast', 'high': 'slow'}.get(quality, quality) or 'medium'
    cues = parse_srt_from_text(srt_text or "")
    output_path = "video_with_subs.mp4"

    clip = mpe.VideoFileClip(video_path)
    final = None
    try:
        overlays = []
        for cue in cues:
            start = max(0.0, cue['start'])
            # Clamp to the video so subtitles cannot lengthen the output.
            end = min(cue['end'], clip.duration)
            span = end - start
            if span <= 0 or not cue['text'].strip():
                continue
            overlay = create_animated_text_clip(
                cue['text'], span, font_path, fontsize, color,
                stroke_color, stroke_width,
                position_type, custom_x, custom_y,
                animation, clip.w, clip.h,
                bg_color=bg_color, bg_opacity=bg_opacity
            )
            overlays.append(overlay.set_start(start))

        final = mpe.CompositeVideoClip([clip] + overlays)
        final.write_videofile(
            output_path,
            codec='libx264', audio_codec='aac',
            preset=preset
        )
    finally:
        # Release readers even if rendering fails part-way.
        if final is not None:
            final.close()
        clip.close()
    return output_path
# ——————————————————————————————————————————————————————————
# Gradio interface
# Builds the whole UI: video upload, subtitle generation, styling controls and
# the final "apply" action, then launches the app.
# NOTE(review): the user-facing label strings below look mis-encoded in this
# copy of the file; they are reproduced unchanged.
with gr.Blocks() as demo:
    gr.Markdown('# ΠΡžΡ‚Π°ΠΌΠ°Ρ‚Ρ‹Ρ‡Π½Π°Π΅ стварэннС Ρ– Π½Π°ΠΊΠ»Π°Π΄Π°Π½Π½Π΅ ΡΡƒΠ±Ρ‚Ρ‹Ρ‚Ρ€Π°Ρž')

    # Video upload & preview
    video_input = gr.Video(label='Π—Π°Π³Ρ€ΡƒΠ·Ρ–Ρ†Π΅ відэа')
    first_frame = gr.State()
    preview_output = gr.Video(label="ΠŸΡ€ΡΠ²'ю", visible=False)

    def on_video_load(video_path):
        """Cache the first frame and swap which video widget is visible.

        Returns values for (first_frame, video_input, preview_output).
        """
        if not video_path:
            # Video cleared: show the uploader again, hide the preview.
            return None, gr.update(visible=True), gr.update(visible=False)
        frame = extract_first_frame(video_path)
        # NOTE(review): nothing in this section assigns a value to
        # preview_output, so the preview is shown empty — confirm intent.
        return frame, gr.update(visible=False), gr.update(visible=True)

    video_input.change(
        on_video_load,
        inputs=video_input,
        outputs=[first_frame, video_input, preview_output]
    )

    # Transcribe button
    gr.Markdown('## Π‘Ρ‚Π²Π°Ρ€Ρ‹Ρ†ΡŒ субцітры Π· Π°ΡžΠ΄Ρ‹Ρ‘')
    subs_btn = gr.Button('Π‘Ρ‚Π²Π°Ρ€Ρ‹Ρ†ΡŒ субцітры')
    subs_box = gr.Textbox(label='Вэкст ΡΡƒΠ±Ρ‚Ρ‹Ρ‚Ρ€Π°Ρž', lines=10)
    srt_file = gr.File(label='Π€Π°ΠΉΠ» SRT')
    subs_btn.click(
        process_video,
        inputs=video_input,
        outputs=[subs_box, srt_file]
    )

    # Styling & applying subtitles
    gr.Markdown('## Наладка ΡΡ‚Ρ‹Π»ΡŽ Ρ– Π½Π°ΠΊΠ»Π°Π΄Π°Π½Π½Π΅ ΡΡƒΠ±Ρ‚Ρ‹Ρ‚Ρ€Π°Ρž')
    font_dd = gr.Dropdown(list(AVAILABLE_FONTS.keys()),
                          value='DejaVuSans-Bold', label='Π¨Ρ€Ρ‹Ρ„Ρ‚')
    # Slider's third positional parameter is `value`, so the step must be
    # passed by keyword to avoid colliding with the `value=` argument.
    size_sl = gr.Slider(10, 100, step=1, value=40, label='ΠŸΠ°ΠΌΠ΅Ρ€ ΡˆΡ€Ρ‹Ρ„Ρ‚Π°')
    color_txt = gr.ColorPicker(value='#FFFFFF', label='ΠšΠΎΠ»Π΅Ρ€ тэксту')
    color_st = gr.ColorPicker(value='#000000', label='ΠšΠΎΠ»Π΅Ρ€ Π°Π±Π²ΠΎΠ΄ΠΊΡ–')
    width_sl = gr.Slider(0, 10, step=1, value=2, label='Π’Π°ΡžΡˆΡ‡Ρ‹Π½Ρ Π°Π±Π²ΠΎΠ΄ΠΊΡ–')
    pos_dd = gr.Dropdown(['bottom', 'top', 'center', 'custom'],
                         value='bottom', label='ΠŸΠ°Π·Ρ–Ρ†Ρ‹Ρ')
    anim_dd = gr.Dropdown(['None', 'fade', 'slide', 'zoom'],
                          value='None', label='Анімацыя')
    x_sl = gr.Slider(0, 1920, step=1, value=100, label='X')
    y_sl = gr.Slider(0, 1080, step=1, value=100, label='Y')
    bg_color = gr.ColorPicker(value='#000000', label='ΠšΠΎΠ»Π΅Ρ€ Ρ„ΠΎΠ½Ρƒ')
    bg_opac = gr.Slider(0.0, 1.0, step=0.1, value=0.5, label='ΠŸΡ€Π°Π·Ρ€Ρ‹ΡΡ‚Π°ΡΡ†ΡŒ Ρ„ΠΎΠ½Ρƒ')
    exp_dd = gr.Dropdown(['low', 'medium', 'high', 'veryslow'],
                         value='medium', label='Π―ΠΊΠ°ΡΡ†ΡŒ ΠΊΠ°Π΄Π·Ρ–Ρ€ΠΎΡžΠΊΡ–')
    out_vid = gr.Video(label='Π’Ρ‹Π½Ρ–ΠΊ')
    apply_btn = gr.Button('Накласці субтытры')

    # Input order must match the positional parameters of apply_subtitles.
    apply_btn.click(
        apply_subtitles,
        inputs=[
            video_input, subs_box, font_dd, size_sl,
            color_txt, color_st, width_sl, pos_dd,
            x_sl, y_sl, anim_dd, exp_dd,
            bg_color, bg_opac
        ],
        outputs=out_vid
    )

demo.launch(debug=True)