Spaces:

archivartaunik
/

CaptionBE

Running

App Files Files Community

CaptionBE / app_new.py

archivartaunik

Rename app.py to app_new.py

8ae53c8 verified 9 months ago

raw

history blame contribute delete

12.1 kB

	import os
	import gradio as gr
	import moviepy.editor as mpe
	import mimetypes
	from pydub import AudioSegment
	import google.generativeai as genai
	from PIL import Image, ImageColor

	# ——————————————————————————————————————————————————————————
	# ImageMagick policy fix
	# Tell MoviePy's TextClip which ImageMagick executable to invoke.
	mpe.TextClip.imagemagick_binary = "/usr/bin/convert"
	# Edit the ImageMagick-6 policy file in place with sed, substituting every
	# rights="none" attribute. The backslash before `|` is doubled so sed still
	# receives `\|` verbatim; written as a single backslash it is an invalid
	# Python escape sequence (SyntaxWarning on Python 3.12+).
	# NOTE(review): the exit status of the command is ignored, so a missing or
	# read-only policy file goes unnoticed here.
	os.system(
	    'sed -i \'s/rights="none"/rights="read\\|write"/g\' '
	    '/etc/ImageMagick-6/policy.xml'
	)

	# API configuration
	# Credentials and model id come from the environment variables "gemini"
	# and "mod"; each is None when the variable is not set.
	GEMINI_API_KEY = os.environ.get("gemini")
	MODEL_NAME = os.environ.get("mod")
	genai.configure(api_key=GEMINI_API_KEY)

	# Available fonts
	# Fonts referenced by absolute path under /usr/share/fonts.
	_SYSTEM_FONTS = {
	    "DejaVuSans-Bold": "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
	    "DejaVuSans": "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
	    "LiberationSerif-Bold": "/usr/share/fonts/truetype/liberation/LiberationSerif-Bold.ttf",
	}
	# Fonts referenced by a path relative to the working directory (fonts/...).
	_BUNDLED_FONTS = {
	    "BadScript-Regular": "fonts/Bad_Script/BadScript-Regular.ttf",
	    "Gidole-Regular": "fonts/Gidole/Gidole-Regular.ttf",
	    "GreatVibes-Regular": "fonts/Great_Vibes/GreatVibes-Regular.ttf",
	    "OpenSans-Variable": "fonts/Open_Sans/OpenSans-VariableFont_wdth,wght.ttf",
	    "OpenSans-Italic-Variable": "fonts/Open_Sans/OpenSans-Italic-VariableFont_wdth,wght.ttf",
	    "Roboto-Variable": "fonts/Roboto/Roboto-VariableFont_wdth,wght.ttf",
	    "Roboto-Italic-Variable": "fonts/Roboto/Roboto-Italic-VariableFont_wdth,wght.ttf",
	    "SourceCodePro-Variable": "fonts/Source_Code_Pro/SourceCodePro-VariableFont_wght.ttf",
	    "SourceCodePro-Italic-Variable": "fonts/Source_Code_Pro/SourceCodePro-Italic-VariableFont_wght.ttf",
	    "Tektur-Variable": "fonts/Tektur/Tektur-VariableFont_wdth,wght.ttf",
	    "Ponomar-Regular": "fonts/Ponomar/Ponomar-Regular.ttf",
	}
	# Display name -> font file path; system fonts first, then bundled ones.
	AVAILABLE_FONTS = {**_SYSTEM_FONTS, **_BUNDLED_FONTS}

	# ——————————————————————————————————————————————————————————
	# Utility: CSS color to RGB tuple
	def hex_to_rgb(color_str):
	    """Convert a CSS-style colour string into a tuple of integer channels.

	    Parsing is attempted in this order:

	    1. ``ImageColor.getrgb(color_str)``; its result is returned unchanged.
	    2. ``rgb(...)`` / ``rgba(...)`` whose channels may be fractional
	       (e.g. ``rgba(142.7, 89.8, 89.8, 1)``): the first three values are
	       rounded, clamped to 0-255 and returned; alpha is dropped.
	       NOTE(review): some Gradio ColorPicker versions reportedly emit this
	       form - confirm against the deployed version.
	    3. Hex digits without a usable ``#`` form: 6 or 8 digits (first three
	       byte pairs are used) or the 3-digit shorthand.

	    Args:
	        color_str: colour specification string.

	    Returns:
	        Tuple of ints; three channels for fallbacks 2 and 3.

	    Raises:
	        ValueError: when no form matches. Previously a wrong-length hex
	            string such as ``"12345"`` silently produced a bogus colour.
	    """
	    import re  # local import: `re` is not imported at file level

	    try:
	        return ImageColor.getrgb(color_str)
	    except ValueError:
	        pass

	    text = color_str.strip()

	    functional = re.fullmatch(r"rgba?\(\s*([^)]*?)\s*\)", text, re.IGNORECASE)
	    if functional:
	        parts = [p.strip() for p in functional.group(1).split(",")]
	        if len(parts) in (3, 4):
	            # float() raises ValueError by itself on a malformed number.
	            return tuple(
	                max(0, min(255, round(float(p)))) for p in parts[:3]
	            )

	    digits = text.lstrip('#')
	    if len(digits) == 3:
	        # Expand shorthand "abc" -> "aabbcc".
	        digits = "".join(ch * 2 for ch in digits)
	    if len(digits) not in (6, 8):
	        raise ValueError(f"unrecognised colour specification: {color_str!r}")
	    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

	# ——————————————————————————————————————————————————————————
	# Transcription & SRT helpers
	def transcribe_audio(audio_file):
	    """Send an audio file to the Gemini model and return its text reply.

	    The MIME type is guessed from the file name; when it cannot be guessed
	    a fixed message is returned and nothing is sent. Otherwise the file
	    bytes are submitted together with the prompt read from the environment
	    variable "p". A reply that is empty after stripping is replaced by a
	    fixed failure message.
	    """
	    guessed_type = mimetypes.guess_type(audio_file)[0]
	    if not guessed_type:
	        return "Немагчыма вызначыць тып файла."

	    with open(audio_file, "rb") as source:
	        payload = {"mime_type": guessed_type, "data": source.read()}

	    instruction = os.getenv("p")
	    gemini = genai.GenerativeModel(MODEL_NAME)
	    reply = gemini.generate_content([instruction, payload])

	    cleaned = reply.text.strip()
	    if cleaned:
	        return cleaned
	    return "Не атрымалася транскрыбаваць."

	def fix_subtitles_format(transcript):
	    """Ask the Gemini model to repair only the timestamp format of a transcript.

	    Returns the model's stripped reply, or the unchanged ``transcript`` when
	    that reply is empty.
	    """
	    request = (
	        "Не змяняй тэксты, выправі толькі фармат HH:MM:SS,mmm:\n{}"
	    ).format(transcript)
	    reply = genai.GenerativeModel(MODEL_NAME).generate_content(request)
	    cleaned = reply.text.strip()
	    return cleaned if cleaned else transcript

	def create_srt(transcript):
	    """Write ``transcript`` to ``subtitles.srt`` (UTF-8) in the working directory.

	    Returns:
	        ``(transcript, path)`` - the text as given and the file name written.
	    """
	    target = "subtitles.srt"
	    with open(target, mode="w", encoding="utf-8") as srt_out:
	        srt_out.write(transcript)
	    result = (transcript, target)
	    return result

	def extract_audio_from_video(video_path):
	    """Export the audio of ``video_path`` to ``extracted_audio.mp3``.

	    Returns:
	        ``(mp3_path, None)`` on success, or ``(None, message)`` where
	        ``message`` is the string form of whatever exception was raised.
	    """
	    mp3_path = "extracted_audio.mp3"
	    try:
	        AudioSegment.from_file(video_path).export(mp3_path, format="mp3")
	    except Exception as exc:
	        return None, str(exc)
	    return mp3_path, None

	def process_video(video_path):
	    """Run the pipeline: extract audio, transcribe, fix format, write SRT.

	    Returns:
	        ``(error_message, None)`` when audio extraction fails, otherwise
	        the ``(text, srt_path)`` pair produced by ``create_srt``.
	    """
	    audio_path, failure = extract_audio_from_video(video_path)
	    if failure:
	        return failure, None
	    return create_srt(fix_subtitles_format(transcribe_audio(audio_path)))

	# ——————————————————————————————————————————————————————————
	# SRT parsing
	def srt_time_to_sec(time_str):
	    """Convert an SRT timestamp (``HH:MM:SS,mmm``) to seconds.

	    Surrounding whitespace is ignored, a dot is accepted in place of the
	    comma, and a timestamp without a fractional part is treated as ``,000``.

	    Args:
	        time_str: timestamp text.

	    Returns:
	        Seconds as a float.

	    Raises:
	        ValueError: when the text does not have three colon-separated
	            fields or a field is not an integer.
	    """
	    t = time_str.strip().replace('.', ',')
	    if ',' not in t:
	        t += ',000'
	    h, m, s_ms = t.split(':')
	    s, ms = s_ms.split(',')
	    return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000

	def parse_srt_from_text(srt_text):
	    """Parse SRT text into a list of cue dictionaries.

	    Cues are runs of non-blank lines separated by one or more blank (or
	    whitespace-only) lines; both ``\\n`` and ``\\r\\n`` line endings are
	    handled. Within a cue, line 1 is ignored (index), line 2 must be
	    ``start --> end``, and the remaining lines are joined with spaces as
	    the text. Cues with fewer than three lines or an unparsable timing
	    line are skipped.

	    Args:
	        srt_text: full SRT document as a string.

	    Returns:
	        List of ``{'start': float, 'end': float, 'text': str}`` in input
	        order.
	    """
	    # Group non-blank lines into cues; splitlines() copes with CRLF input,
	    # which a plain split("\n\n") does not.
	    cues, current = [], []
	    for line in srt_text.splitlines():
	        if line.strip():
	            current.append(line)
	        elif current:
	            cues.append(current)
	            current = []
	    if current:
	        cues.append(current)

	    subs = []
	    for lines in cues:
	        if len(lines) < 3:
	            continue
	        try:
	            start_str, end_str = lines[1].split('-->')
	            start = srt_time_to_sec(start_str)
	            end = srt_time_to_sec(end_str)
	        except ValueError:
	            # Malformed timing line: skip this cue only. Other exception
	            # types are no longer swallowed.
	            continue
	        subs.append({'start': start, 'end': end, 'text': ' '.join(lines[2:])})
	    return subs

	# ——————————————————————————————————————————————————————————
	# Animated TextClip creator
	def create_animated_text_clip(
	    text, duration, font, fontsize, color,
	    stroke_color, stroke_width,
	    position_type, custom_x, custom_y,
	    animation, video_width, video_height,
	    bg_color=None, bg_opacity=1.0
	):
	    """Build a positioned, optionally animated MoviePy text clip.

	    Args:
	        text: caption text.
	        duration: clip duration in seconds.
	        font, fontsize, color, stroke_color, stroke_width: forwarded to
	            ``mpe.TextClip``.
	        position_type: ``'bottom'``, ``'top'`` or ``'center'``; any other
	            value uses ``custom_x`` (treated as the horizontal centre of
	            the text) and ``custom_y`` (top edge).
	        custom_x, custom_y: coordinates for the custom position.
	        animation: ``'fade'``, ``'slide'`` or ``'zoom'`` (case-insensitive);
	            anything else, including ``None``, means no animation.
	        video_width, video_height: frame size used to compute positions.
	        bg_color: optional colour string; when truthy the text is placed
	            on a padded box of that colour.
	        bg_opacity: opacity passed to ``on_color`` for that box.

	    Returns:
	        The resulting clip with duration and position set.
	    """
	    txt = mpe.TextClip(
	        text, font=font, fontsize=fontsize,
	        color=color, stroke_color=stroke_color,
	        stroke_width=stroke_width
	    )
	    if bg_color:
	        # Keep only three channels: the colour parser may return an alpha
	        # component, and opacity is passed separately as col_opacity.
	        bg_col = hex_to_rgb(bg_color)[:3]
	        pad_x, pad_y = 10, 5
	        txt = txt.on_color(
	            size=(txt.w + 2 * pad_x, txt.h + 2 * pad_y),
	            color=bg_col, pos=(pad_x, pad_y),
	            col_opacity=bg_opacity
	        )
	    txt = txt.set_duration(duration)

	    # Position (computed after the optional background so txt.w / txt.h
	    # include the padding).
	    if position_type == 'bottom':
	        x = (video_width - txt.w) / 2
	        y = video_height - txt.h - 20 - fontsize
	    elif position_type == 'top':
	        x = (video_width - txt.w) / 2
	        y = 20 + fontsize
	    elif position_type == 'center':
	        x = (video_width - txt.w) / 2
	        y = (video_height - txt.h) / 2
	    else:
	        x = custom_x - txt.w / 2
	        y = custom_y

	    # Animation
	    anim = (animation or '').lower()
	    if anim == 'fade':
	        fd = min(0.5, duration / 2)
	        txt = txt.fadein(fd).fadeout(fd)
	    elif anim == 'slide':
	        fd = min(0.5, duration / 2)
	        if fd > 0:
	            clip_w = txt.w
	            # Return here: a following static set_position((x, y)) would
	            # overwrite the moving position and cancel the slide.
	            return txt.set_position(lambda t: (
	                -clip_w + (x + clip_w) * min(t / fd, 1), y
	            ))
	    elif anim == 'zoom' and duration > 0:
	        txt = txt.resize(lambda t: 0.5 + 0.5 * min(t / duration, 1))

	    return txt.set_position((x, y))

	# ——————————————————————————————————————————————————————————
	# Extract first frame for preview
	def extract_first_frame(video_path):
	    """Return the frame at t=0 of ``video_path``, or ``None`` on any failure.

	    The clip is opened as a context manager so it is closed even when
	    reading the frame raises. Only ``Exception`` subclasses are turned into
	    ``None``; ``KeyboardInterrupt`` / ``SystemExit`` now propagate.
	    """
	    try:
	        with mpe.VideoFileClip(video_path) as clip:
	            return clip.get_frame(0)
	    except Exception:
	        return None

	# Single-frame preview video (fixed 'bgo' → 'bg_opacity')
	def create_single_frame_video(
	    frame, text, font, fontsize, color,
	    stroke_color, stroke_width, position_type,
	    custom_x, custom_y, animation,
	    bg_color=None, bg_opacity=1.0
	):
	    """Render a one-second preview of ``text`` over a single still frame.

	    Args:
	        frame: image array whose ``shape`` unpacks as (height, width, _),
	            or ``None``.
	        text: caption text to overlay.
	        font: key of ``AVAILABLE_FONTS`` or a font name/path; unknown keys
	            are passed through unchanged.
	        fontsize, color, stroke_color, stroke_width, position_type,
	        custom_x, custom_y, animation, bg_color, bg_opacity: forwarded to
	            ``create_animated_text_clip``.

	    Returns:
	        ``'preview_video.mp4'`` after writing it (24 fps, libx264, no
	        audio), or ``None`` when ``frame`` is ``None``.
	    """
	    if frame is None:
	        return None
	    h, w, _ = frame.shape
	    # Resolve a dropdown key to its file path, as apply_subtitles does.
	    font_path = AVAILABLE_FONTS.get(font, font)
	    base = mpe.ImageClip(frame).set_duration(1.0)
	    txt = create_animated_text_clip(
	        text, 1.0, font_path, fontsize, color,
	        stroke_color, stroke_width,
	        position_type, custom_x, custom_y,
	        animation, w, h,
	        bg_color=bg_color, bg_opacity=bg_opacity
	    )
	    final_clip = mpe.CompositeVideoClip([base, txt])
	    path = 'preview_video.mp4'
	    try:
	        final_clip.write_videofile(
	            path, fps=24, codec='libx264',
	            audio=False, verbose=False, logger=None
	        )
	    finally:
	        # Release clip resources whether or not encoding succeeded.
	        for part in (final_clip, txt, base):
	            part.close()
	    return path

	# ——————————————————————————————————————————————————————————
	# Apply subtitles to full video
	def apply_subtitles(
	    video_path, srt_text, font_key, fontsize, color,
	    stroke_color, stroke_width, position_type,
	    custom_x, custom_y, animation, quality,
	    bg_color=None, bg_opacity=1.0
	):
	    """Burn the cues of ``srt_text`` into ``video_path`` and write the result.

	    Each cue returned by ``parse_srt_from_text`` becomes one clip built by
	    ``create_animated_text_clip``, so the position, animation and
	    background arguments take effect (they were previously accepted but
	    ignored). Cue times are clamped to the video's duration; cues that are
	    empty or end up with no positive duration are skipped.

	    Args:
	        video_path: path of the source video.
	        srt_text: subtitle document in SRT format.
	        font_key: key of ``AVAILABLE_FONTS`` or a font name/path; unknown
	            keys are passed through unchanged.
	        fontsize, color, stroke_color, stroke_width: text styling.
	        position_type, custom_x, custom_y: placement, see
	            ``create_animated_text_clip``.
	        animation: animation name, see ``create_animated_text_clip``.
	        quality: ``'low'`` / ``'high'`` are mapped to the encoder presets
	            ``'veryfast'`` / ``'slow'``; any other value is passed to
	            ``write_videofile`` as the preset unchanged.
	        bg_color, bg_opacity: optional caption background.

	    Returns:
	        ``"video_with_subs.mp4"``, the file written (libx264 video, aac
	        audio).
	    """
	    font_path = AVAILABLE_FONTS.get(font_key, font_key)
	    # 'low' and 'high' are labels from the UI, not x264 preset names.
	    preset = {'low': 'veryfast', 'high': 'slow'}.get(quality, quality)
	    output_path = "video_with_subs.mp4"

	    clip = mpe.VideoFileClip(video_path)
	    layers = [clip]
	    final = None
	    try:
	        for cue in parse_srt_from_text(srt_text):
	            start = max(cue['start'], 0)
	            # Clamp so a late cue cannot extend the composite past the video.
	            end = min(cue['end'], clip.duration)
	            if end <= start or not cue['text'].strip():
	                continue
	            txt = create_animated_text_clip(
	                cue['text'], end - start, font_path, fontsize, color,
	                stroke_color, stroke_width,
	                position_type, custom_x, custom_y,
	                animation, clip.w, clip.h,
	                bg_color=bg_color, bg_opacity=bg_opacity
	            )
	            layers.append(txt.set_start(start))

	        final = mpe.CompositeVideoClip(layers)
	        final.write_videofile(
	            output_path,
	            codec='libx264', audio_codec='aac',
	            preset=preset
	        )
	        return output_path
	    finally:
	        # Release readers and temporary resources even if encoding failed.
	        if final is not None:
	            final.close()
	        for layer in reversed(layers):
	            layer.close()

	# ——————————————————————————————————————————————————————————
	# Gradio interface
	with gr.Blocks() as demo:
	    gr.Markdown('# Аўтаматычнае стварэнне і накладанне субтытраў')

	    # Video upload & preview
	    video_input = gr.Video(label='Загрузіце відэа')
	    first_frame = gr.State()
	    preview_output = gr.Video(label="Прэв'ю", visible=False)

	    def on_video_load(video_path):
	        """Store the first frame and toggle uploader / preview visibility.

	        Returns, in order: the value for ``first_frame`` and visibility
	        updates for ``video_input`` and ``preview_output``.
	        """
	        if not video_path:
	            return None, gr.update(visible=True), gr.update(visible=False)
	        frame = extract_first_frame(video_path)
	        # NOTE(review): the uploader is hidden and the preview shown, but
	        # nothing in this block assigns a value to preview_output - confirm
	        # this is intended.
	        return frame, gr.update(visible=False), gr.update(visible=True)

	    video_input.change(
	        on_video_load,
	        inputs=video_input,
	        outputs=[first_frame, video_input, preview_output]
	    )

	    # Transcribe button
	    gr.Markdown('## Стварыць субцітры з аўдыё')
	    subs_btn = gr.Button('Стварыць субцітры')
	    subs_box = gr.Textbox(label='Тэкст субтытраў', lines=10)
	    srt_file = gr.File(label='Файл SRT')

	    subs_btn.click(
	        process_video,
	        inputs=video_input,
	        outputs=[subs_box, srt_file]
	    )

	    # Styling & applying subtitles.
	    # Sliders pass `step` by keyword: gr.Slider's third positional parameter
	    # is `value`, so Slider(min, max, 1, value=...) supplies `value` twice
	    # and raises TypeError.
	    gr.Markdown('## Наладка стылю і накладанне субтытраў')
	    font_dd = gr.Dropdown(list(AVAILABLE_FONTS.keys()),
	                          value='DejaVuSans-Bold', label='Шрыфт')
	    size_sl = gr.Slider(10, 100, step=1, value=40, label='Памер шрыфта')
	    color_txt = gr.ColorPicker(value='#FFFFFF', label='Колер тэксту')
	    color_st = gr.ColorPicker(value='#000000', label='Колер абводкі')
	    width_sl = gr.Slider(0, 10, step=1, value=2, label='Таўшчыня абводкі')
	    pos_dd = gr.Dropdown(['bottom', 'top', 'center', 'custom'],
	                         value='bottom', label='Пазіцыя')
	    anim_dd = gr.Dropdown(['None', 'fade', 'slide', 'zoom'],
	                          value='None', label='Анімацыя')
	    x_sl = gr.Slider(0, 1920, step=1, value=100, label='X')
	    y_sl = gr.Slider(0, 1080, step=1, value=100, label='Y')
	    bg_color = gr.ColorPicker(value='#000000', label='Колер фону')
	    bg_opac = gr.Slider(0.0, 1.0, step=0.1, value=0.5, label='Празрыстасць фону')
	    exp_dd = gr.Dropdown(['low', 'medium', 'high', 'veryslow'],
	                         value='medium', label='Якасць кадзіроўкі')
	    out_vid = gr.Video(label='Вынік')

	    apply_btn = gr.Button('Накласці субтытры')
	    # Input order must match the positional parameters of apply_subtitles.
	    apply_btn.click(
	        apply_subtitles,
	        inputs=[
	            video_input, subs_box, font_dd, size_sl,
	            color_txt, color_st, width_sl, pos_dd,
	            x_sl, y_sl, anim_dd, exp_dd,
	            bg_color, bg_opac
	        ],
	        outputs=out_vid
	    )

	demo.launch(debug=True)