# subprocess3 / app.py — Flask service that turns prompt text into a narrated
# slide video (Tamil TTS + OCR-timed word highlights), served on port 7860.
# (Provenance: Hugging Face Space, commit 255df7f.)
import os
import tempfile
import traceback
from flask import Flask, request, jsonify, send_file
from PIL import Image
from moviepy.editor import *
from gtts import gTTS
from mutagen.mp3 import MP3
import pytesseract
import numpy as np
app = Flask(__name__)
def video_func(id, lines):
    """Render one narrated slide clip to /tmp/clip{id}.mp4.

    Generates Tamil TTS audio for lines[id], OCRs the matching slide image
    (/tmp/images/slide{id}.png) to find word bounding boxes, and overlays a
    semi-transparent yellow highlight on each word for an equal share of the
    audio duration.

    Args:
        id: Index of the slide/line to render; also used in file names.
            (Shadows the builtin `id`; name kept for call compatibility.)
        lines: List of text blocks, one per slide.

    Errors are logged and swallowed so one failed clip does not abort the
    batch; the caller will then fail when loading the missing clip file.
    """
    # delete=False so gTTS / mutagen can reopen the file by name on any OS;
    # that makes us responsible for removing it (see finally below).
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
    temp_audio.close()
    try:
        # Text-to-speech: the narration length fixes the clip duration.
        tts = gTTS(text=lines[id], lang='ta', slow=False)
        tts.save(temp_audio.name)
        video_duration = MP3(temp_audio.name).info.length

        image_path = f"/tmp/images/slide{id}.png"
        highlight_color = (255, 255, 0)
        highlight_opacity = 0.5

        # OCR: collect confident words with their bounding boxes.
        img = Image.open(image_path)
        data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
        words = []
        for i in range(len(data['text'])):
            word = data['text'][i].strip()
            # conf arrives as a string; float() also accepts values like
            # '96.0' that some pytesseract versions emit and that the
            # original int() conversion crashed on.
            if word and float(data['conf'][i]) > 60:
                x, y, w, h = (data['left'][i], data['top'][i],
                              data['width'][i], data['height'][i])
                words.append({'text': word, 'box': (x, y, w, h)})

        image_clip = ImageClip(image_path).set_duration(video_duration)

        # One equal-length highlight window per word (max() guards the
        # zero-words case against division by zero).
        highlight_duration = video_duration / max(len(words), 1)
        highlight_clips = []
        for i, word in enumerate(words):
            x, y, w, h = word['box']
            start = i * highlight_duration
            rect = ColorClip(size=(w, h), color=highlight_color)
            rect = (rect.set_opacity(highlight_opacity)
                        .set_position((x, y))
                        .set_start(start)
                        .set_end(start + highlight_duration))
            highlight_clips.append(rect)

        final_clip = CompositeVideoClip([image_clip] + highlight_clips)
        audio = AudioFileClip(temp_audio.name)
        final_clip = final_clip.set_audio(audio)
        final_clip.write_videofile(f"/tmp/clip{id}.mp4", fps=24)
        # Release ffmpeg readers/writers promptly (the original never
        # closed them, leaking file handles per clip).
        audio.close()
        final_clip.close()
    except Exception as e:
        print(f"Error in video_func for ID {id}: {e}")
        traceback.print_exc()
    finally:
        # The delete=False temp mp3 must be removed explicitly — the
        # original leaked one per call.
        try:
            os.remove(temp_audio.name)
        except OSError:
            pass
def _is_heading(line):
    """True for a stripped line that looks like a heading: '#...?' or '#...:'."""
    return line.startswith("#") and (line.endswith('?') or line.endswith(':'))


def _take_paragraph(raw_lines, i):
    """Collect up to 5 stripped non-heading lines starting at index i.

    Returns (lines_collected, next_index). Stops early at the next heading.
    """
    body = []
    while i < len(raw_lines) and len(body) < 5:
        nxt = raw_lines[i].strip()
        if _is_heading(nxt):
            break
        body.append(nxt)
        i += 1
    return body, i


def _split_blocks(raw_lines):
    """Split prompt lines into narration blocks (one block per slide).

    A heading absorbs up to 5 following non-heading lines; text without a
    heading is chunked into groups of up to 5 lines. Each block is returned
    as a single newline-joined string.
    """
    blocks = []
    i = 0
    while i < len(raw_lines):
        line = raw_lines[i].strip()
        if _is_heading(line):
            body, i = _take_paragraph(raw_lines, i + 1)
            blocks.append(line + '\n' + '\n'.join(body) if body else line)
        else:
            # First line is non-heading, so body is never empty here and
            # the loop always advances.
            body, i = _take_paragraph(raw_lines, i)
            blocks.append('\n'.join(body))
    return blocks


@app.route("/generate", methods=["POST"])
def generate_video():
    """POST /generate: build a narrated slide video from the request prompt.

    Expects JSON; the prompt text is read from the "duration" key, split
    into blocks, rendered to per-slide clips, concatenated, and returned
    as video/mp4. Returns 400 for an empty prompt, 500 on any failure.
    """
    try:
        # silent=True + fallback: a non-JSON body previously raised
        # AttributeError on None and surfaced as a 500 instead of a 400.
        data = request.get_json(silent=True) or {}
        # NOTE(review): the prompt text arrives under the key "duration" —
        # looks like a misnamed field; confirm with the client before renaming.
        prompt = data.get("duration", '').strip()
        prompts = prompt.replace("**", "")
        if not prompts:
            return jsonify({"error": "prompts must not be empty"}), 400

        lines = _split_blocks(prompts.splitlines())

        # Slide image creation. NOTE(review): create_text_image is not
        # defined in this file — presumably elsewhere in the project; a
        # NameError here is caught below. TODO confirm.
        image_olst = []
        for idx in range(len(lines)):
            create_text_image(lines[idx], idx, image_olst)

        for idx in range(len(lines)):
            video_func(idx, lines)

        clips = [VideoFileClip(f"/tmp/clip{idx}.mp4") for idx in range(len(lines))]
        final_video = concatenate_videoclips(clips)
        output_path = "/tmp/final_output.mp4"
        final_video.write_videofile(output_path, fps=24)

        # Clean up: close ffmpeg readers and delete intermediates (the
        # original leaked the clip readers and per-slide mp4 files).
        final_video.close()
        for idx, clip in enumerate(clips):
            clip.close()
            try:
                os.remove(f"/tmp/clip{idx}.mp4")
            except OSError:
                pass
        for img in image_olst:
            os.remove(img)

        return send_file(output_path, mimetype='video/mp4')
    except Exception as e:
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
@app.route("/")
def home():
    """Health-check endpoint: confirms the service is up."""
    return "Flask Video Generator is Running"
if __name__ == "__main__":
    # 0.0.0.0 exposes the server inside the container; 7860 is the
    # conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0", port=7860)