Spaces:
Sleeping
Sleeping
| from moviepy.editor import * | |
| from PIL import Image | |
| import pytesseract | |
| import numpy as np | |
| from gtts import gTTS | |
| from mutagen.mp3 import MP3 | |
| import os | |
| import uuid | |
| import moviepy.config as mpc | |
# --- Environment & workspace setup ------------------------------------------
# Point imageio/moviepy at the system ffmpeg binary and keep temp files in /tmp.
os.environ["IMAGEIO_FFMPEG_EXE"] = "/usr/bin/ffmpeg"
os.environ["TMPDIR"] = "/tmp"

# Working directories for intermediate assets.
IMAGE_DIR = "/tmp/images"   # input slide images (slide<id>.png)
AUDIO_DIR = "/tmp/sound"    # generated narration audio (audio<id>.mp3)
CLIPS_DIR = "/tmp/video"    # rendered output clips (clip<id>.mp4)

for _workdir in (IMAGE_DIR, AUDIO_DIR, CLIPS_DIR):
    os.makedirs(_workdir, exist_ok=True)
    # World-writable so whichever sandboxed worker renders the video can write.
    os.chmod(_workdir, 0o777)
def video_func(id, lines):
    """Render a narrated, word-highlighting video for one slide.

    Generates Tamil text-to-speech audio for ``lines[id]``, OCRs
    ``slide<id>.png`` to locate each word, then composes a video of the
    slide image with a semi-transparent yellow box stepping over the words
    in reading order, with the steps spread evenly across the audio's
    duration.

    Args:
        id: Slide index into ``lines`` (NOTE: shadows the ``id`` builtin;
            kept unchanged for backward compatibility with callers).
        lines: Sequence of slide texts; ``lines[id]`` is narrated.

    Returns:
        Path of the written mp4 clip inside ``CLIPS_DIR``.

    Raises:
        FileNotFoundError: If the TTS audio was not saved or the slide
            image is missing.
    """
    print(f"Processing video for id={id}")

    # --- Narration audio ----------------------------------------------------
    tts = gTTS(text=lines[id], lang='ta', slow=False)
    audio_name = f"audio{id}.mp3"
    audio_path = os.path.join(AUDIO_DIR, audio_name)
    print(f"Saving audio to {audio_path}")
    tts.save(audio_path)
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file {audio_path} was not created")
    # mutagen reads the mp3 header; duration drives all highlight timing.
    duration = MP3(audio_path).info.length
    print(f"Audio duration: {duration} seconds")

    # --- Slide image --------------------------------------------------------
    IMAGE_PATH = os.path.join(IMAGE_DIR, f"slide{id}.png")
    if not os.path.exists(IMAGE_PATH):
        raise FileNotFoundError(f"Image file {IMAGE_PATH} not found")
    print(f"Using image: {IMAGE_PATH}")

    VIDEO_DURATION = duration
    HIGHLIGHT_COLOR = (255, 255, 0)  # yellow highlight
    HIGHLIGHT_OPACITY = 0.5          # semi-transparent overlay

    # --- OCR: word bounding boxes -------------------------------------------
    img = Image.open(IMAGE_PATH)
    data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
    words = []
    for i in range(len(data['text'])):
        word = data['text'][i].strip()
        # BUGFIX: newer Tesseract versions report confidence as a float
        # string (e.g. "96.5"); int("96.5") raises ValueError. float()
        # accepts both integer and float confidence strings.
        if word and float(data['conf'][i]) > 60:
            x, y, w, h = (data['left'][i], data['top'][i],
                          data['width'][i], data['height'][i])
            words.append({'text': word, 'box': (x, y, w, h)})
    print(f"Found {len(words)} words in image")

    # --- Compose video ------------------------------------------------------
    image_clip = ImageClip(IMAGE_PATH).set_duration(VIDEO_DURATION)
    n_words = len(words)
    # Guard against OCR finding nothing (avoids ZeroDivisionError).
    highlight_duration = VIDEO_DURATION / n_words if n_words > 0 else VIDEO_DURATION
    highlight_clips = []
    for i, word in enumerate(words):
        x, y, w, h = word['box']
        start = i * highlight_duration
        rect = (ColorClip(size=(w, h), color=HIGHLIGHT_COLOR)
                .set_opacity(HIGHLIGHT_OPACITY)
                .set_position((x, y))
                .set_start(start)
                .set_end(start + highlight_duration))
        highlight_clips.append(rect)

    final_clip = CompositeVideoClip([image_clip] + highlight_clips)
    audio_clip = AudioFileClip(audio_path)  # renamed from `audio` to avoid reuse with the MP3 object
    final_clip = final_clip.set_audio(audio_clip)

    clip_name = f"clip{id}.mp4"
    video_path = os.path.join(CLIPS_DIR, clip_name)
    print(f"Writing video to {video_path}")
    try:
        # verbose/logger kept for debugging visibility (moviepy 1.x API).
        final_clip.write_videofile(video_path, fps=24, verbose=True, logger='bar')
    finally:
        # BUGFIX: close the clips to release the ffmpeg reader subprocesses
        # and file handles moviepy holds open — otherwise repeated calls
        # leak processes/descriptors.
        audio_clip.close()
        final_clip.close()
    print(f"Video saved to {video_path}")
    return video_path