import gradio as gr
import requests
import tempfile
from faster_whisper import WhisperModel
from gtts import gTTS
import os
import subprocess
from PIL import Image, ImageDraw, ImageFont
import random
import textwrap
import pkg_resources
import sys
import io
import base64
import re

# === Config ===
# SECURITY: the API keys that were previously hard-coded here were live
# secrets committed to source control -- they must be rotated immediately.
# Credentials are now read from the environment instead of being embedded.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_MODEL = "llama3-70b-8192"
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
# OpenAI image-generation endpoint (DALL-E 3).
IMAGE_GENERATION_API_URL = "https://api.openai.com/v1/images/generations"
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

# === Init Whisper ===
# CPU int8 inference keeps the memory footprint small; loaded once at import.
whisper = WhisperModel("base", device="cpu", compute_type="int8")


# === Animation Functions ===

def get_audio_duration(audio_file):
    """Return the duration of *audio_file* in seconds using ffprobe.

    Args:
        audio_file (str): Path to the audio file.

    Returns:
        float: Duration in seconds, or 5 (a default) if probing fails.
    """
    try:
        command = [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            audio_file,
        ]
        # check=True so a non-zero ffprobe exit is reported explicitly
        # instead of surfacing as an opaque float('') parse error.
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        return float(result.stdout)
    except Exception as e:
        print(f"Error getting audio duration: {e}")
        return 5  # Return a default duration


def generate_images_with_openai(prompt, num_images=1):
    """Generate images using OpenAI's DALL-E 3 API.

    Args:
        prompt (str): The prompt to use for image generation.
        num_images (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        list | None: A list of image URLs, or None on error.
    """
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "dall-e-3",  # Use the DALL-E 3 model
        "prompt": prompt,
        "n": num_images,
        "size": "1024x1024",  # You can adjust the size as needed
        "response_format": "url",  # Ensure we get URLs
    }
    data = None  # defined up front so the KeyError handler can reference it safely
    try:
        # Explicit timeout so a hung API call cannot block the pipeline forever.
        response = requests.post(
            IMAGE_GENERATION_API_URL, headers=headers, json=payload, timeout=120
        )
        response.raise_for_status()  # Raise an exception for bad status codes
        data = response.json()
        image_urls = [item["url"] for item in data["data"]]
        return image_urls
    except requests.exceptions.RequestException as e:
        print(f"Error generating images with OpenAI: {e}")
        return None
    except KeyError:
        print(f"Error: Unexpected response format from OpenAI: {data}")
        return None
    except Exception as e:
        print(f"Error during image generation: {e}")
        return None


def create_animated_explanation(text, audio_file):
    """Render an MP4 showing *text* sentence-by-sentence with AI illustrations.

    Args:
        text (str): The text to display and explain.
        audio_file (str): Path to the narration audio file.

    Returns:
        str | None: Path to the generated video file (.mp4), or None on error.
    """
    try:
        # 1. Split text into meaningful chunks (sentences or phrases).
        sentences = split_text_into_chunks(text)
        if not sentences:
            # Guard: avoid IndexError below when the input text is empty.
            sentences = [text or ""]

        audio_duration = get_audio_duration(audio_file)
        if audio_duration is None:
            audio_duration = 5  # set default

        fps = 10
        # FIX: derive the frame count from the audio length so the video and
        # the narration stay in sync. The original always rendered exactly
        # 100 frames (10 s) regardless of the audio duration it had computed.
        total_frames = max(1, int(audio_duration * fps))

        # 2. Generate one image per key sentence (None marks a failed generation).
        image_urls = []
        for sentence in sentences:
            image_prompt = f"Illustrate the concept: {sentence}"
            urls = generate_images_with_openai(image_prompt)  # 1 image per sentence
            image_urls.append(urls[0] if urls else None)

        # Load the font once, outside the per-frame loop; fall back to the
        # built-in bitmap font when DejaVuSans.ttf is not installed.
        try:
            font = ImageFont.truetype("DejaVuSans.ttf", 20)
        except OSError:
            font = ImageFont.load_default()

        # 3. Create frames for the animation.
        frames = []
        for i in range(total_frames):
            frame_progress = i / total_frames
            sentence_index = int(frame_progress * len(sentences))
            sentence_index = min(sentence_index, len(sentences) - 1)  # clamp

            img = Image.new("RGB", (640, 480), color=(220, 220, 220))  # light gray
            d = ImageDraw.Draw(img)

            current_sentence = sentences[sentence_index]
            lines = textwrap.wrap(current_sentence, width=40)
            y_start = (480 - len(lines) * 24) // 2

            # Display sentence, horizontally centered line by line.
            for j, line in enumerate(lines):
                try:
                    bbox = d.textbbox((0, 0), line, font=font)
                    text_width = bbox[2] - bbox[0]
                    text_x = (640 - text_width) // 2
                except AttributeError as e:
                    # textbbox requires Pillow >= 8.0; left-align as a fallback.
                    print(f"Pillow version error: {e}")
                    text_x = 10
                d.text((text_x, y_start + j * 24), line, fill=(0, 0, 0), font=font)

            # Add the sentence's illustration if one was generated.
            if image_urls[sentence_index]:
                try:
                    image_data = requests.get(
                        image_urls[sentence_index], stream=True, timeout=60
                    ).raw
                    img_to_paste = Image.open(image_data).resize((200, 200))
                    img.paste(img_to_paste, (440, 280))  # bottom-right corner
                except Exception as e:
                    print(f"Error loading or pasting image: {e}")

            frames.append(img)

        # 4. Save frames and mux them with the audio via ffmpeg.
        image_files = []
        for i, frame in enumerate(frames):
            image_file = f"frame_{i:04d}.png"
            frame.save(image_file)
            image_files.append(image_file)

        video_file = "animated_explanation.mp4"
        command = [
            "ffmpeg",
            "-framerate", str(fps),
            "-i", "frame_%04d.png",
            "-i", audio_file,
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-shortest",  # stop at the shorter stream so tracks stay aligned
            "-y", video_file,
        ]
        subprocess.run(command, check=True, capture_output=True)

        # Clean up the intermediate frame images.
        for image_file in image_files:
            os.remove(image_file)

        return video_file
    except Exception as e:
        print(f"Error creating animated explanation: {e}")
        return None  # Return None on error


def split_text_into_chunks(text):
    """Split *text* into sentences on terminal punctuation.

    NOTE(review): the original regex was truncated in the source under
    review (it ended mid-pattern at ``re.split(r'(?``). A standard
    lookbehind sentence split is used here -- confirm it matches the
    intended behavior, since the original comment mentioned handling
    abbreviations as well.
    """
    sentences = re.split(r"(?<=[.!?])\s+", text.strip())
    return [s.strip() for s in sentences if s.strip()]