"""Helpers for turning source text into a two-speaker podcast.

The pipeline is: fetch/trim text -> ask a Groq-hosted LLM for a JSON dialogue
-> synthesize each line with gTTS -> optionally mux audio with a still image.
"""

import json
import os
import re
import tempfile
from typing import List, Literal

import requests
import tiktoken
from bs4 import BeautifulSoup
from groq import Groq
from gtts import gTTS
from moviepy.editor import ImageClip, AudioFileClip
from pydantic import BaseModel, ValidationError

# Initialize Groq client and tokenizer
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
tokenizer = tiktoken.get_encoding("cl100k_base")

# Model used for script generation.
GROQ_MODEL = "llama-3.1-70b-versatile"

# Upper bound (seconds) on how long a page fetch may block.
REQUEST_TIMEOUT_SECONDS = 30


# Data models for validating dialogue structure
class DialogueItem(BaseModel):
    """A single spoken line; ``speaker`` is restricted to the two hosts."""

    speaker: Literal["Priya", "Ananya"]
    text: str


class Dialogue(BaseModel):
    """An ordered list of dialogue lines, as expected in the LLM's JSON reply."""

    dialogue: List[DialogueItem]


def truncate_text(text: str, max_tokens: int = 2048) -> str:
    """Return *text* cut down to at most *max_tokens* ``cl100k_base`` tokens.

    Text that already fits is returned unchanged.
    """
    tokens = tokenizer.encode(text)
    if len(tokens) > max_tokens:
        return tokenizer.decode(tokens[:max_tokens])
    return text


def extract_text_from_url(url: str) -> str:
    """Download *url* and return its visible text, one phrase per line.

    Raises:
        ValueError: If the request fails or the page cannot be parsed.
    """
    try:
        # A timeout keeps an unresponsive server from hanging the caller.
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Remove script and style elements; their contents are never visible.
        for tag in soup(["script", "style"]):
            tag.decompose()

        # Strip each line, then break lines on runs of two spaces so that
        # separate phrases land on separate lines while sentences stay intact.
        # (Splitting on a single space would put every word on its own line.)
        lines = (line.strip() for line in soup.get_text().splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        return "\n".join(chunk for chunk in chunks if chunk)
    except Exception as e:
        raise ValueError(f"Error extracting text from URL: {str(e)}") from e


def _parse_dialogue(content: str) -> Dialogue:
    """Parse and validate *content* as a ``Dialogue`` JSON document.

    Tries the whole string first; if that fails, falls back to the outermost
    ``{...}`` span so that prose around the JSON does not break parsing.

    Raises:
        ValueError: If no valid dialogue JSON can be recovered.
    """
    try:
        return Dialogue.model_validate(json.loads(content))
    except (json.JSONDecodeError, ValidationError):
        pass

    match = re.search(r'\{.*\}', content, re.DOTALL)
    if not match:
        raise ValueError(f"Failed to find valid JSON in the response: {content}")
    try:
        return Dialogue.model_validate(json.loads(match.group()))
    except (json.JSONDecodeError, ValidationError) as parse_error:
        raise ValueError(
            f"Failed to parse dialogue JSON: {parse_error}\nContent: {content}"
        ) from parse_error


def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
    """Ask the LLM for a podcast dialogue about *input_text*.

    Args:
        system_prompt: Base instructions placed at the top of the prompt.
        input_text: Source material; truncated to the token limit before use.
        tone: Desired tone, inserted verbatim into the prompt.
        target_length: Length label; ``"Short (1-2 min)"`` maps to roughly
            300 words, anything else to roughly 750.

    Returns:
        The validated ``Dialogue``.

    Raises:
        RuntimeError: If the API call fails or the reply is not valid
            dialogue JSON.
    """
    input_text = truncate_text(input_text)
    word_limit = 300 if target_length == "Short (1-2 min)" else 750

    prompt = f"""
    {system_prompt}

    TONE: {tone}
    TARGET LENGTH: {target_length} (approximately {word_limit} words)

    INPUT TEXT: {input_text}

    Generate a complete, well-structured podcast script that:
    - Starts with a friendly introduction.
    - Features a conversational exchange between Priya (American accent) and Ananya (British accent).
    - Concludes with a summary and thanks listeners.
    - Matches the tone and target length specifications.
    """

    try:
        response = groq_client.chat.completions.create(
            messages=[{"role": "system", "content": prompt}],
            model=GROQ_MODEL,
            max_tokens=2048,
            temperature=0.7,
        )
        # An empty/None completion becomes "" so it fails as a parse error
        # with a clear message rather than a TypeError inside re.sub.
        content = response.choices[0].message.content or ""
        # Drop Markdown code fences the model may wrap around the JSON.
        content = re.sub(r'```json\s*|\s*```', '', content)
        return _parse_dialogue(content)
    except Exception as e:
        raise RuntimeError(f"Error generating script: {str(e)}") from e


def generate_audio(text: str, speaker: str) -> str:
    """Synthesize *text* to a temporary MP3 and return its path.

    The caller owns the returned file and is responsible for deleting it.

    Raises:
        RuntimeError: If speech synthesis or writing the file fails.
    """
    # The gTTS top-level domain selects the accent: "com" for Priya,
    # "co.in" for anyone else.
    # NOTE(review): the script prompt describes Ananya as British, but
    # "co.in" is the Indian Google domain - confirm which is intended.
    tld = 'com' if speaker == "Priya" else 'co.in'
    audio_path = None
    try:
        tts = gTTS(text=text, lang='en', tld=tld)
        # Reserve a unique path, then let gTTS write to it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            audio_path = temp_audio.name
        tts.save(audio_path)
        return audio_path
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        raise RuntimeError(f"Error generating audio: {str(e)}") from e


def generate_video(audio_path: str, image_path: str, fps: int = 24) -> str:
    """
    Combines an audio file and an image to generate a video.

    Args:
        audio_path (str): Path to the audio file.
        image_path (str): Path to the image file.
        fps (int): Frame rate of the output. A still image carries no frame
            rate of its own, so one must be supplied to the writer.

    Returns:
        str: Path to the generated video file. The caller owns the file.

    Raises:
        RuntimeError: If loading the inputs or encoding the video fails.
    """
    audio = None
    video = None
    output_path = None
    try:
        audio = AudioFileClip(audio_path)
        video = ImageClip(image_path, duration=audio.duration)
        video = video.set_audio(audio)

        # mkstemp creates the file atomically, unlike the deprecated,
        # race-prone mktemp; the encoder then overwrites it by name.
        fd, output_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)

        video.write_videofile(
            output_path, codec="libx264", audio_codec="aac", fps=fps
        )
        return output_path
    except Exception as e:
        # Remove the partial output so failures do not accumulate temp files.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        raise RuntimeError(f"Error generating video: {str(e)}") from e
    finally:
        # Release the file readers held by the clips.
        for clip in (video, audio):
            if clip is not None:
                clip.close()