Spaces:
Runtime error
Runtime error
| from groq import Groq | |
| from pydantic import BaseModel, ValidationError | |
| from typing import List, Literal | |
| import os | |
| import tiktoken | |
| import json | |
| import re | |
| import tempfile | |
| from gtts import gTTS | |
| from bs4 import BeautifulSoup | |
| import requests | |
# Module-wide Groq API client. The key is read from the GROQ_API_KEY
# environment variable at import time; if the variable is unset the
# os.environ lookup raises KeyError and the module fails to import.
groq_client = Groq(
    api_key=os.environ["GROQ_API_KEY"],
)

# Shared tiktoken tokenizer for the "cl100k_base" encoding.
tokenizer = tiktoken.get_encoding("cl100k_base")
class DialogueItem(BaseModel):
    """One spoken line of the podcast script."""

    # Name of the host delivering the line; validation rejects any value
    # other than these two.
    speaker: Literal["Sarah", "Maria"]

    # The words spoken by that host.
    text: str
class Dialogue(BaseModel):
    """A complete podcast script: the ordered sequence of spoken lines."""

    # Lines in the order they are to be spoken.
    dialogue: List[DialogueItem]
def truncate_text(text, max_tokens=2048):
    """Return *text* cut down to at most *max_tokens* tokens.

    Tokens are counted with the module-level ``tokenizer``. Text that already
    fits is returned unchanged (no encode/decode round trip).

    Args:
        text: The string to truncate.
        max_tokens: Maximum number of tokens to keep. Negative values are
            treated as 0.

    Returns:
        ``text`` itself when it fits, otherwise the decoded first
        ``max_tokens`` tokens.
    """
    # Negative limits would otherwise slice from the end of the token list.
    limit = max(max_tokens, 0)
    # By default tiktoken raises ValueError when the input contains the
    # literal spelling of a special token (e.g. "<|endoftext|>"), which
    # arbitrary scraped text can do. disallowed_special=() encodes such
    # substrings as ordinary text instead.
    tokens = tokenizer.encode(text, disallowed_special=())
    if len(tokens) <= limit:
        return text
    return tokenizer.decode(tokens[:limit])
def extract_text_from_url(url, timeout=10):
    """Download a web page and return its visible text, one phrase per line.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The page text with ``<script>`` and ``<style>`` content removed,
        surrounding whitespace stripped, and empty fragments dropped, joined
        with newlines.

    Raises:
        ValueError: If the request fails or times out, the server returns an
            HTTP error status, or the page cannot be processed. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Scripts and stylesheets are not human-readable content.
        for tag in soup(["script", "style"]):
            tag.decompose()
        lines = (line.strip() for line in soup.get_text().splitlines())
        # Break a line only where a run of spaces separates phrases
        # (e.g. adjacent headlines); splitting on a single space would put
        # every word on its own line.
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        return '\n'.join(chunk for chunk in chunks if chunk)
    except Exception as e:
        # Callers only need to handle ValueError; keep the original
        # exception chained for debugging.
        raise ValueError(f"Error extracting text from URL: {str(e)}") from e
def _parse_dialogue(content: str) -> Dialogue:
    """Turn raw model output into a validated ``Dialogue`` or raise ValueError."""
    # Models often wrap JSON in Markdown code fences; remove them first.
    content = re.sub(r'```json\s*|\s*```', '', content)
    try:
        json_data = json.loads(content)
    except json.JSONDecodeError as json_error:
        # The reply may contain prose around the JSON: fall back to the
        # outermost {...} span.
        match = re.search(r'\{.*\}', content, re.DOTALL)
        if match is None:
            raise ValueError(
                f"Failed to find valid JSON in the response: {content}"
            ) from json_error
        try:
            return Dialogue.model_validate(json.loads(match.group()))
        except (json.JSONDecodeError, ValidationError) as e:
            raise ValueError(f"Failed to parse dialogue JSON: {e}\nContent: {content}") from e
    try:
        return Dialogue.model_validate(json_data)
    except ValidationError as e:
        raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {content}") from e


def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str,
                    model: str = "llama-3.1-70b-versatile"):
    """Ask the Groq chat model for a two-host podcast script and validate it.

    Args:
        system_prompt: Base instructions placed at the top of the prompt.
        input_text: Source material for the podcast; truncated with
            ``truncate_text`` before being embedded in the prompt.
        tone: Desired tone, interpolated into the prompt.
        target_length: Length label. ``"Short (1-2 min)"`` selects a
            300-word budget; any other value selects 750 words.
        model: Groq model identifier to use.
            NOTE(review): the default model name may have been retired by
            Groq; confirm against their current model list.

    Returns:
        A validated ``Dialogue`` parsed from the model's reply.

    Raises:
        ValueError: If the reply contains no parseable JSON or the JSON does
            not match the ``Dialogue`` schema.
    """
    input_text = truncate_text(input_text)
    word_limit = 300 if target_length == "Short (1-2 min)" else 750
    prompt = f"""
{system_prompt}
TONE: {tone}
TARGET LENGTH: {target_length} (approximately {word_limit} words)
INPUT TEXT: {input_text}
Generate a complete, well-structured podcast script that:
1. Starts with a proper introduction
2. Covers the main points from the input text
3. Has a natural flow of conversation between Sarah (American accent) and Maria (British accent)
4. Concludes with a summary and sign-off
5. Fits within the {word_limit} word limit for the target length of {target_length}
6. Strongly emphasizes the {tone} tone throughout the conversation
For a humorous tone, include jokes, puns, and playful banter.
For a casual tone, use colloquial language and make it sound like a conversation between college students.
For a formal tone, maintain a professional podcast style with well-structured arguments and formal language.
Ensure the script is not abruptly cut off and forms a complete conversation.
"""
    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": prompt},
        ],
        model=model,
        max_tokens=2048,
        temperature=0.7,
    )
    # content can be None; treat that as an empty reply so the failure
    # surfaces as the documented ValueError instead of a TypeError in re.sub.
    content = response.choices[0].message.content or ""
    return _parse_dialogue(content)
def generate_audio(text: str, speaker: str) -> str:
    """Synthesize *text* to an MP3 file with gTTS and return the file path.

    Args:
        text: The words to speak.
        speaker: Host name. ``"Sarah"`` uses the ``com`` Google TLD; any
            other value uses ``co.uk``.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted automatically; the caller owns it.

    Raises:
        Whatever ``gTTS.save`` raises; the temporary file is removed first
        so failed calls do not leave files behind.
    """
    tld = 'com' if speaker == "Sarah" else 'co.uk'
    tts = gTTS(text=text, lang='en', tld=tld)
    # Reserve a unique path, then close the handle before gTTS writes to it:
    # on Windows a file still held open by NamedTemporaryFile cannot be
    # opened a second time for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name
    try:
        tts.save(audio_path)
    except Exception:
        # delete=False means nothing else will clean this up.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path