Spaces:
Build error
Build error
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -1,20 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
-
import
|
| 3 |
import json
|
|
|
|
| 4 |
import tempfile
|
| 5 |
-
from typing import List, Literal
|
| 6 |
-
from pydantic import BaseModel, ValidationError
|
| 7 |
from gtts import gTTS
|
| 8 |
from bs4 import BeautifulSoup
|
| 9 |
import requests
|
| 10 |
-
import tiktoken
|
| 11 |
-
import gradio as gr
|
| 12 |
-
from transformers import pipeline
|
| 13 |
|
| 14 |
-
|
| 15 |
tokenizer = tiktoken.get_encoding("cl100k_base")
|
| 16 |
|
| 17 |
-
# Dialogue models
|
| 18 |
class DialogueItem(BaseModel):
|
| 19 |
speaker: Literal["Priya", "Ananya"]
|
| 20 |
text: str
|
|
@@ -22,7 +20,6 @@ class DialogueItem(BaseModel):
|
|
| 22 |
class Dialogue(BaseModel):
    """Complete podcast script: a list of ``DialogueItem`` entries."""

    # The individual dialogue entries; each is validated as a DialogueItem.
    dialogue: List[DialogueItem]
|
| 24 |
|
| 25 |
-
# Utility functions
|
| 26 |
def truncate_text(text, max_tokens=2048):
|
| 27 |
tokens = tokenizer.encode(text)
|
| 28 |
if len(tokens) > max_tokens:
|
|
@@ -35,59 +32,71 @@ def extract_text_from_url(url):
|
|
| 35 |
response.raise_for_status()
|
| 36 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 37 |
|
| 38 |
-
# Remove scripts and styles
|
| 39 |
for script in soup(["script", "style"]):
|
| 40 |
script.decompose()
|
| 41 |
|
| 42 |
-
# Extract text
|
| 43 |
text = soup.get_text()
|
| 44 |
lines = (line.strip() for line in text.splitlines())
|
| 45 |
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
except Exception as e:
|
| 48 |
raise ValueError(f"Error extracting text from URL: {str(e)}")
|
| 49 |
|
| 50 |
-
_summarizer = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _summarizer
    if _summarizer is None:
        # Constructing the pipeline loads the model, so do it once and reuse
        # it instead of rebuilding it on every call.
        _summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return _summarizer


def summarize_text(text, max_length=150):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound passed to the summarizer as ``max_length``.

    Returns:
        The ``summary_text`` of the first result, or an empty string when
        ``text`` is empty or whitespace-only (the model is not invoked).
    """
    if not text or not text.strip():
        return ""

    # Keep min_length <= max_length so a small max_length stays a valid request.
    min_length = min(50, max_length)
    summary = _get_summarizer()(
        text, max_length=max_length, min_length=min_length, do_sample=False
    )
    return summary[0]['summary_text']
|
| 57 |
-
|
| 58 |
def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
|
| 59 |
input_text = truncate_text(input_text)
|
| 60 |
word_limit = 300 if target_length == "Short (1-2 min)" else 750
|
| 61 |
|
| 62 |
-
# Prompt for dialogue generation
|
| 63 |
prompt = f"""
|
| 64 |
{system_prompt}
|
| 65 |
TONE: {tone}
|
| 66 |
TARGET LENGTH: {target_length} (approximately {word_limit} words)
|
| 67 |
INPUT TEXT: {input_text}
|
| 68 |
Generate a complete, well-structured podcast script that:
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
{"
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
try:
|
| 87 |
-
json_data = json.loads(
|
| 88 |
dialogue = Dialogue.model_validate(json_data)
|
| 89 |
-
except
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
return dialogue
|
| 93 |
|
|
@@ -96,67 +105,4 @@ def generate_audio(text: str, speaker: str) -> str:
|
|
| 96 |
tts = gTTS(text=text, lang='en', tld=tld)
|
| 97 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
| 98 |
tts.save(temp_audio.name)
|
| 99 |
-
return temp_audio.name
|
| 100 |
-
|
| 101 |
-
# Main function for podcast generation
|
| 102 |
-
def generate_podcast(uploaded_file, url, tone, target_length):
    """Generate podcast audio and a transcript from an uploaded file or a URL.

    Args:
        uploaded_file: Upload object exposing a ``name`` path, or a falsy
            value when nothing was uploaded. Takes precedence over ``url``.
        url: Page URL to extract text from when no file is given.
        tone: Tone forwarded to ``generate_script``.
        target_length: Length label forwarded to ``generate_script``.

    Returns:
        A ``(audio_path, transcript)`` tuple. When there is nothing to
        process, ``audio_path`` is ``None`` and the second element carries
        the message for the user, so the text always lands in the text slot.
    """
    if uploaded_file:
        # NOTE(review): the upload is read as plain text; a real PDF would
        # need a PDF parser -- confirm which file types are expected here.
        with open(uploaded_file.name, "r", encoding="utf-8") as file:
            input_text = file.read()
    elif url:
        input_text = extract_text_from_url(url)
    else:
        # Message goes second so it maps to the transcript output, not audio.
        return None, "Please provide either a URL or a file."

    system_prompt = "You are an AI script generator for podcasts."
    dialogue = generate_script(system_prompt, input_text, tone, target_length)

    # Guard against an empty script; indexing the audio list would fail.
    if not dialogue.dialogue:
        return None, "The generated script contained no dialogue."

    audio_files = [
        generate_audio(item.text, item.speaker) for item in dialogue.dialogue
    ]

    # Simplified for the demo: only the first clip is returned as the audio.
    combined_audio = audio_files[0]
    transcript = "\n".join(
        f"{item.speaker}: {item.text}" for item in dialogue.dialogue
    )
    return combined_audio, transcript
|
| 126 |
-
|
| 127 |
-
# Gradio Interface
|
| 128 |
-
# Usage text shown as the description of the podcast tab.
instructions = """
1. Upload a PDF file or provide a URL to generate a podcast.
2. Choose the podcast tone and desired length.
3. Click submit to generate the podcast and transcript.
"""

# Podcast tab: the four inputs are passed positionally to generate_podcast
# (file, URL, tone, length); its return values fill the Audio and Markdown
# outputs in that order.
iface = gr.Interface(
    fn=generate_podcast,
    inputs=[
        gr.File(label="Upload PDF file (optional)", file_types=[".pdf", ".txt"]),
        gr.Textbox(label="OR Enter URL"),
        gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
        gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
    ],
    outputs=[
        gr.Audio(label="Generated Podcast"),
        gr.Markdown(label="Transcript")
    ],
    title="🎙️ Amuthvani: AI Podcast!",
    description=instructions,
    allow_flagging="never",
    theme=gr.themes.Soft()
)

# Summarization Interface: one textbox in, one textbox out, backed by
# summarize_text.
summarize_interface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Enter text for briefing"),
    outputs=gr.Textbox(label="Briefing Document Summary"),
    title="📝 Briefing Document"
)

# Combined Tabbed Interface: both interfaces as tabs of one app.
combined = gr.TabbedInterface([iface, summarize_interface], ["Podcast Generator", "Briefing Document"])
# Runs at module top level, so the app is launched when this module executes.
combined.launch()
|
|
|
|
| 1 |
+
from groq import Groq
|
| 2 |
+
from pydantic import BaseModel, ValidationError
|
| 3 |
+
from typing import List, Literal
|
| 4 |
import os
|
| 5 |
+
import tiktoken
|
| 6 |
import json
|
| 7 |
+
import re
|
| 8 |
import tempfile
|
|
|
|
|
|
|
| 9 |
from gtts import gTTS
|
| 10 |
from bs4 import BeautifulSoup
|
| 11 |
import requests
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
|
| 14 |
tokenizer = tiktoken.get_encoding("cl100k_base")
|
| 15 |
|
|
|
|
| 16 |
class DialogueItem(BaseModel):
|
| 17 |
speaker: Literal["Priya", "Ananya"]
|
| 18 |
text: str
|
|
|
|
| 20 |
class Dialogue(BaseModel):
    """Complete podcast script: a list of ``DialogueItem`` entries."""

    # The individual dialogue entries; each is validated as a DialogueItem.
    dialogue: List[DialogueItem]
|
| 22 |
|
|
|
|
| 23 |
def truncate_text(text, max_tokens=2048):
|
| 24 |
tokens = tokenizer.encode(text)
|
| 25 |
if len(tokens) > max_tokens:
|
|
|
|
| 32 |
response.raise_for_status()
|
| 33 |
soup = BeautifulSoup(response.text, 'html.parser')
|
| 34 |
|
|
|
|
| 35 |
for script in soup(["script", "style"]):
|
| 36 |
script.decompose()
|
| 37 |
|
|
|
|
| 38 |
text = soup.get_text()
|
| 39 |
lines = (line.strip() for line in text.splitlines())
|
| 40 |
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
| 41 |
+
text = '\n'.join(chunk for chunk in chunks if chunk)
|
| 42 |
+
|
| 43 |
+
return text
|
| 44 |
except Exception as e:
|
| 45 |
raise ValueError(f"Error extracting text from URL: {str(e)}")
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
    """Generate a podcast dialogue for ``input_text`` through the Groq chat API.

    The input is shortened with ``truncate_text``, embedded in a prompt that
    states the tone and target length, and sent as a single system message.
    The reply is parsed as JSON and validated against ``Dialogue``.

    Args:
        system_prompt: Instructions placed at the top of the prompt.
        input_text: Source material for the script.
        tone: Tone name interpolated into the prompt.
        target_length: Length label; "Short (1-2 min)" selects a 300-word
            limit, any other value selects 750 words.

    Returns:
        The validated ``Dialogue``.

    Raises:
        ValueError: If the reply contains no parseable JSON object or the
            parsed data does not validate as a ``Dialogue``.
    """
    input_text = truncate_text(input_text)
    word_limit = 300 if target_length == "Short (1-2 min)" else 750

    prompt = f"""
{system_prompt}
TONE: {tone}
TARGET LENGTH: {target_length} (approximately {word_limit} words)
INPUT TEXT: {input_text}
Generate a complete, well-structured podcast script that:
1. Starts with a friendly, engaging introduction that feels natural, welcoming the listeners as if Priya and Ananya are speaking directly to them.
2. Covers the main points from the input text in a conversational, relaxed manner with smooth transitions. Priya (American accent) and Ananya (British accent) should engage in a back-and-forth conversation that feels authentic and lively, as if two people are having a real interaction.
3. Voice adjustments: Ensure that the flow of conversation is natural, with slight pauses for thought and clear enunciation, making it easy for all listeners to follow along. Keep the pace relaxed but steady, with slight variations in speed for emphasis on key points—ensuring clarity and ease of understanding.
4. Concludes with a smooth and heartfelt summary, wrapping up the discussion in a way that feels genuine and leaves listeners with a sense of closure, while thanking them for tuning in.
5. The overall voice speed and tone should match the conversation and topic, ensuring the dialogue is easy to comprehend. For more intense moments, you can use a slightly faster pace for energy, and for reflective points, use a slower, thoughtful pace.
6. Fits within the {word_limit} word limit for the target length of {target_length}.
7. Strongly emphasizes the {tone} tone throughout the conversation.
For a humorous tone, include jokes, puns, and playful banter, making the conversation feel light-hearted while integrating subtle cultural references and humor that listeners can relate to.
For a casual tone, use colloquial language and friendly expressions that make it feel like a relaxed, informal chat between friends. Include cultural references and inside jokes to keep the conversation fun.
For a formal tone, maintain a professional style with clear, structured arguments, presenting information with respect and authority, but still keeping the conversation friendly and accessible.
Ensure the script feels like a real, flowing podcast conversation without abrupt transitions or unnatural interruptions.
"""

    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": prompt},
        ],
        model="llama-3.1-70b-versatile",
        max_tokens=2048,
        temperature=0.7
    )

    # The message content can be absent; use an empty string so the parsing
    # below reports a ValueError instead of failing inside re.sub.
    content = response.choices[0].message.content or ""
    # Strip Markdown code fences around the JSON. `{3} matches three literal
    # backticks, so the pattern survives tools that swallow fence markers.
    content = re.sub(r'`{3}json\s*|\s*`{3}', '', content)

    try:
        json_data = json.loads(content)
        dialogue = Dialogue.model_validate(json_data)
    except json.JSONDecodeError as json_error:
        # The reply may wrap the JSON in extra prose; fall back to the span
        # from the first "{" to the last "}".
        match = re.search(r'\{.*\}', content, re.DOTALL)
        if match:
            try:
                json_data = json.loads(match.group())
                dialogue = Dialogue.model_validate(json_data)
            except (json.JSONDecodeError, ValidationError) as e:
                raise ValueError(f"Failed to parse dialogue JSON: {e}\nContent: {content}") from e
        else:
            raise ValueError(f"Failed to find valid JSON in the response: {content}") from json_error
    except ValidationError as e:
        raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {content}") from e

    return dialogue
|
| 102 |
|
|
|
|
| 105 |
tts = gTTS(text=text, lang='en', tld=tld)
|
| 106 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
|
| 107 |
tts.save(temp_audio.name)
|
| 108 |
+
return temp_audio.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|