Spaces:

Narayana02
/

podify

Build error

App Files Files Community

Narayana02 committed on Jan 18, 2025

Commit

fbd006a

verified ·

1 Parent(s): 5152b68

Update utils.py

Browse files

Files changed (1) hide show

utils.py +49 -103

utils.py CHANGED Viewed

@@ -1,20 +1,18 @@
 import os
-import re
 import json
 import tempfile
-from typing import List, Literal
-from pydantic import BaseModel, ValidationError
 from gtts import gTTS
 from bs4 import BeautifulSoup
 import requests
-import tiktoken
-import gradio as gr
-from transformers import pipeline
-# Initialize necessary modules
 tokenizer = tiktoken.get_encoding("cl100k_base")
-# Dialogue models
 class DialogueItem(BaseModel):
     speaker: Literal["Priya", "Ananya"]
     text: str
@@ -22,7 +20,6 @@ class DialogueItem(BaseModel):
 class Dialogue(BaseModel):
     dialogue: List[DialogueItem]
-# Utility functions
 def truncate_text(text, max_tokens=2048):
     tokens = tokenizer.encode(text)
     if len(tokens) > max_tokens:
@@ -35,59 +32,71 @@ def extract_text_from_url(url):
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
-        # Remove scripts and styles
         for script in soup(["script", "style"]):
             script.decompose()
-        # Extract text
         text = soup.get_text()
         lines = (line.strip() for line in text.splitlines())
         chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-        return '\n'.join(chunk for chunk in chunks if chunk)
     except Exception as e:
         raise ValueError(f"Error extracting text from URL: {str(e)}")
-def summarize_text(text, max_length=150):
-    """
-    Summarize the given text to a specified maximum length.
-    """
-    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-    summary = summarizer(text, max_length=max_length, min_length=50, do_sample=False)
-    return summary[0]['summary_text']
 def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
     input_text = truncate_text(input_text)
     word_limit = 300 if target_length == "Short (1-2 min)" else 750
-    # Prompt for dialogue generation
     prompt = f"""
     {system_prompt}
     TONE: {tone}
     TARGET LENGTH: {target_length} (approximately {word_limit} words)
     INPUT TEXT: {input_text}
     Generate a complete, well-structured podcast script that:
-    - Starts with a friendly introduction.
-    - Covers the main points from the input text in a conversational style.
-    - Priya (American accent) and Ananya (British accent) alternate in a lively back-and-forth conversation.
-    - Concludes with a heartfelt summary and thanks listeners.
-    - Strongly emphasizes the {tone} tone and keeps within the {word_limit} word limit.
-    """
-    # Mockup Groq response for demonstration (replace with actual API call if needed)
-    response_content = json.dumps({
-        "dialogue": [
-            {"speaker": "Priya", "text": "Hi everyone, welcome to our podcast!"},
-            {"speaker": "Ananya", "text": "Yes, we're so glad you're here! Let's dive in."},
-            {"speaker": "Priya", "text": "Today, we're talking about AI and its impact on society."},
-            {"speaker": "Ananya", "text": "Absolutely, it's such a fascinating topic."}
-        ]
-    })
     try:
-        json_data = json.loads(response_content)
         dialogue = Dialogue.model_validate(json_data)
-    except (json.JSONDecodeError, ValidationError) as e:
-        raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {response_content}")
     return dialogue
@@ -96,67 +105,4 @@ def generate_audio(text: str, speaker: str) -> str:
     tts = gTTS(text=text, lang='en', tld=tld)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         tts.save(temp_audio.name)
-        return temp_audio.name
-# Main function for podcast generation
-def generate_podcast(uploaded_file, url, tone, target_length):
-    # Extract text from the uploaded file or URL
-    if uploaded_file:
-        with open(uploaded_file.name, "r") as file:
-            input_text = file.read()
-    elif url:
-        input_text = extract_text_from_url(url)
-    else:
-        return "Please provide either a URL or a file.", None
-    # Generate podcast script
-    system_prompt = "You are an AI script generator for podcasts."
-    dialogue = generate_script(system_prompt, input_text, tone, target_length)
-    # Generate audio for each speaker
-    audio_files = []
-    for item in dialogue.dialogue:
-        audio_path = generate_audio(item.text, item.speaker)
-        audio_files.append(audio_path)
-    # Combine all audio files into a single output (simplified for demo)
-    combined_audio = audio_files[0]  # Just returning the first file for demo
-    transcript = "\n".join([f"{item.speaker}: {item.text}" for item in dialogue.dialogue])
-    return combined_audio, transcript
-# Gradio Interface
-instructions = """
-1. Upload a PDF file or provide a URL to generate a podcast.
-2. Choose the podcast tone and desired length.
-3. Click submit to generate the podcast and transcript.
-"""
-iface = gr.Interface(
-    fn=generate_podcast,
-    inputs=[
-        gr.File(label="Upload PDF file (optional)", file_types=[".pdf", ".txt"]),
-        gr.Textbox(label="OR Enter URL"),
-        gr.Radio(["humorous", "casual", "formal"], label="Select podcast tone", value="casual"),
-        gr.Radio(["Short (1-2 min)", "Medium (3-5 min)"], label="Podcast length", value="Medium (3-5 min)")
-    ],
-    outputs=[
-        gr.Audio(label="Generated Podcast"),
-        gr.Markdown(label="Transcript")
-    ],
-    title="🎙️ Amuthvani: AI Podcast!",
-    description=instructions,
-    allow_flagging="never",
-    theme=gr.themes.Soft()
-)
-# Summarization Interface
-summarize_interface = gr.Interface(
-    fn=summarize_text,
-    inputs=gr.Textbox(label="Enter text for briefing"),
-    outputs=gr.Textbox(label="Briefing Document Summary"),
-    title="📝 Briefing Document"
-)
-# Combined Tabbed Interface
-combined = gr.TabbedInterface([iface, summarize_interface], ["Podcast Generator", "Briefing Document"])
-combined.launch()

+from groq import Groq
+from pydantic import BaseModel, ValidationError
+from typing import List, Literal
 import os
+import tiktoken
 import json
+import re
 import tempfile
 from gtts import gTTS
 from bs4 import BeautifulSoup
 import requests
+groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
 tokenizer = tiktoken.get_encoding("cl100k_base")
 class DialogueItem(BaseModel):
     speaker: Literal["Priya", "Ananya"]
     text: str
 class Dialogue(BaseModel):
     dialogue: List[DialogueItem]
 def truncate_text(text, max_tokens=2048):
     tokens = tokenizer.encode(text)
     if len(tokens) > max_tokens:
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         for script in soup(["script", "style"]):
             script.decompose()
         text = soup.get_text()
         lines = (line.strip() for line in text.splitlines())
         chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+        text = '\n'.join(chunk for chunk in chunks if chunk)
+        return text
     except Exception as e:
         raise ValueError(f"Error extracting text from URL: {str(e)}")
 def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
     input_text = truncate_text(input_text)
     word_limit = 300 if target_length == "Short (1-2 min)" else 750
     prompt = f"""
     {system_prompt}
     TONE: {tone}
     TARGET LENGTH: {target_length} (approximately {word_limit} words)
     INPUT TEXT: {input_text}
     Generate a complete, well-structured podcast script that:
+    1. Starts with a friendly, engaging introduction that feels natural, welcoming the listeners as if Priya and Ananya are speaking directly to them.
+    2. Covers the main points from the input text in a conversational, relaxed manner with smooth transitions. Priya (American accent) and Ananya (British accent) should engage in a back-and-forth conversation that feels authentic and lively, as if two people are having a real interaction.
+    3. Voice adjustments: Ensure that the flow of conversation is natural, with slight pauses for thought and clear enunciation, making it easy for all listeners to follow along. Keep the pace relaxed but steady, with slight variations in speed for emphasis on key points—ensuring clarity and ease of understanding.
+    4. Concludes with a smooth and heartfelt summary, wrapping up the discussion in a way that feels genuine and leaves listeners with a sense of closure, while thanking them for tuning in.
+    5. The overall voice speed and tone should match the conversation and topic, ensuring the dialogue is easy to comprehend. For more intense moments, you can use a slightly faster pace for energy, and for reflective points, use a slower, thoughtful pace.
+    6. Fits within the {word_limit} word limit for the target length of {target_length}.
+    7. Strongly emphasizes the {tone} tone throughout the conversation.
+    For a humorous tone, include jokes, puns, and playful banter, making the conversation feel light-hearted while integrating subtle cultural references and humor that listeners can relate to.
+    For a casual tone, use colloquial language and friendly expressions that make it feel like a relaxed, informal chat between friends. Include cultural references and inside jokes to keep the conversation fun.
+    For a formal tone, maintain a professional style with clear, structured arguments, presenting information with respect and authority, but still keeping the conversation friendly and accessible.
+    Ensure the script feels like a real, flowing podcast conversation without abrupt transitions or unnatural interruptions.
+"""
+    response = groq_client.chat.completions.create(
+        messages=[
+            {"role": "system", "content": prompt},
+        ],
+        model="llama-3.1-70b-versatile",
+        max_tokens=2048,
+        temperature=0.7
+    )
+    content = response.choices[0].message.content
+    content = re.sub(r'```json\s*|\s*```', '', content)
     try:
+        json_data = json.loads(content)
         dialogue = Dialogue.model_validate(json_data)
+    except json.JSONDecodeError as json_error:
+        match = re.search(r'\{.*\}', content, re.DOTALL)
+        if match:
+            try:
+                json_data = json.loads(match.group())
+                dialogue = Dialogue.model_validate(json_data)
+            except (json.JSONDecodeError, ValidationError) as e:
+                raise ValueError(f"Failed to parse dialogue JSON: {e}\nContent: {content}")
+        else:
+            raise ValueError(f"Failed to find valid JSON in the response: {content}")
+    except ValidationError as e:
+        raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {content}")
     return dialogue
     tts = gTTS(text=text, lang='en', tld=tld)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
         tts.save(temp_audio.name)
+        return temp_audio.name