Spaces:

siddhartharyaai
/

SearchPod1.0

Build error

App Files Files Community

siddhartharyaai committed on Feb 12, 2025

Commit

48c504d

verified ·

1 Parent(s): 1530c27

Upload 4 files

Browse files

Files changed (4) hide show

app.py +520 -0
prompts.py +58 -0
qa.py +88 -0
utils.py +641 -0

app.py ADDED Viewed

	@@ -0,0 +1,520 @@

+import streamlit as st
+import time
+import re
+import os
+import tempfile
+import pypdf
+from pydub import AudioSegment, effects
+import difflib
+#CORRECTED IMPORT
+from utils import (
+    generate_script,
+    generate_audio_mp3,
+    mix_with_bg_music,
+    DialogueItem,
+    run_research_agent,
+    generate_report
+)
+from prompts import SYSTEM_PROMPT
+from qa import transcribe_audio_deepgram, handle_qa_exchange
+# Cap on post-podcast follow-up questions (typed or recorded) per generated podcast.
+MAX_QA_QUESTIONS = 5  # up to 5 voice/text questions
+def parse_user_edited_transcript(edited_text: str, host_name: str, guest_name: str):
+    pattern = r"\*\*(.+?)\*\*:\s*(.+)"
+    matches = re.findall(pattern, edited_text)
+    items = []
+    if not matches:
+        raw_name = host_name or "Jane"
+        text_line = edited_text.strip()
+        speaker = "Jane"
+        if raw_name.lower() == guest_name.lower():
+            speaker = "John"
+        item = DialogueItem(
+            speaker=speaker,
+            display_speaker=raw_name,
+            text=text_line
+        )
+        items.append(item)
+        return items
+    for (raw_name, text_line) in matches:
+        if raw_name.lower() == host_name.lower():
+            speaker = "Jane"
+        elif raw_name.lower() == guest_name.lower():
+            speaker = "John"
+        else:
+            speaker = "Jane"
+        item = DialogueItem(
+            speaker=speaker,
+            display_speaker=raw_name,
+            text=text_line
+        )
+        items.append(item)
+    return items
+def regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path=None):
+    audio_segments = []
+    transcript = ""
+    crossfade_duration = 50  # ms
+    for item in dialogue_items:
+        audio_file = generate_audio_mp3(item.text, item.speaker)
+        seg = AudioSegment.from_file(audio_file, format="mp3")
+        audio_segments.append(seg)
+        transcript += f"**{item.display_speaker}**: {item.text}\n\n"
+        os.remove(audio_file)
+    if not audio_segments:
+        return None, "No audio segments were generated."
+    combined_spoken = audio_segments[0]
+    for seg in audio_segments[1:]:
+        combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
+    final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
+        final_mix.export(temp_audio.name, format="mp3")
+        final_mp3_path = temp_audio.name
+    with open(final_mp3_path, "rb") as f:
+        audio_bytes = f.read()
+    os.remove(final_mp3_path)
+    return audio_bytes, transcript
+def generate_podcast(
+    research_topic_input,
+    tone,
+    length_minutes,
+    host_name,
+    host_desc,
+    guest_name,
+    guest_desc,
+    user_specs,
+    sponsor_content,
+    sponsor_style,
+    custom_bg_music_path
+):
+    if not research_topic_input:
+      return None, "Please enter a topic to research for the podcast."
+    text = st.session_state.get("report_content", "") # Get report content
+    if not text:
+        return None, "Please generate a research report first, or enter a topic."
+    extra_instructions = []
+    if host_name or guest_name:
+        host_line = f"Host: {host_name or 'Jane'} - {host_desc or 'a curious host'}."
+        guest_line = f"Guest: {guest_name or 'John'} - {guest_desc or 'an expert'}."
+        extra_instructions.append(f"{host_line}\n{guest_line}")
+    if user_specs.strip():
+        extra_instructions.append(f"Additional User Instructions: {user_specs}")
+    if sponsor_content.strip():
+        extra_instructions.append(
+            f"Sponsor Content Provided (should be under ~30 seconds):\n{sponsor_content}"
+        )
+    combined_instructions = "\n\n".join(extra_instructions).strip()
+    full_prompt = SYSTEM_PROMPT
+    if combined_instructions:
+        full_prompt += f"\n\n# Additional Instructions\n{combined_instructions}\n"
+    # Add language-specific instructions
+    if st.session_state.get("language_selection") == "Hinglish":
+        full_prompt += "\n\nPlease generate the script in Romanized Hindi.\n"
+    # Add similar instruction here for Hindi
+    try:
+        script = generate_script(
+            full_prompt,
+            text,
+            tone,
+            f"{length_minutes} Mins",
+            host_name=host_name or "Jane",
+            guest_name=guest_name or "John",
+            sponsor_style=sponsor_style,
+            sponsor_provided=bool(sponsor_content.strip())
+        )
+        # If language is Hinglish, transliterate script dialogues to IAST
+        if st.session_state.get("language_selection") == "Hinglish":
+            from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
+            for dialogue_item in script.dialogue:
+                dialogue_item.text = transliterate(dialogue_item.text, DEVANAGARI, IAST)
+    except Exception as e:
+        return None, f"Error generating script: {str(e)}"
+    audio_segments = []
+    transcript = ""
+    crossfade_duration = 50
+    try:
+        for item in script.dialogue:
+            language = st.session_state.get("language_selection", "English (American)")
+            if language in ["English (Indian)", "Hinglish", "Hindi"]:
+                tts_speaker = "John" if item.display_speaker.lower() == (guest_name or "John").lower() else "Jane"
+            else:
+                tts_speaker = item.speaker
+            audio_file = generate_audio_mp3(item.text, tts_speaker)
+            seg = AudioSegment.from_file(audio_file, format="mp3")
+            audio_segments.append(seg)
+            transcript += f"**{item.display_speaker}**: {item.text}\n\n"
+            os.remove(audio_file)
+        if not audio_segments:
+            return None, "No audio segments generated."
+        combined_spoken = audio_segments[0]
+        for seg in audio_segments[1:]:
+            combined_spoken = combined_spoken.append(seg, crossfade=crossfade_duration)
+        final_mix = mix_with_bg_music(combined_spoken, custom_bg_music_path)
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
+            final_mix.export(temp_audio.name, format="mp3")
+            final_mp3_path = temp_audio.name
+        with open(final_mp3_path, "rb") as f:
+            audio_bytes = f.read()
+        os.remove(final_mp3_path)
+        return audio_bytes, transcript
+    except Exception as e:
+        return None, f"Error generating audio: {str(e)}"
+def highlight_differences(original: str, edited: str) -> str:
+    matcher = difflib.SequenceMatcher(None, original.split(), edited.split())
+    highlighted = []
+    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+        if opcode == 'equal':
+            highlighted.extend(original.split()[i1:i2])
+        elif opcode in ('replace', 'insert'):
+            added_words = edited.split()[j1:j2]
+            highlighted.extend([f'<span style="color:red">{word}</span>' for word in added_words])
+        elif opcode == 'delete':
+            pass
+    return ' '.join(highlighted)
+def main():
+    st.set_page_config(
+        page_title="MyPod v3: AI-Powered Podcast & Research",
+        layout="centered"
+    )
+    st.markdown("""
+    <style>
+    .stFileUploader>div>div>div {
+        transform: scale(0.9);
+    }
+    footer {
+        text-align: center;
+        padding: 1em 0;
+        font-size: 0.8em;
+        color: #888;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+    logo_col, title_col = st.columns([1, 10])
+    with logo_col:
+        st.image("logomypod.jpg", width=70)
+    with title_col:
+        st.markdown("## MyPod v3: AI-Powered Podcast & Research")
+    st.markdown("""
+    Welcome to **MyPod**, your go-to AI-powered podcast generator and research report tool! 🎉
+    MyPod now offers two main functionalities:
+    1.  **Generate Research Reports:**  Provide a research topic, and MyPod will use its AI-powered research agent to create a comprehensive, well-structured research report in PDF format.
+    2.  **Generate Podcasts:**  Transform your research topic (or the generated report) into an engaging, human-sounding podcast.
+    Select your desired mode below and let the magic happen!
+    """)
+    with st.expander("How to Use"):
+        st.markdown("""
+        **For Research Reports:**
+        <ol style="font-size:18px;">
+        <li>Select "Generate Research Report".</li>
+        <li>Enter your research topic.</li>
+        <li>Click 'Generate Report'.</li>
+        <li>MyPod will use its AI agent to research the topic and create a PDF report.</li>
+        <li>Once generated, you can view and download the report.</li>
+        </ol>
+        **For Podcasts:**
+        <ol style="font-size:18px;">
+        <li>Select "Generate Podcast".</li>
+        <li>Enter the research topic (this will be used as the basis for the podcast). OR FIRST GENERATE A REPORT AND THEN SELECT PODCAST.</li>
+        <li>Choose the tone, language, and target duration.</li>
+        <li>Add custom names and descriptions for the speakers (optional).</li>
+        <li>Add sponsored content (optional).</li>
+        <li>Click 'Generate Podcast'.</li>
+        </ol>
+        """, unsafe_allow_html=True)
+    # --- Main Mode Selection ---
+    mode = st.radio("Choose a Mode:", ["Generate Research Report", "Generate Podcast"])
+    # --- Research Report Section ---
+    if mode == "Generate Research Report":
+        st.markdown("### Generate Research Report")
+        research_topic_input = st.text_input("Enter your research topic:")
+        report_button = st.button("Generate Report")
+        if report_button:
+            if not research_topic_input:
+                st.error("Please enter a research topic.")
+            else:
+                with st.spinner("Researching and generating report... This may take several minutes."):
+                    try:
+                        report_content = run_research_agent(research_topic_input)
+                        st.session_state["report_content"] = report_content
+                        # Display report (basic text for now)
+                        st.markdown("### Generated Report Preview")
+                        st.text_area("Report Content", value=report_content, height=300)
+                        # Generate PDF and offer download
+                        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
+                            pdf_path = tmpfile.name
+                            generate_report(report_content, filename=pdf_path)  # Generate PDF
+                        with open(pdf_path, "rb") as f:
+                            pdf_bytes = f.read()
+                        os.remove(pdf_path)  # Clean up temp file
+                        st.download_button(
+                            label="Download Report (PDF)",
+                            data=pdf_bytes,
+                            file_name=f"{research_topic_input}_report.pdf",
+                            mime="application/pdf"
+                        )
+                        st.success("Report generated successfully!")
+                    except Exception as e:
+                        st.error(f"An error occurred: {e}")
+    # --- Podcast Generation Section ---
+    elif mode == "Generate Podcast":
+        st.markdown("### Generate Podcast")
+        research_topic_input = st.text_input("Enter research topic for the podcast (or use a generated report):")
+        tone = st.radio("Tone", ["Casual", "Formal", "Humorous", "Youthful"], index=0)
+        length_minutes = st.slider("Podcast Length (in minutes)", 1, 60, 3)
+        language = st.selectbox(
+            "Choose Language and Accent",
+            ["English (American)", "English (Indian)", "Hinglish", "Hindi"],
+            index=0
+        )
+        st.session_state["language_selection"] = language
+        st.markdown("### Customize Your Podcast (Optional)")
+        with st.expander("Set Host & Guest Names/Descriptions (Optional)"):
+            host_name = st.text_input("Female Host Name (leave blank for 'Jane')")
+            host_desc = st.text_input("Female Host Description (Optional)")
+            guest_name = st.text_input("Male Guest Name (leave blank for 'John')")
+            guest_desc = st.text_input("Male Guest Description (Optional)")
+        user_specs = st.text_area("Any special instructions or prompts for the script? (Optional)", "")
+        sponsor_content = st.text_area("Sponsored Content / Ad (Optional)", "")
+        sponsor_style = st.selectbox("Sponsor Integration Style", ["Separate Break", "Blended"])
+        custom_bg_music_file = st.file_uploader("Upload Custom Background Music (Optional)", type=["mp3", "wav"])
+        custom_bg_music_path = None
+        if custom_bg_music_file:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(custom_bg_music_file.name)[1]) as tmp:
+                tmp.write(custom_bg_music_file.read())
+                custom_bg_music_path = tmp.name
+        if "audio_bytes" not in st.session_state:
+            st.session_state["audio_bytes"] = None
+        if "transcript" not in st.session_state:
+            st.session_state["transcript"] = None
+        if "transcript_original" not in st.session_state:
+            st.session_state["transcript_original"] = None
+        if "qa_count" not in st.session_state:
+            st.session_state["qa_count"] = 0
+        if "conversation_history" not in st.session_state:
+            st.session_state["conversation_history"] = ""
+        generate_button = st.button("Generate Podcast")
+        if generate_button:
+            progress_bar = st.progress(0)
+            progress_text = st.empty()
+            progress_messages = [
+                "🔍 Analyzing your input...",
+                "📝 Crafting the perfect script...",
+                "🎙️ Generating high-quality audio...",
+                "🎶 Adding the finishing touches..."
+            ]
+            progress_text.write(progress_messages[0])
+            progress_bar.progress(0)
+            time.sleep(1.0)
+            progress_text.write(progress_messages[1])
+            progress_bar.progress(25)
+            time.sleep(1.0)
+            progress_text.write(progress_messages[2])
+            progress_bar.progress(50)
+            time.sleep(1.0)
+            progress_text.write(progress_messages[3])
+            progress_bar.progress(75)
+            time.sleep(1.0)
+            audio_bytes, transcript = generate_podcast(
+                research_topic_input,
+                tone,
+                length_minutes,
+                host_name,
+                host_desc,
+                guest_name,
+                guest_desc,
+                user_specs,
+                sponsor_content,
+                sponsor_style,
+                custom_bg_music_path
+            )
+            progress_bar.progress(100)
+            progress_text.write("✅ Done!")
+            if audio_bytes is None:
+                st.error(transcript)
+                st.session_state["audio_bytes"] = None
+                st.session_state["transcript"] = None
+                st.session_state["transcript_original"] = None
+            else:
+                st.success("Podcast generated successfully!")
+                st.session_state["audio_bytes"] = audio_bytes
+                st.session_state["transcript"] = transcript
+                st.session_state["transcript_original"] = transcript
+                st.session_state["qa_count"] = 0
+                st.session_state["conversation_history"] = ""
+        if st.session_state.get("audio_bytes"):
+            st.audio(st.session_state["audio_bytes"], format='audio/mp3')
+            st.download_button(
+                label="Download Podcast (MP3)",
+                data=st.session_state["audio_bytes"],
+                file_name="my_podcast.mp3",
+                mime="audio/mpeg"
+            )
+            st.markdown("### Generated Transcript (Editable)")
+            edited_text = st.text_area(
+                "Feel free to tweak lines, fix errors, or reword anything.",
+                value=st.session_state["transcript"],
+                height=300
+            )
+            if st.session_state.get("transcript_original"):
+                highlighted_transcript = highlight_differences(
+                    st.session_state["transcript_original"],
+                    edited_text
+                )
+                st.markdown("### **Edited Transcript Highlights**", unsafe_allow_html=True)
+                st.markdown(highlighted_transcript, unsafe_allow_html=True)
+            if st.button("Regenerate Audio From Edited Text"):
+                regen_bar = st.progress(0)
+                regen_text = st.empty()
+                regen_text.write("🔄 Regenerating your podcast with the edits...")
+                regen_bar.progress(25)
+                time.sleep(1.0)
+                regen_text.write("🔧 Adjusting the script based on your changes...")
+                regen_bar.progress(50)
+                time.sleep(1.0)
+                dialogue_items = parse_user_edited_transcript(
+                    edited_text,
+                    host_name or "Jane",
+                    guest_name or "John"
+                )
+                new_audio_bytes, new_transcript = regenerate_audio_from_dialogue(dialogue_items, custom_bg_music_path)
+                regen_bar.progress(75)
+                time.sleep(1.0)
+                if new_audio_bytes is None:
+                    regen_bar.progress(100)
+                    st.error(new_transcript)
+                else:
+                    regen_bar.progress(100)
+                    regen_text.write("✅ Regeneration complete!")
+                    st.success("Regenerated audio below:")
+                    st.session_state["audio_bytes"] = new_audio_bytes
+                    st.session_state["transcript"] = new_transcript
+                    st.session_state["transcript_original"] = new_transcript
+                    st.audio(new_audio_bytes, format='audio/mp3')
+                    st.download_button(
+                        label="Download Edited Podcast (MP3)",
+                        data=new_audio_bytes,
+                        file_name="my_podcast_edited.mp3",
+                        mime="audio/mpeg"
+                    )
+                    st.markdown("### Updated Transcript")
+                    st.markdown(new_transcript)
+            st.markdown("## Post-Podcast Q&A")
+            used_questions = st.session_state.get("qa_count", 0)
+            remaining = MAX_QA_QUESTIONS - used_questions
+            if remaining > 0:
+                st.write(f"You can ask up to {remaining} more question(s).")
+                typed_q = st.text_input("Type your follow-up question:")
+                audio_q = st.audio_input("Or record an audio question (WAV)")
+                if st.button("Submit Q&A"):
+                    if used_questions >= MAX_QA_QUESTIONS:
+                        st.warning("You have reached the Q&A limit.")
+                    else:
+                        question_text = typed_q.strip()
+                        if audio_q is not None:
+                            suffix = ".wav"
+                            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+                                tmp.write(audio_q.read())
+                                local_audio_path = tmp.name
+                            st.write("Transcribing your audio question...")
+                            audio_transcript = transcribe_audio_deepgram(local_audio_path)
+                            if audio_transcript:
+                                question_text = audio_transcript
+                        if not question_text:
+                            st.warning("No question found (text or audio).")
+                        else:
+                            st.write("Generating an answer...")
+                            ans_audio, ans_text = handle_qa_exchange(question_text)
+                            if ans_audio:
+                                st.audio(ans_audio, format='audio/mp3')
+                                st.markdown(f"**John**: {ans_text}")
+                                st.session_state["qa_count"] = used_questions + 1
+                            else:
+                                st.warning("No response could be generated.")
+            else:
+                st.write("You have used all 5 Q&A opportunities.")
+    st.markdown("<footer>©2025 MyPod. All rights reserved.</footer>", unsafe_allow_html=True)
+if __name__ == "__main__":
+    main()

prompts.py ADDED Viewed

	@@ -0,0 +1,58 @@

+# prompts.py
+# Base system prompt for podcast script generation, assembled from adjacent string
+# literals into one string. It always names the host "Jane" and the guest "John".
+SYSTEM_PROMPT = (
+    "You are a skilled podcast producer tasked with transforming unstructured or messy input text into an engaging "
+    "and informative podcast script. Your goal is to extract the most interesting and insightful content for a "
+    "compelling podcast discussion. Critically, you must incorporate both established background information (e.g., "
+    "from an LLM knowledge base or Wikipedia) AND you must include any new or breaking news items found through RSS "
+    "feeds or other sources.\n\n"
+    "Steps to Follow:\n"
+    "1. **Analyze the Input:** Carefully examine the text, identifying key topics, points, recent developments, and "
+    "interesting facts or anecdotes that could drive an engaging podcast conversation. Disregard irrelevant or "
+    "duplicate information.\n"
+    "2. **Brainstorm Ideas:** Consider creative ways to present the key points in a lively, entertaining manner, "
+    "incorporating the latest news or any recently discovered updates.\n"
+    "3. **Craft the Dialogue:**\n"
+    "   - **Warm Opening**: Have Jane (the host) welcome listeners, introduce the podcast name, and greet the guest. "
+    "     Provide some quick background on John’s expertise or credentials.\n"
+    "   - **Main Discussion**: Discuss the key points thoroughly, including new/breaking news items or any fresh "
+    "     details from the topic’s latest developments. Jane asks thoughtful questions; John responds with "
+    "     well-substantiated facts and relevant news. Be sure to highlight if there are significant changes, such "
+    "     as a resignation or other major events.\n"
+    "   - **Pleasant Conclusion**: End the episode in a friendly way, with Jane wrapping up and thanking the audience, "
+    "     possibly directing them to future updates if the topic is ongoing.\n\n"
+    "**Rules for the Dialogue:**\n"
+    "- Jane always initiates the conversation and interviews John.\n"
+    "- Include thoughtful questions from Jane to guide the discussion.\n"
+    "- Incorporate natural speech patterns, including occasional verbal fillers (e.g., 'um,' 'well,' 'you know').\n"
+    "- Allow for natural interruptions and back-and-forth between Jane and John.\n"
+    "- If any new or updated info is found (e.g., a resignation), it must be mentioned and integrated into the flow.\n"
+    "- Ensure John's responses are on-topic and substantiated by the input text and any newly discovered or breaking "
+    "  news.\n"
+    "- Maintain a PG-rated conversation appropriate for all audiences.\n"
+    "- Avoid any marketing or self-promotional content from John.\n"
+    "- Jane concludes the conversation in a pleasant manner, possibly teasing future updates if the topic is still "
+    "  evolving.\n\n"
+    "**Stylistic Guidelines for Natural Dialogue:**\n"
+    "- The dialogue should sound natural and conversational between Jane and John.\n"
+    "- Use a mix of short, punchy sentences along with longer, reflective sentences to create a dynamic rhythm.\n"
+    "- Include natural pauses and breaks to mimic human speech, using ellipses (...) or sentence fragments where "
+    "  appropriate.\n"
+    "- Vary sentence structures to avoid monotony; mix questions, statements, and exclamations.\n"
+    "- Inject humor or light-hearted comments to enhance relatability and keep the tone friendly.\n"
+    "- Predominantly use active voice to create a direct and engaging conversation.\n"
+    "- Add emotional inflections reflecting excitement, curiosity, or contemplation as needed.\n"
+    "- Occasionally include filler words like 'um' or 'you know' to enhance authenticity, but avoid overuse.\n"
+    "- Ensure Jane and John occasionally acknowledge each other with phrases like 'That's a great point!' or "
+    "  'I totally agree!' to simulate a real conversation.\n\n"
+    "The goal is to create an audio output that feels lively, relatable, and easy for listeners to follow.\n\n"
+    "# Additional Instruction for Interjections / Interruptions\n"
+    "Please include occasional, short interruptions or interjections where Jane or John might briefly cut in on "
+    "the other’s sentence (without overlapping audio). For example, they might say, 'Wait, wait...' or 'Hold on...' "
+    "to jump in, and then politely yield so the conversation remains understandable in sequence.\n"
+)

qa.py ADDED Viewed

	@@ -0,0 +1,88 @@

+# qa.py
+import os
+import requests
+import json
+import tempfile
+import streamlit as st
+from utils import generate_audio_mp3  # Reuse your existing TTS function
+def transcribe_audio_deepgram(local_audio_path: str) -> str:
+    """
+    Sends a local audio file to Deepgram for STT.
+    Returns the transcript text if successful, or raises an error if failed.
+    """
+    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
+    if not DEEPGRAM_API_KEY:
+        raise ValueError("Deepgram API key not found in environment variables.")
+    url = "https://api.deepgram.com/v1/listen?model=nova-2&smart_format=true"
+    headers = {
+        "Authorization": f"Token {DEEPGRAM_API_KEY}",
+        "Content-Type": "audio/wav"
+    }
+    with open(local_audio_path, "rb") as f:
+        response = requests.post(url, headers=headers, data=f)
+    response.raise_for_status()
+    data = response.json()
+    # Extract the transcript
+    transcript = data["results"]["channels"][0]["alternatives"][0].get("transcript", "")
+    return transcript
+def call_llm_for_qa(conversation_so_far: str, user_question: str) -> dict:
+    """
+    Calls Groq LLM to answer a follow-up question.
+    Returns a Python dict: {"speaker": "John", "text": "..."}
+    """
+    system_prompt = f"""
+    You are John, the guest speaker. The user is asking a follow-up question.
+    Conversation so far:
+    {conversation_so_far}
+    New user question:
+    {user_question}
+    Please respond in JSON with keys "speaker" and "text", e.g.:
+    {{ "speaker": "John", "text": "Sure, here's my answer..." }}
+    """
+    from utils import call_groq_api_for_qa  # Import from utils
+    raw_json_response = call_groq_api_for_qa(system_prompt)  # Corrected call
+    # Expect a JSON string: {"speaker": "John", "text": "some short answer"}
+    response_dict = json.loads(raw_json_response)
+    return response_dict
+def handle_qa_exchange(user_question: str) -> (bytes, str):
+    """
+    1) Read conversation_so_far from session_state
+    2) Call the LLM for a short follow-up answer
+    3) Generate TTS audio
+    4) Return (audio_bytes, answer_text)
+    """
+    conversation_so_far = st.session_state.get("conversation_history", "")
+    # Ask the LLM
+    response_dict = call_llm_for_qa(conversation_so_far, user_question)
+    answer_text = response_dict.get("text", "")
+    speaker = response_dict.get("speaker", "John")
+    # Update conversation
+    new_history = conversation_so_far + f"\nUser: {user_question}\n{speaker}: {answer_text}\n"
+    st.session_state["conversation_history"] = new_history
+    if not answer_text.strip():
+        return (None, "")
+    # TTS
+    audio_file_path = generate_audio_mp3(answer_text, "John")  # always John
+    with open(audio_file_path, "rb") as f:
+        audio_bytes = f.read()
+    os.remove(audio_file_path)
+    return (audio_bytes, answer_text)

utils.py ADDED Viewed

	@@ -0,0 +1,641 @@

+import os
+import re
+import json
+import requests
+import tempfile
+import random
+import numpy as np
+import torch
+import time
+from bs4 import BeautifulSoup
+from typing import List, Literal, Optional
+from pydantic import BaseModel
+from pydub import AudioSegment, effects
+from transformers import pipeline
+import tiktoken
+from groq import Groq
+import streamlit as st  # If you use Streamlit for session state
+from report_structure import generate_report  # Your PDF generator
+from tavily import TavilyClient  # For search
+###############################################################################
+#                                DATA MODELS
+###############################################################################
+class DialogueItem(BaseModel):
+    """One line of dialogue in a podcast script."""
+    # Restricted to the two literal values "Jane" and "John".
+    # NOTE(review): looks like this selects the TTS voice — confirm against generate_audio_mp3.
+    speaker: Literal["Jane", "John"]
+    # Speaker name as a free-form string; defaults to "Jane".
+    display_speaker: str = "Jane"
+    # The text of the line.
+    text: str
+class Dialogue(BaseModel):
+    """A script represented as a list of DialogueItem entries."""
+    # The dialogue lines, as a list of DialogueItem.
+    dialogue: List[DialogueItem]
+###############################################################################
+#                            HYBRID RATE-LIMIT HANDLER
+###############################################################################
+def call_llm_with_retry(groq_client, **payload):
+    """
+    Wraps groq_client.chat.completions.create(**payload) in a retry loop
+    to catch 429 rate-limit errors. If we see “try again in XXs,” we parse
+    that wait time, sleep, then retry. We also do a short sleep (0.3s)
+    after each successful call to spread usage.
+    """
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            print(f"[DEBUG] call_llm_with_retry attempt {attempt+1}")
+            response = groq_client.chat.completions.create(**payload)
+            # Short sleep to avoid bursting usage
+            time.sleep(0.3)
+            print("[DEBUG] LLM call succeeded, returning response.")
+            return response
+        except Exception as e:
+            err_str = str(e).lower()
+            print(f"[WARN] call_llm_with_retry attempt {attempt+1} failed: {e}")
+            if "rate_limit_exceeded" in err_str or "try again in" in err_str:
+                # parse recommended wait time
+                wait_time = 60.0
+                match = re.search(r'try again in (\d+(?:\.\d+)?)s', str(e), re.IGNORECASE)
+                if match:
+                    wait_time = float(match.group(1)) + 1.0
+                print(f"[WARN] Rate limited. Sleeping for {wait_time:.1f}s, then retrying.")
+                time.sleep(wait_time)
+            else:
+                raise
+    raise RuntimeError("Exceeded max_retries due to repeated rate limit or other errors.")
+###############################################################################
+#                                TRUNCATION
+###############################################################################
+def truncate_text_tokens(text: str, max_tokens: int) -> str:
+    """
+    Truncates 'text' to 'max_tokens' tokens. Used for controlling maximum
+    total text size after scraping.
+    """
+    tokenizer = tiktoken.get_encoding("cl100k_base")
+    tokens = tokenizer.encode(text)
+    if len(tokens) > max_tokens:
+        truncated = tokenizer.decode(tokens[:max_tokens])
+        print(f"[DEBUG] Truncating from {len(tokens)} tokens to {max_tokens} tokens.")
+        return truncated
+    return text
+def truncate_text_for_llm(text: str, max_tokens: int = 1024) -> str:
+    """
+    Trim 'text' to at most 'max_tokens' "cl100k_base" tokens (1024 unless
+    the caller overrides it) before it is placed in an LLM prompt.
+    Returns the input untouched when it is within budget; otherwise prints
+    a debug line and returns the decoded prefix.
+    """
+    codec = tiktoken.get_encoding("cl100k_base")
+    encoded = codec.encode(text)
+    total = len(encoded)
+    within_budget = total <= max_tokens
+    if within_budget:
+        return text
+    clipped = codec.decode(encoded[:max_tokens])
+    print(f"[DEBUG] Truncating text from {total} to {max_tokens} tokens for LLM.")
+    return clipped
+###############################################################################
+#                         PITCH SHIFT (Optional)
+###############################################################################
+def pitch_shift(audio: AudioSegment, semitones: int) -> AudioSegment:
+    """
+    Shift the pitch of 'audio' by 'semitones' (negative values lower it).
+    The raw samples are re-labelled with a frame rate scaled by
+    2 ** (semitones / 12), and the result is then converted back to the
+    original frame rate via set_frame_rate.
+    """
+    print(f"[LOG] Shifting pitch by {semitones} semitones.")
+    rate_factor = 2.0 ** (semitones / 12.0)
+    relabelled = audio._spawn(
+        audio.raw_data,
+        overrides={'frame_rate': int(audio.frame_rate * rate_factor)},
+    )
+    return relabelled.set_frame_rate(audio.frame_rate)
+###############################################################################
+#                      PODCAST SCRIPT GENERATION (Single Call)
+###############################################################################
+def generate_script(
+    system_prompt: str,
+    input_text: str,
+    tone: str,
+    target_length: str,
+    host_name: str = "Jane",
+    guest_name: str = "John",
+    sponsor_style: str = "Separate Break",
+    sponsor_provided=None
+):
+    """
+    Generate the entire podcast script with a single LLM request.
+    The prompt is assembled from 'system_prompt', a tone description, a word
+    budget derived from 'target_length', 'input_text' and optional sponsor
+    instructions, then POSTed to the OpenRouter chat-completions endpoint
+    with the "deepseek/deepseek-r1" model. The JSON object found in the reply
+    is converted into DialogueItem entries.
+    Args:
+        system_prompt: Base instructions placed at the top of the prompt.
+        input_text: Source material the script should be based on.
+        tone: One of the keys of the tone map ("Humorous", "Formal",
+            "Casual", "Youthful"); anything else falls back to "casual".
+        target_length: Free text; the first integer found is taken as the
+            number of minutes (3 when none is found).
+        host_name: Display name mapped to the "Jane" voice.
+        guest_name: Display name mapped to the "John" voice.
+        sponsor_style: "Separate Break" for a dedicated ad break, any other
+            value for a blended mention. Only used when 'sponsor_provided'
+            is truthy.
+        sponsor_provided: Truthy when sponsor content is part of the input.
+    Returns:
+        A Dialogue whose items carry the voice key ("Jane"/"John") in
+        'speaker' and the name to show in 'display_speaker'.
+    Raises:
+        ValueError: if the request fails, no JSON object is present in the
+            reply, or the JSON cannot be turned into dialogue items.
+    """
+    print("[LOG] Generating script with tone:", tone, "and length:", target_length)
+    language_selection = st.session_state.get("language_selection", "English (American)")
+    # Default/blank names are replaced by Indian names for the Indian language options.
+    if language_selection in ["English (Indian)", "Hinglish", "Hindi"]:
+        if host_name == "Jane" or not host_name:
+            host_name = "Isha"
+        if guest_name == "John" or not guest_name:
+            guest_name = "Aarav"
+    words_per_minute = 150
+    numeric_minutes = 3
+    match = re.search(r"(\d+)", target_length)
+    if match:
+        numeric_minutes = int(match.group(1))
+    min_words = max(50, numeric_minutes * 100)
+    max_words = numeric_minutes * words_per_minute
+    tone_map = {
+        "Humorous": "funny and exciting, makes people chuckle",
+        "Formal": "business-like, well-structured, professional",
+        "Casual": "like a conversation between close friends, relaxed and informal",
+        "Youthful": "like how teenagers might chat, energetic and lively"
+    }
+    chosen_tone = tone_map.get(tone, "casual")
+    if not sponsor_provided:
+        sponsor_instructions = ""
+    elif sponsor_style == "Separate Break":
+        sponsor_instructions = (
+            "If sponsor content is provided, include it in a separate ad break (~30 seconds). "
+            "Use 'Now a word from our sponsor...' and end with 'Back to the show', etc."
+        )
+    else:
+        sponsor_instructions = (
+            "If sponsor content is provided, blend it naturally (~30 seconds) into conversation. "
+            "Avoid abrupt transitions."
+        )
+    prompt = (
+        f"{system_prompt}\n"
+        f"TONE: {chosen_tone}\n"
+        f"TARGET LENGTH: {target_length} (~{min_words}-{max_words} words)\n"
+        f"INPUT TEXT: {input_text}\n\n"
+        f"# Sponsor Style Instruction:\n{sponsor_instructions}\n\n"
+        "Please provide the output in the following JSON format without any extra text:\n"
+        "{\n"
+        '   "dialogue": [\n'
+        '     { "speaker": "Jane", "text": "..." },\n'
+        '     { "speaker": "John", "text": "..." }\n'
+        "   ]\n"
+        "}"
+    )
+    if language_selection == "Hinglish":
+        prompt += "\n\nPlease generate the script in Romanized Hindi.\n"
+    elif language_selection == "Hindi":
+        prompt += "\n\nPlease generate the script exclusively in Hindi.\n"
+    print("[LOG] Sending script generation prompt to LLM.")
+    try:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('DEEPSEEK_API_KEY')}",
+            "Content-Type": "application/json"
+        }
+        data = {
+            "model": "deepseek/deepseek-r1",
+            "messages": [{"role": "user", "content": prompt}],
+            "max_tokens": 2048,
+            "temperature": 0.7
+        }
+        # A generous timeout: without one a stalled connection would block forever.
+        resp = requests.post(
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers=headers,
+            json=data,
+            timeout=300,
+        )
+        resp.raise_for_status()
+        raw_content = resp.json()["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print("[ERROR] LLM error generating script:", e)
+        raise ValueError(f"Error generating script: {str(e)}") from e
+    # The reply may wrap the JSON in extra text; keep the outermost {...} span.
+    start_idx = raw_content.find("{")
+    end_idx = raw_content.rfind("}")
+    if start_idx == -1 or end_idx == -1:
+        raise ValueError("No JSON found in LLM response for script generation.")
+    json_str = raw_content[start_idx:end_idx+1]
+    try:
+        data_js = json.loads(json_str)
+        dialogue_list = data_js.get("dialogue", [])
+        # Tolerate None names instead of crashing on .lower().
+        host_key = (host_name or "").lower()
+        guest_key = (guest_name or "").lower()
+        new_dialogue_items = []
+        for d in dialogue_list:
+            raw_speaker = d.get("speaker", "Jane")
+            speaker_key = raw_speaker.lower()
+            if speaker_key == host_key:
+                d["speaker"] = "Jane"
+                d["display_speaker"] = host_name
+            elif speaker_key == guest_key:
+                d["speaker"] = "John"
+                d["display_speaker"] = guest_name
+            elif speaker_key == "john":
+                # The requested JSON format names the second speaker "John";
+                # keep that voice even when guest_name is something else.
+                d["speaker"] = "John"
+                d["display_speaker"] = raw_speaker
+            else:
+                # Unknown speakers fall back to the host voice.
+                d["speaker"] = "Jane"
+                d["display_speaker"] = raw_speaker
+            new_dialogue_items.append(DialogueItem(**d))
+        return Dialogue(dialogue=new_dialogue_items)
+    except json.JSONDecodeError as e:
+        print("[ERROR] JSON decoding failed for script generation:", e)
+        raise ValueError(f"Script parse error: {str(e)}") from e
+    except Exception as e:
+        print("[ERROR] Unknown error parsing script JSON:", e)
+        raise ValueError(f"Script parse error: {str(e)}") from e
+###############################################################################
+#                      YOUTUBE TRANSCRIPTION (RAPIDAPI)
+###############################################################################
+def transcribe_youtube_video(video_url: str) -> str:
+    """
+    Fetch the English transcript of a YouTube video through the RapidAPI
+    "youtube-transcriptor" service.
+    The 11-character video ID is pulled out of 'video_url', the transcript
+    endpoint is queried with the RAPIDAPI_KEY environment variable, and the
+    "transcriptionAsText" field of the first list entry is returned stripped.
+    Raises ValueError when the ID cannot be extracted, and wraps every
+    request or response-format failure in a ValueError as well.
+    """
+    print("[LOG] Transcribing YouTube video:", video_url)
+    id_match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})", video_url)
+    if id_match is None:
+        raise ValueError(f"Invalid YouTube URL: {video_url}, cannot extract video ID.")
+    video_id = id_match.group(1)
+    print("[LOG] Extracted video ID:", video_id)
+    endpoint = "https://youtube-transcriptor.p.rapidapi.com/transcript"
+    query = {"video_id": video_id, "lang": "en"}
+    request_headers = {
+        "x-rapidapi-host": "youtube-transcriptor.p.rapidapi.com",
+        "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY")
+    }
+    try:
+        response = requests.get(endpoint, headers=request_headers, params=query, timeout=30)
+        response.raise_for_status()
+        payload = response.json()
+        # The service is expected to answer with a non-empty list.
+        if not (isinstance(payload, list) and payload):
+            raise ValueError(f"Unexpected transcript format or empty transcript: {payload}")
+        transcript = payload[0].get("transcriptionAsText", "").strip()
+        if not transcript:
+            raise ValueError("transcriptionAsText missing or empty in RapidAPI response.")
+        print("[LOG] Transcript retrieval successful. Sample:", transcript[:200], "...")
+        return transcript
+    except Exception as err:
+        print("[ERROR] YouTube transcription error:", err)
+        raise ValueError(f"Error transcribing YouTube video: {str(err)}")
+###############################################################################
+#                  AUDIO GENERATION (TTS) AND BG MUSIC MIX
+###############################################################################
+def _preprocess_text_for_tts(text: str, speaker: str) -> str:
+    """
+    Rewrite 'text' so a TTS engine reads it more naturally.
+    Steps, in order:
+      - "No." directly followed by a word character becomes "Number";
+        "SaaS" (any casing) becomes "sass".
+      - All-caps words of 2+ letters are spelled out letter by letter
+        ("AI" -> "A I"), except NASA, NATO and UNESCO.
+      - Hyphens become spaces; laugh/sigh/groan words become stage cues.
+      - For any speaker other than "Jane", pauses ("...") are inserted after
+        emphasis keywords (with a random filler 30% of the time) and after
+        common conjunctions.
+      - "uh", "um", "ah" are removed and sentence starts are capitalised.
+    Returns the processed, stripped text.
+    """
+    text = re.sub(r"\bNo\.\b", "Number", text, flags=re.IGNORECASE)
+    # Case-insensitivity is passed via 'flags': an inline "(?i)" placed after
+    # "\b" is not at the start of the pattern and raises re.error on 3.11+.
+    text = re.sub(r"\bSaaS\b", "sass", text, flags=re.IGNORECASE)
+    abbreviations_as_words = {"NASA", "NATO", "UNESCO"}
+    def insert_periods_for_abbrev(m):
+        abbr = m.group(0)
+        if abbr in abbreviations_as_words:
+            return abbr
+        return ".".join(abbr) + "."
+    text = re.sub(r"\b([A-Z]{2,})\b", insert_periods_for_abbrev, text)
+    # An abbreviation at the end of a sentence would otherwise end in "..".
+    text = re.sub(r"\.\.", ".", text)
+    def remove_periods_for_tts(m):
+        return m.group().replace(".", " ").strip()
+    text = re.sub(r"[A-Z]\.[A-Z](?:\.[A-Z])*\.", remove_periods_for_tts, text)
+    text = re.sub(r"-", " ", text)
+    text = re.sub(r"\b(ha(ha)?|heh|lol)\b", "(* laughs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\bsigh\b", "(* sighs *)", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(groan|moan)\b", "(* groans *)", text, flags=re.IGNORECASE)
+    if speaker != "Jane":
+        def insert_thinking_pause(m):
+            wd = m.group(1)
+            if random.random() < 0.3:
+                filler = random.choice(["hmm,", "well,", "let me see,"])
+                return f"{wd}..., {filler}"
+            return f"{wd}...,"
+        keywords_pattern = r"\b(important|significant|crucial|point|topic)\b"
+        text = re.sub(keywords_pattern, insert_thinking_pause, text, flags=re.IGNORECASE)
+        conj_pattern = r"\b(and|but|so|because|however)\b"
+        text = re.sub(conj_pattern, lambda m: f"{m.group()}...", text, flags=re.IGNORECASE)
+    text = re.sub(r"\b(uh|um|ah)\b", "", text, flags=re.IGNORECASE)
+    def capitalize_after_sentence(m):
+        return m.group().upper()
+    text = re.sub(r'(^\s*\w)|([.!?]\s*\w)', capitalize_after_sentence, text)
+    return text.strip()
+def generate_audio_mp3(text: str, speaker: str) -> str:
+    """
+    Synthesize 'text' for 'speaker' and return the path of a normalized MP3.
+    Uses Deepgram when the session's language selection is
+    "English (American)" and the Murf API for every other selection
+    (Indian English, Hinglish, Hindi). The downloaded audio is written to an
+    intermediate temp file, normalized, exported to a second temp file and
+    the intermediate file is removed whether or not the call succeeds.
+    Args:
+        text: The line to speak.
+        speaker: Voice key; "John" selects the male voice, anything else the
+            female voice.
+    Returns:
+        Path of the final MP3 temp file (the caller owns and must delete it).
+    Raises:
+        ValueError: wrapping any request, decoding or export failure.
+    """
+    print(f"[LOG] Generating TTS for speaker={speaker}")
+    language_selection = st.session_state.get("language_selection", "English (American)")
+    request_timeout = 120  # seconds; keeps a stalled TTS service from hanging the app
+    intermediate_path = None
+    final_mp3_path = None
+    try:
+        if language_selection == "English (American)":
+            print("[LOG] Using Deepgram for American English TTS.")
+            processed_text = text if speaker in ["Jane", "John"] else _preprocess_text_for_tts(text, speaker)
+            deepgram_api_url = "https://api.deepgram.com/v1/speak"
+            params = {"model": "aura-asteria-en"} if speaker != "John" else {"model": "aura-zeus-en"}
+            headers = {
+                "Accept": "audio/mpeg",
+                "Content-Type": "application/json",
+                "Authorization": f"Token {os.environ.get('DEEPGRAM_API_KEY')}"
+            }
+            body = {"text": processed_text}
+            # 'with' releases the streamed connection even if writing fails.
+            with requests.post(
+                deepgram_api_url,
+                params=params,
+                headers=headers,
+                json=body,
+                stream=True,
+                timeout=request_timeout,
+            ) as r:
+                r.raise_for_status()
+                content_type = r.headers.get("Content-Type", "")
+                if "audio/mpeg" not in content_type:
+                    raise ValueError("Unexpected content-type from Deepgram TTS.")
+                with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
+                    intermediate_path = mp3_file.name
+                    for chunk in r.iter_content(chunk_size=8192):
+                        if chunk:
+                            mp3_file.write(chunk)
+            intermediate_format = "mp3"
+        else:
+            print("[LOG] Using Murf API for TTS. Language=", language_selection)
+            if language_selection == "Hinglish":
+                # Imported only where needed so the other languages do not
+                # depend on the transliteration package being installed.
+                from indic_transliteration.sanscript import transliterate, DEVANAGARI, IAST
+                text = transliterate(text, DEVANAGARI, IAST)
+            api_key = os.environ.get("MURF_API_KEY")
+            headers = {
+                "Content-Type": "application/json",
+                "Accept": "application/json",
+                "api-key": api_key
+            }
+            if language_selection in ["Hindi", "Hinglish"]:
+                multi_native_locale = "hi-IN"
+                voice_id = "hi-IN-kabir" if speaker == "John" else "hi-IN-shweta"
+            else:
+                multi_native_locale = "en-IN"
+                voice_id = "en-IN-aarav" if speaker == "John" else "en-IN-isha"
+            payload = {
+                "audioDuration": 0,
+                "channelType": "MONO",
+                "encodeAsBase64": False,
+                "format": "WAV",
+                "modelVersion": "GEN2",
+                "multiNativeLocale": multi_native_locale,
+                "pitch": 0,
+                "pronunciationDictionary": {},
+                "rate": 0,
+                "sampleRate": 48000,
+                "style": "Conversational",
+                "text": text,
+                "variation": 1,
+                "voiceId": voice_id
+            }
+            r = requests.post(
+                "https://api.murf.ai/v1/speech/generate",
+                headers=headers,
+                json=payload,
+                timeout=request_timeout,
+            )
+            r.raise_for_status()
+            j = r.json()
+            audio_url = j.get("audioFile")
+            if not audio_url:
+                raise ValueError("No audioFile URL from Murf API.")
+            audio_resp = requests.get(audio_url, timeout=request_timeout)
+            audio_resp.raise_for_status()
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
+                intermediate_path = wav_file.name
+                wav_file.write(audio_resp.content)
+            intermediate_format = "wav"
+        audio_seg = AudioSegment.from_file(intermediate_path, format=intermediate_format)
+        audio_seg = effects.normalize(audio_seg)
+        # Reserve the output name and close the handle before exporting to it.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_file:
+            final_mp3_path = out_file.name
+        audio_seg.export(final_mp3_path, format="mp3")
+        return final_mp3_path
+    except Exception as e:
+        print("[ERROR] TTS generation error:", e)
+        # Do not leave a half-written output file behind.
+        if final_mp3_path and os.path.exists(final_mp3_path):
+            os.remove(final_mp3_path)
+        raise ValueError(f"Error generating TTS audio: {str(e)}") from e
+    finally:
+        if intermediate_path and os.path.exists(intermediate_path):
+            os.remove(intermediate_path)
+def mix_with_bg_music(spoken: AudioSegment, custom_music_path=None) -> AudioSegment:
+    """
+    Overlay 'spoken' on looped background music.
+    The music ('custom_music_path' if given, else "bg_music.mp3") is lowered
+    by 18 dB, repeated until it covers the speech plus 2 seconds, and the
+    speech is overlaid starting 2 seconds in.
+    Returns 'spoken' unchanged when the music cannot be loaded or is empty.
+    """
+    music_path = custom_music_path if custom_music_path else "bg_music.mp3"
+    try:
+        bg_music = AudioSegment.from_file(music_path, format="mp3")
+    except Exception as e:
+        print("[ERROR] Failed to load background music:", e)
+        return spoken
+    if len(bg_music) == 0:
+        # A zero-length track can never fill the loop below; skip the mix
+        # instead of spinning forever.
+        print("[ERROR] Failed to load background music:", "track is empty")
+        return spoken
+    bg_music = bg_music - 18.0
+    total_length_ms = len(spoken) + 2000
+    looped_music = AudioSegment.empty()
+    while len(looped_music) < total_length_ms:
+        looped_music += bg_music
+    looped_music = looped_music[:total_length_ms]
+    final_mix = looped_music.overlay(spoken, position=2000)
+    return final_mix
+###############################################################################
+#                     Q&A UTILITY (POST-PODCAST)
+###############################################################################
+def call_groq_api_for_qa(system_prompt: str) -> str:
+    """
+    Single-step Q&A call for the post-podcast question feature.
+    Sends 'system_prompt' as one user message to Groq's OpenAI-compatible
+    chat-completions endpoint ("deepseek-r1-distill-llama-70b", at most 512
+    tokens) and returns the stripped reply text.
+    This function never raises: on any failure it logs the error and returns
+    a JSON string with a fallback answer attributed to "John".
+    """
+    try:
+        headers = {
+            "Authorization": f"Bearer {os.environ.get('GROQ_API_KEY')}",
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }
+        data = {
+            "model": "deepseek-r1-distill-llama-70b",
+            "messages": [{"role": "user", "content": system_prompt}],
+            "max_tokens": 512,
+            "temperature": 0.7
+        }
+        # Timeout so a stalled connection ends in the fallback answer
+        # instead of blocking the Q&A exchange indefinitely.
+        r = requests.post(
+            "https://api.groq.com/openai/v1/chat/completions",
+            headers=headers,
+            json=data,
+            timeout=60,
+        )
+        r.raise_for_status()
+        return r.json()["choices"][0]["message"]["content"].strip()
+    except Exception as e:
+        print("[ERROR] Groq QA error:", e)
+        fallback = {"speaker": "John", "text": "Sorry, I'm having trouble answering now."}
+        return json.dumps(fallback)
+###############################################################################
+#                    LOW-CALL RESEARCH AGENT (Minimizing LLM Calls)
+###############################################################################
+# Model used by run_research_agent for the per-chunk summarization calls.
+MODEL_SUMMARIZATION = "llama-3.1-8b-instant"
+# Model used by run_research_agent for the single final merge call.
+MODEL_COMBINATION   = "deepseek-r1-distill-llama-70b"
+def run_research_agent(
+    topic: str,
+    report_type: str = "research_report",
+    max_results: int = 20
+) -> str:
+    """
+    Low-Call approach:
+      1) Tavily search (up to 'max_results' URLs).
+      2) Firecrawl scrape => combined text
+      3) Truncate to 12k tokens total
+      4) Split => at most 2 x 6k chunks => Summarize each chunk once => summaries
+      5) Single final merge => final PDF
+      => 2 or 3 total LLM calls => drastically fewer calls => less chance of 429
+    Logs at each step for clarity.
+    Args:
+        topic: Search query / report subject.
+        report_type: Accepted for interface compatibility; not used here.
+        max_results: Maximum number of Tavily results to request.
+    Returns:
+        Whatever generate_report returns for the merged text. When nothing
+        could be found or scraped, or any step raises, a plain message string
+        is returned instead; this function does not raise.
+    """
+    print(f"[LOG] Starting LOW-CALL research agent for topic: {topic}")
+    try:
+        # Step 1: Tavily search
+        print(f"[LOG] Step 1: Searching with Tavily for relevant URLs (max_results={max_results}).")
+        tavily_client = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))
+        search_data = tavily_client.search(query=topic, max_results=max_results)
+        search_results = search_data.get("results", [])
+        print(f"[LOG] Tavily provided {len(search_results)} results. Proceeding to Step 2.")
+        if not search_results:
+            print("[LOG] No relevant search results found by Tavily.")
+            return "No relevant search results found."
+        references_list = [r["url"] for r in search_results if "url" in r]
+        # Step 2: Firecrawl scraping
+        print("[LOG] Step 2: Scraping each URL with Firecrawl.")
+        headers = {'Authorization': f'Bearer {os.environ.get("FIRECRAWL_API_KEY")}'}
+        scraped_parts = []
+        # Iterate the filtered URL list so a result without "url" is skipped
+        # rather than aborting the whole run with a KeyError.
+        for url in references_list:
+            print(f"[LOG] Firecrawl scraping: {url}")
+            payload = {"url": url, "formats": ["markdown"], "onlyMainContent": True}
+            try:
+                resp = requests.post(
+                    "https://api.firecrawl.dev/v1/scrape",
+                    headers=headers,
+                    json=payload,
+                    timeout=60,
+                )
+                resp.raise_for_status()
+                data = resp.json()
+            except (requests.RequestException, ValueError) as e:
+                # Best effort: one failing page must not stop the others.
+                print(f"[ERROR] Firecrawl error for {url}: {e}")
+                continue
+            markdown = (data.get("data") or {}).get("markdown")
+            if data.get("success") and markdown:
+                scraped_parts.append(markdown + "\n\n")
+            else:
+                print(f"[WARNING] Firecrawl scrape failed or no markdown for {url}: {data.get('error')}")
+        combined_content = "".join(scraped_parts)
+        if not combined_content:
+            print("[LOG] Could not retrieve content from any search results. Exiting.")
+            return "Could not retrieve content from any of the search results."
+        # Step 3: Truncate to 12k tokens total
+        print("[LOG] Step 3: Truncating combined text to 12,000 tokens if needed.")
+        combined_content = truncate_text_tokens(combined_content, max_tokens=12000)
+        # Step 4: At most 2 chunks => Summaries
+        print("[LOG] Step 4: Splitting text into up to 2 chunks (6,000 tokens each). Summarizing each chunk.")
+        tokenizer = tiktoken.get_encoding("cl100k_base")
+        tokens = tokenizer.encode(combined_content)
+        chunk_size = 6000
+        max_chunks = 2
+        groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+        summaries = []
+        # Cap at max_chunks even if re-encoding yields slightly more tokens.
+        chunk_starts = range(0, len(tokens), chunk_size)[:max_chunks]
+        for chunk_index, start in enumerate(chunk_starts, start=1):
+            chunk_tokens = tokens[start:start + chunk_size]
+            chunk_text = tokenizer.decode(chunk_tokens)
+            print(f"[LOG] Summarizing chunk {chunk_index} with ~{len(chunk_tokens)} tokens.")
+            prompt = f"""
+You are a specialized summarization engine. Summarize the following text
+for a professional research report. Provide accurate details but do not
+include chain-of-thought or internal reasoning. Keep it concise, but
+include key data points and context:
+{chunk_text}
+"""
+            data = {
+                "model": MODEL_SUMMARIZATION,
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.2,
+                "max_tokens": 768
+            }
+            response = call_llm_with_retry(groq_client, **data)
+            summaries.append(response.choices[0].message.content.strip())
+        # Step 5: Single final merge call
+        print("[LOG] Step 5: Doing one final merge of chunk summaries.")
+        references_text = "\n".join(f"- {url}" for url in references_list) if references_list else "None"
+        truncated_summaries = [truncate_text_for_llm(s, max_tokens=1000) for s in summaries]
+        merged_input = "\n\n".join(truncated_summaries)
+        final_prompt = f"""
+IMPORTANT: Do NOT include chain-of-thought or hidden planning.
+Produce a long, academic-style research paper with the following structure:
+- Title Page (concise descriptive title)
+- Table of Contents
+- Executive Summary
+- Introduction
+- Historical or Contextual Background
+- Multiple Thematic Sections (with subheadings)
+- Detailed Analysis (multi-paragraph sections)
+- Footnotes or inline citations referencing the URLs
+- Conclusion
+- References / Bibliography (list these URLs at the end)
+Requirements:
+- Minimal bullet points, prefer multi-paragraph
+- Each section at least 2-3 paragraphs
+- Aim for 1500+ words if possible
+- Under 6000 tokens total
+- Professional, academic tone
+Partial Summaries:
+{merged_input}
+References (URLs):
+{references_text}
+Now, merge these partial summaries into one thoroughly expanded research paper:
+"""
+        final_data = {
+            "model": MODEL_COMBINATION,
+            "messages": [{"role": "user", "content": final_prompt}],
+            "temperature": 0.3,
+            "max_tokens": 2048
+        }
+        final_response = call_llm_with_retry(groq_client, **final_data)
+        final_text = final_response.choices[0].message.content.strip()
+        # Step 6: PDF generation
+        print("[LOG] Step 6: Generating final PDF from the merged text.")
+        final_report = generate_report(final_text)
+        print("[LOG] Done! Returning PDF from run_research_agent (low-call).")
+        return final_report
+    except Exception as e:
+        print(f"[ERROR] Error in run_research_agent: {e}")
+        return f"Sorry, encountered an error: {str(e)}"