File size: 5,222 Bytes
68dff12
 
 
 
 
331f1ed
68dff12
 
198f8e5
68dff12
 
 
 
331f1ed
 
4863b2c
 
68dff12
198f8e5
68dff12
 
 
4863b2c
 
331f1ed
68dff12
 
 
198f8e5
 
331f1ed
198f8e5
331f1ed
 
198f8e5
 
 
331f1ed
 
 
68dff12
198f8e5
4863b2c
68dff12
198f8e5
4863b2c
 
198f8e5
331f1ed
 
 
4863b2c
41bab03
4863b2c
 
41bab03
68dff12
41bab03
331f1ed
4863b2c
41bab03
4863b2c
 
41bab03
198f8e5
 
68dff12
4863b2c
 
68dff12
 
41bab03
68dff12
198f8e5
4863b2c
 
 
41bab03
4863b2c
198f8e5
68dff12
198f8e5
41bab03
198f8e5
4863b2c
 
198f8e5
41bab03
4863b2c
198f8e5
4863b2c
 
331f1ed
 
4863b2c
 
 
 
 
 
 
 
 
331f1ed
41bab03
4863b2c
 
331f1ed
41bab03
 
198f8e5
4863b2c
331f1ed
198f8e5
331f1ed
41bab03
68dff12
4863b2c
331f1ed
198f8e5
41bab03
198f8e5
68dff12
198f8e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41bab03
198f8e5
 
4863b2c
41bab03
198f8e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41bab03
4863b2c
198f8e5
 
 
41bab03
198f8e5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import gradio as gr
import google.generativeai as genai
from elevenlabs.client import ElevenLabs
import os
import json
from pypdf import PdfReader
from dotenv import load_dotenv

# 1️⃣ Load API Keys
# Load environment variables (via python-dotenv) before reading the API keys.
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
ELEVEN_API_KEY = os.getenv("ELEVEN_API_KEY")

# Gemini is configured only when a key is present; generate_script() reports
# missing keys to the user instead of failing at import time.
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)

# Always bind `client` so that later references never raise NameError when the
# ElevenLabs key is absent; it is None in that case.
client = ElevenLabs(api_key=ELEVEN_API_KEY) if ELEVEN_API_KEY else None

# 2️⃣ Podcast State
class PodcastState:
    """Mutable session data shared by the Gradio callbacks in this module."""

    def __init__(self):
        # Parsed dialogue turns and the position of the next turn to voice.
        self.script, self.current_index = list(), 0
        # Persona chosen for the current script.
        self.persona = "Serious Academic"
        # Text extracted from the uploaded PDF ("" until one is processed).
        self.full_text = str()


# Single module-level instance used by every callback.
state = PodcastState()

# 3️⃣ PDF Extraction
def extract_text_from_pdf(pdf_file):
    """Return the text of the first five pages of ``pdf_file``.

    Each page that yields text contributes that text followed by a newline;
    pages with no extractable text are skipped.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source.

    Returns:
        The concatenated page text, or a string of the form
        ``"Error reading PDF: <exception>"`` if anything goes wrong.
    """
    try:
        leading_pages = PdfReader(pdf_file).pages[:5]
        extracted = (pg.extract_text() for pg in leading_pages)
        # The generator is consumed inside the try, so extraction errors on
        # any page are reported the same way as a failure to open the file.
        return "".join(content + "\n" for content in extracted if content)
    except Exception as err:
        return f"Error reading PDF: {err}"

# 4️⃣ Generate Script
def generate_script(pdf_file, persona_style):
    """Generate a short two-host dialogue script from an uploaded PDF.

    Extracts text from the PDF, asks Gemini for a 4-turn dialogue in the
    requested persona, and stores the result in the module-level ``state`` so
    that ``play_next_chunk`` can voice it line by line.

    Args:
        pdf_file: Source passed to ``extract_text_from_pdf``; a falsy value
            means nothing was uploaded.
        persona_style: Key selecting the dialogue style. Unknown values fall
            back to the "Serious Academic" prompt.

    Returns:
        A ``(status_message, script)`` tuple. ``script`` is the parsed list of
        ``{"speaker": ..., "text": ...}`` dicts on success and ``{}`` on any
        failure.
    """
    if not pdf_file:
        return "⚠️ Upload PDF first.", {}

    if not GEMINI_API_KEY or not ELEVEN_API_KEY:
        return "⚠️ API Keys missing in HF Secrets!", {}

    pdf_text = extract_text_from_pdf(pdf_file)
    # extract_text_from_pdf reports failures as a string; surface that message
    # instead of storing it and sending it to the model as "paper" content.
    if pdf_text.startswith("Error reading PDF:"):
        return pdf_text, {}
    if not pdf_text.strip():
        return "⚠️ No extractable text found in the PDF.", {}

    state.full_text = pdf_text
    state.persona = persona_style
    # Drop any script from a previous run so a failed generation cannot leave
    # the old dialogue playable alongside the new paper.
    state.script = []
    state.current_index = 0

    prompts = {
        "Serious Academic": "Two professors discussing the paper. Tone: Intellectual.",
        "Gossip Columnist": "Two gossip hosts reacting dramatically.",
    }
    # Fall back to the default persona so the prompt never contains "None".
    style_prompt = prompts.get(persona_style, prompts["Serious Academic"])

    system_prompt = f"""
    {style_prompt}

    Based on the paper:
    "{pdf_text[:4000]}..."

    Generate a short 4-turn dialogue script.
    Return JSON ONLY:
    [
        {{"speaker":"Host A","text":"..."}},
        {{"speaker":"Host B","text":"..."}}
    ]
    """

    try:
        model = genai.GenerativeModel("gemini-2.0-flash")
        response = model.generate_content(system_prompt)
        # The model may wrap its answer in a Markdown code fence; strip it.
        clean_json = response.text.replace("```json", "").replace("```", "").strip()
        script = json.loads(clean_json)
    except Exception as e:
        return f"Error generating script: {e}", {}

    # play_next_chunk indexes each turn by "speaker" and "text", so reject
    # anything that is not a non-empty list of such dicts.
    is_valid = (
        isinstance(script, list)
        and bool(script)
        and all(
            isinstance(turn, dict) and "speaker" in turn and "text" in turn
            for turn in script
        )
    )
    if not is_valid:
        return "Error generating script: model returned an unexpected JSON structure.", {}

    state.script = script
    state.current_index = 0
    return "✅ Script ready!", script

# 5️⃣ Play next line
def play_next_chunk():
    """Synthesize audio for the next line of the stored script.

    Reads the line at ``state.current_index``, sends its text to the
    ElevenLabs client, writes the returned audio to ``temp_<index>.mp3`` in
    the working directory, and advances the index on success.

    Returns:
        A ``(audio_path, transcript)`` tuple. ``audio_path`` is ``None`` when
        the script is finished, the line is malformed, or synthesis failed;
        ``transcript`` then carries the corresponding message.
    """
    if state.current_index >= len(state.script):
        return None, "🎉 Podcast finished."

    index = state.current_index
    try:
        line = state.script[index]
        speaker, text = line["speaker"], line["text"]
    except (KeyError, IndexError, TypeError):
        # Retrying a malformed entry can never succeed, so move past it
        # instead of raising out of the UI callback or getting stuck.
        state.current_index += 1
        return None, f"⚠️ Skipped malformed script line {index + 1}."

    # Default voice for every line; "Host B" gets an alternate voice in the
    # gossip persona only.
    voice_id = "nPczCjz82tPNOwVbpGE2"
    if state.persona == "Gossip Columnist" and speaker == "Host B":
        voice_id = "21m00Tcm4TlvDq8ikWAM"

    try:
        audio_stream = client.generate(
            text=text,
            voice=voice_id,
            model="eleven_monolingual_v1"
        )

        save_path = f"temp_{index}.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)
    except Exception as e:
        # The index is not advanced, so pressing play again retries this line.
        return None, f"Audio error: {e}"

    state.current_index += 1
    return save_path, f"{speaker}: {text}"

# 6️⃣ Interrupt & Ask
def interrupt_and_ask(question):
    """Answer a listener question about the uploaded paper and voice the answer.

    Builds a prompt from the current persona, the first 1000 characters of the
    stored paper text and the question, asks Gemini for a brief answer, then
    synthesizes that answer to ``interrupt.mp3``.

    Args:
        question: The text typed by the user.

    Returns:
        A ``(audio_path, text)`` tuple. ``audio_path`` is ``None`` when no PDF
        has been processed, the question is blank, or a service call failed;
        ``text`` then carries the corresponding message.
    """
    if not state.full_text:
        return None, "Upload a PDF first."

    # Avoid spending a model call and a TTS call on a blank question.
    if not question or not question.strip():
        return None, "Type a question first."

    prompt = f"""
    Persona: {state.persona}
    Context: {state.full_text[:1000]}
    User Question: {question}

    Answer briefly, then say "Anyway, back to the paper..."
    """
    try:
        # Constructed inside the try so configuration problems are reported
        # like any other failure instead of escaping the callback.
        model = genai.GenerativeModel("gemini-2.0-flash")
        response = model.generate_content(prompt)
        answer = response.text
        audio_stream = client.generate(
            text=answer,
            voice="nPczCjz82tPNOwVbpGE2",
            model="eleven_monolingual_v1"
        )
        save_path = "interrupt.mp3"
        with open(save_path, "wb") as f:
            for chunk in audio_stream:
                f.write(chunk)
        return save_path, answer
    except Exception as e:
        return None, f"Error: {e}"

# 7️⃣ Build Gradio 5.7 UI
with gr.Blocks() as demo:
    gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator")

    with gr.Row():
        # Left column: upload, persona choice and the generated script.
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            persona = gr.Dropdown(
                choices=["Serious Academic", "Gossip Columnist"],
                value="Serious Academic",
                label="Persona Style",
            )
            btn_gen = gr.Button("Generate Podcast Script")
            status = gr.Textbox(label="Status")
            script_display = gr.JSON(label="Generated Script")

        # Right column: playback, transcript and the question box.
        with gr.Column():
            player = gr.Audio(label="Audio Output", autoplay=True)
            transcript = gr.Textbox(label="Transcript")
            btn_play = gr.Button("▶️ Play Next Line")
            q_input = gr.Textbox(label="Ask a Question")
            btn_interrupt = gr.Button("✋ Interrupt Podcast")

    # Wire each button to its callback; playback and interruption share the
    # same audio player and transcript box.
    btn_gen.click(
        fn=generate_script,
        inputs=[pdf_input, persona],
        outputs=[status, script_display],
    )
    btn_play.click(
        fn=play_next_chunk,
        inputs=[],
        outputs=[player, transcript],
    )
    btn_interrupt.click(
        fn=interrupt_and_ask,
        inputs=[q_input],
        outputs=[player, transcript],
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()