# PodQuery — research-paper podcast generator Space (Qwen 2.5 + Edge TTS + Gradio).
| import os | |
| import json | |
| import asyncio | |
| import nest_asyncio | |
| import edge_tts | |
| from dotenv import load_dotenv | |
| from pypdf import PdfReader | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from spaces import GPU | |
# Allow nested async event loops (Gradio runs its own loop).
nest_asyncio.apply()

# Load environment keys
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")  # Automatically set in Spaces

# Initialize Client (Qwen 72B). Bind the name unconditionally so code paths
# that reach `hf_client` without a token (e.g. interrupt_and_ask) fail with a
# clear AttributeError on None instead of a NameError.
hf_client = None
if HF_TOKEN:
    hf_client = InferenceClient(model="Qwen/Qwen2.5-72B-Instruct", token=HF_TOKEN)
| # ========================= | |
| # HELPER FUNCTIONS | |
| # ========================= | |
def extract_text_from_pdf(pdf):
    """Extract text from the first pages of a PDF.

    Args:
        pdf: Path or file-like object accepted by ``pypdf.PdfReader``.

    Returns:
        Concatenated text of up to the first 5 pages, or an in-band
        ``"Error reading PDF: ..."`` string if the file cannot be read.
    """
    try:
        reader = PdfReader(pdf)
        chunks = []
        # Extract first 5 pages to avoid token limits downstream.
        for page in reader.pages[:5]:
            # extract_text() may yield None/empty for image-only pages;
            # guard so string concatenation never raises TypeError.
            chunks.append((page.extract_text() or "") + "\n")
        return "".join(chunks)
    except Exception as e:
        # Errors are returned as text; callers surface them in the UI.
        return f"Error reading PDF: {e}"
async def generate_audio_file(text, voice, output_path):
    """Synthesize *text* to *output_path* with the given Edge TTS voice.

    Returns the output path for caller convenience.
    """
    tts = edge_tts.Communicate(text, voice)
    await tts.save(output_path)
    return output_path
| # ========================= | |
| # CORE LOGIC | |
| # ========================= | |
def generate_script(pdf_file, persona_style):
    """Build a short two-host podcast script from a PDF via Qwen 2.5.

    Args:
        pdf_file: Uploaded PDF (path/file object) or None.
        persona_style: Persona key ("Serious Academic" / "Gossip Columnist");
            unknown values fall back to the academic persona.

    Returns:
        (status_message, script_or_None, new_state) — matching the Gradio
        outputs [status, script_display, app_state].
    """
    new_state = {
        "script": [],
        "current_index": 0,
        "persona": persona_style,
        "full_text": ""
    }

    if not pdf_file:
        return "⚠️ Upload a PDF first.", None, new_state
    if not HF_TOKEN:
        return "⚠️ Missing HF_TOKEN. This usually works automatically in Spaces.", None, new_state

    pdf_text = extract_text_from_pdf(pdf_file)
    new_state["full_text"] = pdf_text

    prompts = {
        "Serious Academic": "You are a serious academic professor. Tone: Intellectual, critical, and insightful.",
        "Gossip Columnist": "You are a gossip columnist host. Tone: Dramatic, sensationalist, and excited.",
    }

    # Qwen system prompt. Default to the academic persona for unknown keys
    # instead of interpolating the literal string "None" into the prompt.
    system_instruction = f"""
{prompts.get(persona_style, prompts["Serious Academic"])}
You will be given a research paper text.
Generate a 4-line dialogue script between two hosts (Host A and Host B) discussing the paper.
CRITICAL OUTPUT RULES:
1. Output MUST be valid JSON only.
2. Do not add markdown blocks like ```json.
3. Format: [ {{"speaker": "Host A", "text": "..."}}, {{"speaker": "Host B", "text": "..."}} ]
"""
    # Truncate the paper to ~4000 chars to stay within the context budget.
    user_message = f"Here is the paper text:\n\n{pdf_text[:4000]}..."

    messages = [
        {"role": "system", "content": system_instruction},
        {"role": "user", "content": user_message}
    ]

    try:
        # Call Qwen via HF Inference
        response = hf_client.chat_completion(
            messages=messages,
            max_tokens=1000,
            temperature=0.7
        )
        raw_content = response.choices[0].message.content

        # Clean up potential markdown fences the LLM adds despite instructions.
        clean_json = raw_content.replace("```json", "").replace("```", "").strip()
        script = json.loads(clean_json)

        new_state["script"] = script
        new_state["current_index"] = 0
        return "✅ Script ready (Qwen 2.5).", script, new_state
    except Exception as e:
        # Network/auth/JSON-parse failures are surfaced in the status box.
        return f"Error with Qwen: {e}", None, new_state
# We use async here for Edge TTS
async def play_next_chunk(state_data):
    """Synthesize and return the next scripted line of the podcast.

    Args:
        state_data: Session state dict produced by ``generate_script``.

    Returns:
        (audio_path_or_None, transcript/status text, state_data) matching
        the Gradio outputs [player, transcript, app_state].
    """
    if not state_data or not state_data.get("script"):
        return None, "⚠️ No script generated yet.", state_data

    idx = state_data["current_index"]
    script = state_data["script"]

    if idx >= len(script):
        return None, "🎉 Podcast complete.", state_data

    line = script[idx]

    # SELECT VOICES (Free Edge TTS): Host A = Male, Host B = Female.
    # The script is LLM-generated JSON, so read its keys defensively with
    # .get() — a malformed entry must not raise KeyError outside the try.
    speaker = line.get("speaker", "Host A")
    text = line.get("text", "")
    voice_id = "en-US-ChristopherNeural"
    if speaker == "Host B":
        voice_id = "en-US-AriaNeural"

    # Switch voices for Gossip mode
    if state_data.get("persona") == "Gossip Columnist":
        voice_id = "en-US-EricNeural" if speaker == "Host A" else "en-US-AnaNeural"

    try:
        save_path = f"temp_{idx}.mp3"
        await generate_audio_file(text, voice_id, save_path)
        # Advance the cursor only after a successful synthesis, so a failed
        # line can be retried.
        state_data["current_index"] += 1
        return save_path, f"{speaker}: {text}", state_data
    except Exception as e:
        return None, f"Audio error: {e}", state_data
async def interrupt_and_ask(question, state_data):
    """Pause the podcast and answer a listener question about the paper.

    Returns (audio_path_or_None, answer/status text, state_data), the same
    shape as the Gradio outputs [player, transcript, app_state].
    """
    if not state_data or not state_data.get("full_text"):
        return None, "Upload PDF first.", state_data

    # Ask Qwen for a short, in-character answer grounded in the paper text.
    persona = state_data['persona']
    context = state_data['full_text'][:2000]
    chat = [
        {"role": "system", "content": f"You are a {persona}. Answer the question briefly based on the paper, then say 'Anyway, back to the paper...'"},
        {"role": "user", "content": f"Context: {context}\n\nUser Question: {question}"},
    ]
    try:
        reply = hf_client.chat_completion(messages=chat, max_tokens=200)
        answer = reply.choices[0].message.content
    except Exception as e:
        return None, f"Qwen Error: {e}", state_data

    # Voice the interruption with the default host voice.
    try:
        save_path = "interrupt.mp3"
        await generate_audio_file(answer, "en-US-ChristopherNeural", save_path)
        return save_path, answer, state_data
    except Exception as e:
        return None, f"Audio Error: {e}", state_data
# =========================
# GRADIO UI
# =========================
with gr.Blocks() as demo:
    # Per-session state: script lines, playback cursor, persona, paper text.
    session = gr.State({})

    gr.Markdown("# 🎧 PodQuery — Research Paper Podcast Generator (Powered by Qwen 2.5)")

    with gr.Row():
        # Left column: inputs and script generation.
        with gr.Column():
            uploaded_pdf = gr.File(label="Upload PDF", file_types=[".pdf"])
            persona_choice = gr.Dropdown(
                ["Serious Academic", "Gossip Columnist"],
                value="Serious Academic",
                label="Persona Style",
            )
            generate_btn = gr.Button("Generate Podcast Script", variant="primary")
            status_box = gr.Textbox(label="Status")
        # Right column: the generated dialogue shown as JSON.
        with gr.Column():
            script_view = gr.JSON(label="Generated Script")

    gr.Markdown("---")

    with gr.Row():
        audio_out = gr.Audio(label="Audio Output", autoplay=True)
        transcript_box = gr.Textbox(label="Transcript")

    play_btn = gr.Button("▶️ Play Next Line")

    gr.Markdown("---")

    with gr.Row():
        question_box = gr.Textbox(label="Ask a Question (Interrupt)")
        interrupt_btn = gr.Button("✋ Interrupt Podcast")

    # Wire the buttons to the handlers defined above.
    generate_btn.click(
        generate_script,
        inputs=[uploaded_pdf, persona_choice],
        outputs=[status_box, script_view, session],
    )
    play_btn.click(
        play_next_chunk,
        inputs=[session],
        outputs=[audio_out, transcript_box, session],
    )
    interrupt_btn.click(
        interrupt_and_ask,
        inputs=[question_box, session],
        outputs=[audio_out, transcript_box, session],
    )

if __name__ == "__main__":
    # NOTE(review): ssr_mode=False presumably works around SSR issues on
    # Hugging Face Spaces — confirm before changing.
    demo.launch(ssr_mode=False)