Spaces:
Sleeping
Sleeping
app.py
CHANGED
|
@@ -1,25 +1,49 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
#!pip install PyPDF2 gtts nltk gradio pydub
|
| 4 |
-
from gtts import gTTS
|
| 5 |
-
from pydub import AudioSegment
|
| 6 |
-
from PyPDF2 import PdfReader
|
| 7 |
-
import gradio as gr
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
import heapq
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
from transformers import pipeline
|
| 13 |
import concurrent.futures
|
| 14 |
|
| 15 |
-
#
|
|
|
|
|
|
|
|
|
|
| 16 |
generator = pipeline("text-generation",
|
| 17 |
model="unsloth/gemma-3-1b-it",
|
| 18 |
device_map='cpu',
|
| 19 |
max_new_tokens=300)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
def split_sentences(text):
|
| 21 |
-
# Naive sentence splitter — works well for most academic text
|
| 22 |
return re.split(r'(?<=[.!?])\s+', text.strip())
|
|
|
|
| 23 |
def extract_sections_from_pdf(pdf_path):
|
| 24 |
reader = PdfReader(pdf_path)
|
| 25 |
full_text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
|
|
@@ -47,15 +71,8 @@ def extract_sections_from_pdf(pdf_path):
|
|
| 47 |
lines = section_text.splitlines()
|
| 48 |
limited_section_text = "\n".join(lines[:20])
|
| 49 |
sections[name] = limited_section_text
|
| 50 |
-
print(sections)
|
| 51 |
-
return sections
|
| 52 |
|
| 53 |
-
|
| 54 |
-
KEY_TERMS = [
|
| 55 |
-
"model", "propose", "architecture", "performance", "accuracy", "experiment",
|
| 56 |
-
"framework", "design", "method", "network", "approach", "outperform",
|
| 57 |
-
"layer", "training", "results", "learning", "evaluate", "baseline"
|
| 58 |
-
]
|
| 59 |
|
| 60 |
def summarize_section_by_heuristics(text, max_sentences=5):
|
| 61 |
sentences = split_sentences(text)
|
|
@@ -76,52 +93,45 @@ def summarize_section_by_heuristics(text, max_sentences=5):
|
|
| 76 |
top_sentences = heapq.nlargest(max_sentences, scored)
|
| 77 |
top_sentences = [s for _, s in sorted(top_sentences, key=lambda x: sentences.index(x[1]))]
|
| 78 |
return " ".join(top_sentences)
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
lines = script.split('\n')
|
| 85 |
segments = []
|
|
|
|
|
|
|
|
|
|
| 86 |
for i, line in enumerate(lines):
|
| 87 |
if 'Host:' in line or 'Guest:' in line:
|
| 88 |
-
print("print line ",i,line)
|
| 89 |
speaker, content = line.split(':', 1)
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
tts = gTTS(content.strip(), lang=host_lang,tld='ie', slow=False)
|
| 93 |
-
filename = f"segment_{i}.mp3"
|
| 94 |
-
tts.save(filename)
|
| 95 |
-
if speaker.lower() == "guest":
|
| 96 |
-
print( line.split(':', 1))
|
| 97 |
-
tts = gTTS(content.strip(), lang=guest_lang,tld='co.uk',slow=False)
|
| 98 |
-
filename = f"segment_{i}.mp3"
|
| 99 |
-
tts.save(filename)
|
| 100 |
-
|
| 101 |
-
segment = AudioSegment.from_mp3(filename)
|
| 102 |
-
segments.append(segment)
|
| 103 |
-
return segments
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
{section_text}
|
| 109 |
-
\"\"\"
|
| 110 |
-
\n\nFormat:\nHost: ...\nGuest: ..."""
|
| 111 |
|
| 112 |
-
|
| 113 |
-
print(user_prompt[:200])
|
| 114 |
-
messages =[{"role": "user", "content": user_prompt}]
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
return response[0]["generated_text"]
|
| 119 |
|
|
|
|
| 120 |
|
| 121 |
def merge_segments(segments, output="podcast_output.mp3"):
|
| 122 |
podcast = AudioSegment.empty()
|
| 123 |
for segment in segments:
|
| 124 |
-
podcast += segment + AudioSegment.silent(duration=300)
|
| 125 |
podcast.export(output, format="mp3")
|
| 126 |
print(f"Podcast saved as {output}")
|
| 127 |
|
|
@@ -130,62 +140,38 @@ def process_section(section_summary_pair):
|
|
| 130 |
dialogue = generate_podcast_script(section, summary)
|
| 131 |
dialogue_content = dialogue[1]["content"]
|
| 132 |
lines = dialogue_content.split('\n')
|
| 133 |
-
dialogue_fine = "\n".join([line for line in lines]).replace("**", "")
|
| 134 |
-
|
| 135 |
-
return result
|
| 136 |
|
| 137 |
def process_pdf(pdf_file):
|
| 138 |
-
# Save the uploaded file to a temporary location
|
| 139 |
pdf_path = "uploaded_pdf.pdf"
|
| 140 |
with open(pdf_file.name, "rb") as infile, open(pdf_path, "wb") as outfile:
|
| 141 |
outfile.write(infile.read())
|
| 142 |
|
| 143 |
-
# Extract text, generate conversation, and create audio
|
| 144 |
-
|
| 145 |
-
# Step 1: Extract & summarize sections
|
| 146 |
sections = extract_sections_from_pdf(pdf_path)
|
| 147 |
summarized_sections = {
|
| 148 |
name: summarize_section_by_heuristics(content)
|
| 149 |
for name, content in sections.items()
|
| 150 |
}
|
| 151 |
|
| 152 |
-
# Step 2: Generate podcast script
|
| 153 |
-
final_script = ""
|
| 154 |
-
# Prepare data
|
| 155 |
section_summary_pairs = list(summarized_sections.items())
|
| 156 |
-
|
| 157 |
-
# Run in parallel using threads (good for API calls)
|
| 158 |
-
final_script = ""
|
| 159 |
with concurrent.futures.ThreadPoolExecutor() as executor:
|
| 160 |
results = executor.map(process_section, section_summary_pairs)
|
| 161 |
|
| 162 |
-
# Combine results
|
| 163 |
final_script = "".join(results)
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
print(final_script)
|
| 168 |
-
segments = tts_line_by_line(final_script)
|
| 169 |
output_audio_path = "podcast_output.mp3"
|
| 170 |
merge_segments(segments, output=output_audio_path)
|
| 171 |
|
| 172 |
-
# Clean up the temporary PDF file
|
| 173 |
os.remove(pdf_path)
|
| 174 |
-
# Clean up individual segment files
|
| 175 |
-
#for i in range(1,len(segments)):
|
| 176 |
-
# os.remove(f"segment_{i}.mp3")
|
| 177 |
-
|
| 178 |
-
|
| 179 |
return output_audio_path
|
| 180 |
|
| 181 |
-
# Create the Gradio interface
|
| 182 |
iface = gr.Interface(
|
| 183 |
fn=process_pdf,
|
| 184 |
inputs=gr.File(label="Upload a PDF file"),
|
| 185 |
outputs=gr.Audio(label="Generated Podcast Audio"),
|
| 186 |
title="PDF to Podcast",
|
| 187 |
-
description="Upload a PDF and get a podcast-style audio summary."
|
| 188 |
)
|
| 189 |
|
| 190 |
-
|
| 191 |
-
iface.launch(debug=True,share=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import re
|
| 3 |
import heapq
|
| 4 |
+
import uuid
|
| 5 |
+
import asyncio
|
| 6 |
+
import edge_tts
|
| 7 |
+
import gradio as gr
|
| 8 |
+
import nest_asyncio
|
| 9 |
+
from PyPDF2 import PdfReader
|
| 10 |
+
from pydub import AudioSegment
|
| 11 |
from transformers import pipeline
|
| 12 |
import concurrent.futures
|
| 13 |
|
| 14 |
+
# Module-level setup: event-loop patch, LLM pipeline, TTS voice selection and
# the keyword list used for sentence scoring.

import random  # needed only for the voice pick below

# Apply nested event loop patch for Jupyter/Colab so that asyncio.run() can be
# called even when an event loop is already running.
nest_asyncio.apply()

# Load LLM
generator = pipeline("text-generation",
                     model="unsloth/gemma-3-1b-it",
                     device_map='cpu',
                     max_new_tokens=300)

# `await` is only legal inside a coroutine, so the voice catalogue is fetched
# by running the coroutine to completion here. One catalogue serves both
# lookups; `voices_female` is kept as an alias so the existing name survives.
voices = asyncio.run(edge_tts.VoicesManager.create())
voices_female = voices

# NOTE(review): Language="es" selects Spanish voices while the prompt in
# generate_podcast_script is written in English -- confirm this is intended.
voice_male = voices.find(Gender="Male", Language="es")
voice_female = voices_female.find(Gender="Female", Language="es")
if not voice_male or not voice_female:
    # Fail with a readable message instead of random.choice's IndexError.
    raise RuntimeError("No matching edge-tts voices found for Language='es'")

MALE_VOICE = random.choice(voice_male)["Name"]
FEMALE_VOICE = random.choice(voice_female)["Name"]

# Prosody settings. They are not referenced by any code visible in this file
# section; presumably meant for edge_tts.Communicate(rate=..., pitch=...).
rate_male = 40
pitch_male = 40
pitch_female = 40
rate_female = 40
rate_female_str = f"{rate_female:+d}%"
pitch_female_str = f"{pitch_female:+d}Hz"  # was built from rate_female by mistake

# Terms defined for the heuristic summarizer (its scoring code is not visible
# in this section).
KEY_TERMS = [
    "model", "propose", "architecture", "performance", "accuracy", "experiment",
    "framework", "design", "method", "network", "approach", "outperform",
    "layer", "training", "results", "learning", "evaluate", "baseline"
]
|
| 43 |
+
|
| 44 |
# Whitespace that directly follows sentence-ending punctuation (., ! or ?).
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


def split_sentences(text):
    """Split *text* into sentences with a naive punctuation-based rule.

    The text is stripped, then cut at every run of whitespace that follows
    ``.``, ``!`` or ``?``. Abbreviations such as ``e.g.`` are therefore also
    treated as sentence ends.

    Args:
        text: The string to split.

    Returns:
        A list of sentence strings; an empty list when *text* is empty or
        contains only whitespace.
    """
    stripped = text.strip()
    if not stripped:
        # re.split on "" would yield [''], i.e. one bogus empty sentence.
        return []
    return _SENTENCE_BOUNDARY.split(stripped)
|
| 46 |
+
|
| 47 |
def extract_sections_from_pdf(pdf_path):
|
| 48 |
reader = PdfReader(pdf_path)
|
| 49 |
full_text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
|
|
|
|
| 71 |
lines = section_text.splitlines()
|
| 72 |
limited_section_text = "\n".join(lines[:20])
|
| 73 |
sections[name] = limited_section_text
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
return sections
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
def summarize_section_by_heuristics(text, max_sentences=5):
|
| 78 |
sentences = split_sentences(text)
|
|
|
|
| 93 |
top_sentences = heapq.nlargest(max_sentences, scored)
|
| 94 |
top_sentences = [s for _, s in sorted(top_sentences, key=lambda x: sentences.index(x[1]))]
|
| 95 |
return " ".join(top_sentences)
|
| 96 |
+
|
| 97 |
+
def generate_podcast_script(section_name, section_text):
    """Ask the LLM for a short Host/Guest dialogue about one paper section.

    Args:
        section_name: Name of the section, interpolated into the prompt.
        section_text: Text of the section, embedded in the prompt.

    Returns:
        ``generated_text`` of the first pipeline result. The caller indexes it
        as ``[1]["content"]``, so it is presumably a list of chat messages.
    """
    prompt = f"""You are hosting a podcast. Create a friendly, engaging conversation with maximum 10 sentence between Host and Guest discussing the {section_name} of a research paper:
\nPaper Section:\n\"\"\"\n{section_text}\n\"\"\"\n\nFormat:\nHost: ...\nGuest: ..."""
    chat = [{"role": "user", "content": prompt}]
    outputs = generator(chat, max_new_tokens=300, do_sample=True, temperature=0.7)
    first_output = outputs[0]
    return first_output["generated_text"]
|
| 103 |
+
|
| 104 |
+
async def generate_voice_line(text, voice, filename):
    """Synthesize *text* with the given edge-tts *voice* and save it to *filename*."""
    await edge_tts.Communicate(text, voice).save(filename)
|
| 107 |
+
|
| 108 |
+
async def tts_edge_line_by_line(script):
    """Turn every ``Host:`` / ``Guest:`` line of *script* into an audio segment.

    Lines containing neither marker are ignored. The text before the first
    colon decides the voice: ``host`` (case-insensitive) uses ``MALE_VOICE``,
    anything else uses ``FEMALE_VOICE``. All lines are synthesized
    concurrently, each into its own uniquely named temporary MP3 file.

    Args:
        script: Dialogue text with one utterance per line.

    Returns:
        A list of ``AudioSegment`` objects in script order.
    """
    filenames = []
    tasks = []

    for line in script.split('\n'):
        if 'Host:' not in line and 'Guest:' not in line:
            continue
        speaker, content = line.split(':', 1)
        content = content.strip()
        if not content:
            # Nothing to speak on this line; do not request audio for it.
            continue
        voice = MALE_VOICE if speaker.strip().lower() == 'host' else FEMALE_VOICE
        filename = f"segment_{uuid.uuid4().hex}.mp3"
        filenames.append(filename)
        tasks.append(generate_voice_line(content, voice, filename))

    try:
        await asyncio.gather(*tasks)
        return [AudioSegment.from_mp3(filename) for filename in filenames]
    finally:
        # The per-line MP3 files are only intermediates; delete them so they
        # do not accumulate across requests (also when synthesis fails).
        for filename in filenames:
            if os.path.exists(filename):
                os.remove(filename)
|
| 130 |
|
| 131 |
def merge_segments(segments, output="podcast_output.mp3"):
    """Concatenate *segments* into one MP3 written to *output*.

    Each segment is followed by 300 ms of silence. A confirmation line is
    printed once the file has been exported.
    """
    combined = AudioSegment.empty()
    for clip in segments:
        clip_with_pause = clip + AudioSegment.silent(duration=300)
        combined = combined + clip_with_pause
    combined.export(output, format="mp3")
    print(f"Podcast saved as {output}")
|
| 137 |
|
|
|
|
| 140 |
dialogue = generate_podcast_script(section, summary)
|
| 141 |
dialogue_content = dialogue[1]["content"]
|
| 142 |
lines = dialogue_content.split('\n')
|
| 143 |
+
dialogue_fine = "\n".join([line for line in lines if 'Host:' in line or 'Guest:' in line]).replace("**", "")
|
| 144 |
+
return f"\n\n=== {section.upper()} ===\n{dialogue_fine}\n"
|
|
|
|
| 145 |
|
| 146 |
def process_pdf(pdf_file):
    """Convert an uploaded research-paper PDF into a podcast MP3.

    Steps: copy the upload to a private temporary file, extract and summarize
    its sections, generate a Host/Guest dialogue per section, synthesize the
    dialogue line by line and merge the audio.

    Args:
        pdf_file: The Gradio upload; either an object with a ``name``
            attribute holding the path, or the path string itself.

    Returns:
        Path of the generated audio file (``"podcast_output.mp3"``).

    Raises:
        gr.Error: If no file was uploaded.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")

    # Depending on the Gradio version/config the File component passes a
    # tempfile-like object or a plain path string; accept both.
    source_path = getattr(pdf_file, "name", pdf_file)

    # A unique name keeps concurrent requests from overwriting each other.
    pdf_path = f"uploaded_pdf_{uuid.uuid4().hex}.pdf"
    output_audio_path = "podcast_output.mp3"

    try:
        with open(source_path, "rb") as infile, open(pdf_path, "wb") as outfile:
            outfile.write(infile.read())

        sections = extract_sections_from_pdf(pdf_path)
        summarized_sections = {
            name: summarize_section_by_heuristics(content)
            for name, content in sections.items()
        }

        section_summary_pairs = list(summarized_sections.items())
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # map() yields results in input order, so sections stay in order.
            final_script = "".join(
                executor.map(process_section, section_summary_pairs)
            )

        segments = asyncio.run(tts_edge_line_by_line(final_script))
        merge_segments(segments, output=output_audio_path)
    finally:
        # Remove the temporary copy even when a step above fails.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)

    return output_audio_path
|
| 168 |
|
|
|
|
| 169 |
# Gradio UI: a PDF upload goes in, the generated podcast audio comes out.
iface = gr.Interface(
    process_pdf,
    gr.File(label="Upload a PDF file"),
    gr.Audio(label="Generated Podcast Audio"),
    title="PDF to Podcast",
    description="Upload a Research Paper PDF and get a podcast-style audio summary.",
)

# Start the web app with debug output enabled.
iface.launch(debug=True)
|
|
|