Spaces:
Sleeping
Sleeping
update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import re
|
|
| 10 |
import heapq
|
| 11 |
#from nltk.tokenize import sent_tokenize
|
| 12 |
from transformers import pipeline
|
|
|
|
| 13 |
|
| 14 |
# Load a dialogue-friendly LLM (you can cache it offline too)
|
| 15 |
generator = pipeline("text-generation",
|
|
@@ -27,8 +28,8 @@ def extract_sections_from_pdf(pdf_path):
|
|
| 27 |
section_patterns = {
|
| 28 |
"Start of podcast with first section of paper as abstract": r"\babstract\b",
|
| 29 |
"second section continuing from abstract to introduction": r"\bintroduction\b",
|
| 30 |
-
"methodology": r"\b(method(?:ology)?|proposed method|approach|model architecture|architecture|experimental setup|network design|implementation details|techniques|framework|learning algorithm|system description)\b",
|
| 31 |
-
"conclusion": r"\bconclusion(?:s)?\b|\bsummary\b|final thoughts\b|result(?:s)?",
|
| 32 |
}
|
| 33 |
|
| 34 |
sections = {}
|
|
@@ -124,6 +125,14 @@ def merge_segments(segments, output="podcast_output.mp3"):
|
|
| 124 |
podcast.export(output, format="mp3")
|
| 125 |
print(f"Podcast saved as {output}")
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def process_pdf(pdf_file):
|
| 129 |
# Save the uploaded file to a temporary location
|
|
@@ -142,15 +151,16 @@ def process_pdf(pdf_file):
|
|
| 142 |
|
| 143 |
# Step 2: Generate podcast script
|
| 144 |
final_script = ""
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
| 154 |
|
| 155 |
|
| 156 |
|
|
@@ -178,4 +188,4 @@ iface = gr.Interface(
|
|
| 178 |
)
|
| 179 |
|
| 180 |
# Launch the interface
|
| 181 |
-
iface.launch(debug=True)
|
|
|
|
| 10 |
import heapq
|
| 11 |
#from nltk.tokenize import sent_tokenize
|
| 12 |
from transformers import pipeline
|
| 13 |
+
import concurrent.futures
|
| 14 |
|
| 15 |
# Load a dialogue-friendly LLM (you can cache it offline too)
|
| 16 |
generator = pipeline("text-generation",
|
|
|
|
| 28 |
# Regex patterns (applied case-insensitively by the caller — TODO confirm) used
# to locate the start of each paper section. Keys double as podcast segment
# titles, so they are descriptive phrases rather than plain section names.
# Fix: the last pattern previously read `...|\bsummary\b|final thoughts\b|result(?:s)?`
# — "final thoughts" had no leading \b (it matched inside "semifinal thoughts")
# and "result(?:s)?" had no word boundaries at all (it matched inside
# "resulting" / "resultant"). Both alternatives are now properly anchored.
section_patterns = {
    "Start of podcast with first section of paper as abstract": r"\babstract\b",
    "second section continuing from abstract to introduction": r"\bintroduction\b",
    "third section continuing from introduction to methodology": r"\b(method(?:ology)?|proposed method|approach|model architecture|architecture|experimental setup|network design|implementation details|techniques|framework|learning algorithm|system description)\b",
    "fourth and the last section continuing from methodology to conclusion": r"\bconclusion(?:s)?\b|\bsummary\b|\bfinal thoughts\b|\bresult(?:s)?\b",
}
|
| 34 |
|
| 35 |
sections = {}
|
|
|
|
| 125 |
podcast.export(output, format="mp3")
|
| 126 |
print(f"Podcast saved as {output}")
|
| 127 |
|
| 128 |
+
def process_section(section_summary_pair):
    """Build one formatted podcast-script segment for a single paper section.

    Designed to be mapped over ``summarized_sections.items()`` by a
    ``ThreadPoolExecutor`` in ``process_pdf``.

    Args:
        section_summary_pair: A ``(section_name, summary_text)`` tuple.

    Returns:
        The generated dialogue, stripped of markdown bold markers, under a
        ``=== SECTION NAME ===`` header.
    """
    section, summary = section_summary_pair
    # generate_podcast_script appears to return a chat-style message list;
    # index 1 holds the reply message — TODO confirm against its definition.
    dialogue = generate_podcast_script(section, summary)
    dialogue_content = dialogue[1]["content"]
    # Fix: the original split the text on '\n' and immediately rejoined it
    # with '\n' — an identity round-trip. Only the bold-marker strip matters.
    dialogue_fine = dialogue_content.replace("**", "")
    return f"\n\n=== {section.upper()} ===\n{dialogue_fine}\n"
|
| 136 |
|
| 137 |
def process_pdf(pdf_file):
|
| 138 |
# Save the uploaded file to a temporary location
|
|
|
|
| 151 |
|
| 152 |
# Step 2: Generate podcast script
|
| 153 |
final_script = ""
|
| 154 |
+
# Prepare data
|
| 155 |
+
section_summary_pairs = list(summarized_sections.items())
|
| 156 |
+
|
| 157 |
+
# Run in parallel using threads (good for API calls)
|
| 158 |
+
final_script = ""
|
| 159 |
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
| 160 |
+
results = executor.map(process_section, section_summary_pairs)
|
| 161 |
+
|
| 162 |
+
# Combine results
|
| 163 |
+
final_script = "".join(results)
|
| 164 |
|
| 165 |
|
| 166 |
|
|
|
|
| 188 |
)
|
| 189 |
|
| 190 |
# Launch the interface
|
| 191 |
+
# Start the Gradio app: debug=True surfaces errors in the console;
# share=True requests a temporary public share link in addition to localhost.
iface.launch(debug=True,share=True)
|