mlokendra committed on
Commit
2436d0b
·
verified ·
1 Parent(s): 7da5350

update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -12
app.py CHANGED
@@ -10,6 +10,7 @@ import re
10
  import heapq
11
  #from nltk.tokenize import sent_tokenize
12
  from transformers import pipeline
 
13
 
14
  # Load a dialogue-friendly LLM (you can cache it offline too)
15
  generator = pipeline("text-generation",
@@ -27,8 +28,8 @@ def extract_sections_from_pdf(pdf_path):
27
  section_patterns = {
28
  "Start of podcast with first section of paper as abstract": r"\babstract\b",
29
  "second section continuing from abstract to introduction": r"\bintroduction\b",
30
- "methodology": r"\b(method(?:ology)?|proposed method|approach|model architecture|architecture|experimental setup|network design|implementation details|techniques|framework|learning algorithm|system description)\b",
31
- "conclusion": r"\bconclusion(?:s)?\b|\bsummary\b|final thoughts\b|result(?:s)?",
32
  }
33
 
34
  sections = {}
@@ -124,6 +125,14 @@ def merge_segments(segments, output="podcast_output.mp3"):
124
  podcast.export(output, format="mp3")
125
  print(f"Podcast saved as {output}")
126
 
 
 
 
 
 
 
 
 
127
 
128
  def process_pdf(pdf_file):
129
  # Save the uploaded file to a temporary location
@@ -142,15 +151,16 @@ def process_pdf(pdf_file):
142
 
143
  # Step 2: Generate podcast script
144
  final_script = ""
145
- for section, summary in summarized_sections.items():
146
- dialogue = generate_podcast_script(section, summary)
147
- dialogue_content = dialogue[1]["content"]
148
- lines = dialogue_content.split('\n')
149
- print("lines" ,lines)
150
- # Filter lines that start with "Host:" or "Guest:" and join them
151
- dialogue_fine = "\n".join([line for line in lines]).replace("**", "")
152
- print("dialogue_fine",dialogue_fine)
153
- final_script += f"\n\n=== {section.upper()} ===\n{dialogue_fine}\n"
 
154
 
155
 
156
 
@@ -178,4 +188,4 @@ iface = gr.Interface(
178
  )
179
 
180
  # Launch the interface
181
- iface.launch(debug=True)
 
10
  import heapq
11
  #from nltk.tokenize import sent_tokenize
12
  from transformers import pipeline
13
+ import concurrent.futures
14
 
15
  # Load a dialogue-friendly LLM (you can cache it offline too)
16
  generator = pipeline("text-generation",
 
28
  section_patterns = {
29
  "Start of podcast with first section of paper as abstract": r"\babstract\b",
30
  "second section continuing from abstract to introduction": r"\bintroduction\b",
31
+ "third section continuing from introduction to methodology": r"\b(method(?:ology)?|proposed method|approach|model architecture|architecture|experimental setup|network design|implementation details|techniques|framework|learning algorithm|system description)\b",
32
+ "fourth and the last section continuing from methodology to conclusion": r"\bconclusion(?:s)?\b|\bsummary\b|final thoughts\b|result(?:s)?",
33
  }
34
 
35
  sections = {}
 
125
  podcast.export(output, format="mp3")
126
  print(f"Podcast saved as {output}")
127
 
128
def process_section(section_summary_pair):
    """Build one formatted podcast-script segment for a single paper section.

    Parameters
    ----------
    section_summary_pair : tuple
        A ``(section, summary)`` pair, where ``section`` is the section
        label (str) and ``summary`` is its summarized text (str).

    Returns
    -------
    str
        The segment text: a ``=== SECTION ===`` header followed by the
        generated dialogue with markdown bold markers (``**``) stripped.
    """
    section, summary = section_summary_pair
    dialogue = generate_podcast_script(section, summary)
    # presumably a chat-style message list; index 1 holds the assistant
    # reply — TODO confirm against generate_podcast_script
    dialogue_content = dialogue[1]["content"]
    # The original split('\n') / '\n'.join round-trip was an identity
    # transformation, so stripping the markers directly is equivalent.
    # NOTE(review): an earlier revision intended to keep only lines
    # starting with "Host:"/"Guest:" but never implemented the filter;
    # preserved as-is to avoid a behavior change — confirm intent.
    dialogue_fine = dialogue_content.replace("**", "")
    return f"\n\n=== {section.upper()} ===\n{dialogue_fine}\n"
136
 
137
  def process_pdf(pdf_file):
138
  # Save the uploaded file to a temporary location
 
151
 
152
  # Step 2: Generate podcast script
153
  final_script = ""
154
+ # Prepare data
155
+ section_summary_pairs = list(summarized_sections.items())
156
+
157
+ # Run in parallel using threads (good for API calls)
158
+ final_script = ""
159
+ with concurrent.futures.ThreadPoolExecutor() as executor:
160
+ results = executor.map(process_section, section_summary_pairs)
161
+
162
+ # Combine results
163
+ final_script = "".join(results)
164
 
165
 
166
 
 
188
  )
189
 
190
  # Launch the interface
191
+ iface.launch(debug=True,share=True)