Rakshitjan committed on
Commit
e356985
·
verified ·
1 Parent(s): 8889896

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +31 -110
main.py CHANGED
@@ -1,6 +1,6 @@
1
  # main.py
2
  from fastapi import FastAPI, File, UploadFile, HTTPException
3
- from fastapi.responses import StreamingResponse
4
  from fastapi.middleware.cors import CORSMiddleware
5
  import google.generativeai as genai
6
  import pdfplumber
@@ -11,8 +11,6 @@ import tempfile
11
  import shutil
12
  from gtts import gTTS
13
  from pydub import AudioSegment
14
- import asyncio
15
- import io
16
 
17
  app = FastAPI()
18
 
@@ -112,8 +110,8 @@ def generate_conversation(pdf_text):
112
  print(f"Problem text: {cleaned_text}")
113
  raise ValueError(f"Failed to parse generated conversation: {str(e)}")
114
 
115
- def create_audio_from_conversation(conversation, temp_dir):
116
- """Create audio file from conversation with progress tracking"""
117
  # Female voice
118
  def generate_female_voice(text, filename):
119
  tts = gTTS(text=text, lang='en')
@@ -141,10 +139,12 @@ def create_audio_from_conversation(conversation, temp_dir):
141
  "Bob": "male"
142
  }
143
 
 
 
 
144
  # Combine lines
145
  final_podcast = AudioSegment.silent(duration=1000) # 1 sec silence at start
146
 
147
- total_lines = len(conversation)
148
  for i, line_dict in enumerate(conversation):
149
  for speaker, line in line_dict.items():
150
  voice_type = speaker_voice_map.get(speaker, "female")
@@ -156,125 +156,46 @@ def create_audio_from_conversation(conversation, temp_dir):
156
  voice = generate_male_voice(line, filename)
157
 
158
  final_podcast += voice + AudioSegment.silent(duration=500)
159
-
160
- # Yield progress update
161
- progress = int(100 * (i+1) / total_lines)
162
- yield json.dumps({
163
- "status": "processing",
164
- "step": "generating_audio",
165
- "progress": progress,
166
- "message": f"Processing dialogue {i+1}/{total_lines}"
167
- }) + "\n"
168
 
169
  # Export final audio
170
- output_path = f"{temp_dir}/final_podcast.mp3"
171
  final_podcast.export(output_path, format="mp3")
172
 
173
- # Return the path to the final audio file
174
- return output_path
175
 
176
- async def process_pdf_to_audio_stream(file_path):
177
- """Process PDF to audio with streaming progress updates"""
 
178
  try:
179
- # Stream initial status
180
- yield json.dumps({
181
- "status": "processing",
182
- "step": "extracting_text",
183
- "progress": 10,
184
- "message": "Extracting text from PDF..."
185
- }) + "\n"
186
 
187
  # Extract text from PDF
188
- pdf_text = extract_text_from_pdf(file_path)
189
  if not pdf_text.strip():
190
- yield json.dumps({
191
- "status": "error",
192
- "message": "No text extracted from PDF"
193
- }) + "\n"
194
- return
195
-
196
- # Stream progress update
197
- yield json.dumps({
198
- "status": "processing",
199
- "step": "generating_conversation",
200
- "progress": 30,
201
- "message": "Generating conversation from PDF content..."
202
- }) + "\n"
203
 
204
  # Generate conversation
205
  conversation = generate_conversation(pdf_text)
206
 
207
- # Stream progress update
208
- yield json.dumps({
209
- "status": "processing",
210
- "step": "starting_audio_generation",
211
- "progress": 50,
212
- "message": "Starting audio generation..."
213
- }) + "\n"
214
-
215
- # Create temp directory for audio files
216
- temp_dir = tempfile.mkdtemp()
217
 
218
- # Generate audio for each line with streaming updates
219
- audio_file_path = None
220
- async for update in async_generator_wrapper(create_audio_from_conversation(conversation, temp_dir)):
221
- yield update
222
- # The last non-json output will be the file path
223
- if not update.startswith("{"):
224
- audio_file_path = update.strip()
225
-
226
- if not audio_file_path or not os.path.exists(audio_file_path):
227
- yield json.dumps({
228
- "status": "error",
229
- "message": "Failed to generate audio file"
230
- }) + "\n"
231
- return
232
-
233
- # Read the audio file
234
- with open(audio_file_path, "rb") as audio_file:
235
- audio_data = audio_file.read()
236
-
237
- # Stream completion status with the audio data as base64
238
- import base64
239
- audio_base64 = base64.b64encode(audio_data).decode('utf-8')
240
- yield json.dumps({
241
- "status": "complete",
242
- "progress": 100,
243
- "message": "Audio generation complete",
244
- "audio_data": audio_base64
245
- }) + "\n"
246
-
247
- # Clean up
248
- shutil.rmtree(temp_dir)
249
-
250
- except Exception as e:
251
- yield json.dumps({
252
- "status": "error",
253
- "message": f"Error: {str(e)}"
254
- }) + "\n"
255
-
256
- async def async_generator_wrapper(sync_generator):
257
- """Convert a synchronous generator to an async generator"""
258
- for item in sync_generator:
259
- await asyncio.sleep(0.01) # Small sleep to allow other tasks to run
260
- yield item
261
-
262
- @app.post("/convert-stream/")
263
- async def convert_pdf_to_audio_stream(file: UploadFile = File(...)):
264
- """Convert PDF to audio with streaming progress updates"""
265
- try:
266
- # Create temporary file for PDF
267
- temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
268
- temp_pdf_path = temp_pdf.name
269
-
270
- # Save uploaded PDF
271
- with open(temp_pdf_path, "wb") as pdf_file:
272
- shutil.copyfileobj(file.file, pdf_file)
273
 
274
- # Return streaming response
275
- return StreamingResponse(
276
- process_pdf_to_audio_stream(temp_pdf_path),
277
- media_type="text/event-stream"
 
278
  )
279
 
280
  except Exception as e:
 
1
  # main.py
2
  from fastapi import FastAPI, File, UploadFile, HTTPException
3
+ from fastapi.responses import FileResponse
4
  from fastapi.middleware.cors import CORSMiddleware
5
  import google.generativeai as genai
6
  import pdfplumber
 
11
  import shutil
12
  from gtts import gTTS
13
  from pydub import AudioSegment
 
 
14
 
15
  app = FastAPI()
16
 
 
110
  print(f"Problem text: {cleaned_text}")
111
  raise ValueError(f"Failed to parse generated conversation: {str(e)}")
112
 
113
+ def create_audio_from_conversation(conversation, output_path):
114
+ """Create audio file from conversation"""
115
  # Female voice
116
  def generate_female_voice(text, filename):
117
  tts = gTTS(text=text, lang='en')
 
139
  "Bob": "male"
140
  }
141
 
142
+ # Create temp directory
143
+ temp_dir = tempfile.mkdtemp()
144
+
145
  # Combine lines
146
  final_podcast = AudioSegment.silent(duration=1000) # 1 sec silence at start
147
 
 
148
  for i, line_dict in enumerate(conversation):
149
  for speaker, line in line_dict.items():
150
  voice_type = speaker_voice_map.get(speaker, "female")
 
156
  voice = generate_male_voice(line, filename)
157
 
158
  final_podcast += voice + AudioSegment.silent(duration=500)
 
 
 
 
 
 
 
 
 
159
 
160
  # Export final audio
 
161
  final_podcast.export(output_path, format="mp3")
162
 
163
+ # Clean up temp files
164
+ shutil.rmtree(temp_dir)
165
 
166
+ @app.post("/convert/")
167
+ async def convert_pdf_to_audio(file: UploadFile = File(...)):
168
+ """Convert PDF to audio"""
169
  try:
170
+ # Create temporary file for PDF
171
+ temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
172
+ temp_pdf_path = temp_pdf.name
173
+
174
+ # Save uploaded PDF
175
+ with open(temp_pdf_path, "wb") as pdf_file:
176
+ shutil.copyfileobj(file.file, pdf_file)
177
 
178
  # Extract text from PDF
179
+ pdf_text = extract_text_from_pdf(temp_pdf_path)
180
  if not pdf_text.strip():
181
+ os.unlink(temp_pdf_path)
182
+ raise HTTPException(status_code=400, detail="No text extracted from PDF")
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  # Generate conversation
185
  conversation = generate_conversation(pdf_text)
186
 
187
+ # Create audio file
188
+ output_filename = f"temp/output_{file.filename.split('.')[0]}.mp3"
189
+ create_audio_from_conversation(conversation, output_filename)
 
 
 
 
 
 
 
190
 
191
+ # Clean up PDF file
192
+ os.unlink(temp_pdf_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
+ # Return audio file
195
+ return FileResponse(
196
+ path=output_filename,
197
+ media_type="audio/mpeg",
198
+ filename=f"audio_{file.filename.split('.')[0]}.mp3"
199
  )
200
 
201
  except Exception as e: