emmajeed committed on
Commit
5fdd6f4
·
verified ·
1 Parent(s): be49b91

Update transcribe_core.py

Browse files
Files changed (1) hide show
  1. transcribe_core.py +7 -43
transcribe_core.py CHANGED
@@ -255,70 +255,44 @@ def create_transcript_markdown(audio_filename: str, transcription: str, summary:
255
 
256
 
257
  def process_audio_file(audio_file_path: str, gemini_provider: TranscriptionProvider, openrouter_provider: TranscriptionProvider = None, progress_callback=None) -> Tuple[str, str]:
258
- """
259
- Process an audio file and return the markdown content or ZIP of multiple files.
 
 
260
 
261
- Args:
262
- audio_file_path: Path to audio file
263
- gemini_provider: GeminiProvider for transcription
264
- openrouter_provider: Optional OpenRouterProvider for summary/ideas (if None, uses gemini_provider)
265
- progress_callback: Optional callback function for progress updates
266
-
267
- Returns:
268
- Tuple of (output_file_path, is_zip_boolean_as_string)
269
- - If single file: ("path/to/file.md", "False")
270
- - If chunked: ("path/to/file.zip", "True")
271
- """
272
  audio_filename = os.path.basename(audio_file_path)
273
  base_name = os.path.splitext(audio_filename)[0]
274
 
275
- # Check file size
276
  file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
277
- print(f"\nProcessing: {audio_filename} ({file_size_mb:.2f}MB)")
278
 
279
- # Determine if chunking is needed
280
  files_to_transcribe = []
281
  if file_size_mb > 30:
282
- print(f"File is larger than 30MB. Chunking into smaller parts...")
283
  if progress_callback:
284
  progress_callback("📦 Chunking large audio file...", 0.1)
285
-
286
  chunked_files = chunk_audio_file(audio_file_path)
287
  files_to_transcribe.extend(chunked_files)
288
  else:
289
- print("File is small enough to process directly")
290
  files_to_transcribe.append(audio_file_path)
291
 
292
- # Process each file (chunk or original)
293
  markdown_files = []
294
  total_files = len(files_to_transcribe)
295
 
296
  for idx, file_path in enumerate(files_to_transcribe, 1):
297
  file_name = os.path.basename(file_path)
298
- print(f"\nTranscribing {idx}/{total_files}: {file_name}")
299
 
300
  if progress_callback:
301
  progress = 0.2 + (0.6 * (idx - 1) / total_files)
302
  progress_callback(f"🎙️ Transcribing part {idx}/{total_files}...", progress)
303
 
304
- # Transcribe using Gemini
305
  transcription = generate_transcription(file_path, gemini_provider)
306
 
307
- if progress_callback:
308
- progress_callback(f"📝 Generating metadata for part {idx}/{total_files}...", progress + 0.1)
309
-
310
- # Generate metadata using OpenRouter if available, otherwise Gemini
311
  text_provider = openrouter_provider if openrouter_provider else gemini_provider
312
  summary = generate_summary(transcription, text_provider)
313
  key_ideas = generate_key_ideas(transcription, text_provider)
314
 
315
- # Create markdown
316
  markdown_content = create_transcript_markdown(file_name, transcription, summary, key_ideas)
317
 
318
- # Save markdown file to outputs directory
319
- output_dir = "outputs"
320
- os.makedirs(output_dir, exist_ok=True)
321
-
322
  output_filename = os.path.splitext(file_name)[0] + ".md"
323
  markdown_path = os.path.join(output_dir, output_filename)
324
 
@@ -327,39 +301,29 @@ def process_audio_file(audio_file_path: str, gemini_provider: TranscriptionProvi
327
 
328
  markdown_files.append(markdown_path)
329
 
330
- # Clean up chunk audio file
331
  if "_part" in file_name:
332
  try:
333
  os.remove(file_path)
334
- print(f"Deleted chunk: {file_name}")
335
  except Exception as e:
336
  print(f"Warning: Could not delete chunk {file_name}: {e}")
337
 
338
- # Return result
339
  if len(markdown_files) == 1:
340
- # Single file - return as-is
341
  return markdown_files[0], "False"
342
  else:
343
- # Multiple files - create ZIP
344
  if progress_callback:
345
  progress_callback("📦 Creating ZIP file...", 0.9)
346
 
347
- output_dir = "outputs"
348
- os.makedirs(output_dir, exist_ok=True)
349
-
350
  zip_filename = f"{base_name}_transcripts.zip"
351
  zip_path = os.path.join(output_dir, zip_filename)
352
 
353
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
354
  for md_file in markdown_files:
355
- # Add with proper filename
356
  basename = os.path.basename(md_file)
357
  zipf.write(md_file, basename)
358
- # Delete individual md files after adding to ZIP
359
  try:
360
  os.remove(md_file)
361
  except Exception as e:
362
  print(f"Warning: Could not delete {md_file}: {e}")
363
 
364
- print(f"\n✅ Created ZIP with {len(markdown_files)} transcripts: {zip_filename}")
365
- return zip_path, "True"
 
255
 
256
 
257
  def process_audio_file(audio_file_path: str, gemini_provider: TranscriptionProvider, openrouter_provider: TranscriptionProvider = None, progress_callback=None) -> Tuple[str, str]:
258
+ # 1. SETUP ABSOLUTE PATH (Keep this)
259
+ current_dir = os.path.dirname(os.path.abspath(__file__))
260
+ output_dir = os.path.join(current_dir, "outputs")
261
+ os.makedirs(output_dir, exist_ok=True)
262
 
 
 
 
 
 
 
 
 
 
 
 
263
  audio_filename = os.path.basename(audio_file_path)
264
  base_name = os.path.splitext(audio_filename)[0]
265
 
 
266
  file_size_mb = os.path.getsize(audio_file_path) / (1024 * 1024)
 
267
 
 
268
  files_to_transcribe = []
269
  if file_size_mb > 30:
 
270
  if progress_callback:
271
  progress_callback("📦 Chunking large audio file...", 0.1)
 
272
  chunked_files = chunk_audio_file(audio_file_path)
273
  files_to_transcribe.extend(chunked_files)
274
  else:
 
275
  files_to_transcribe.append(audio_file_path)
276
 
 
277
  markdown_files = []
278
  total_files = len(files_to_transcribe)
279
 
280
  for idx, file_path in enumerate(files_to_transcribe, 1):
281
  file_name = os.path.basename(file_path)
 
282
 
283
  if progress_callback:
284
  progress = 0.2 + (0.6 * (idx - 1) / total_files)
285
  progress_callback(f"🎙️ Transcribing part {idx}/{total_files}...", progress)
286
 
 
287
  transcription = generate_transcription(file_path, gemini_provider)
288
 
 
 
 
 
289
  text_provider = openrouter_provider if openrouter_provider else gemini_provider
290
  summary = generate_summary(transcription, text_provider)
291
  key_ideas = generate_key_ideas(transcription, text_provider)
292
 
 
293
  markdown_content = create_transcript_markdown(file_name, transcription, summary, key_ideas)
294
 
295
+ # 2. FIX: Use the absolute output_dir established at the top
 
 
 
296
  output_filename = os.path.splitext(file_name)[0] + ".md"
297
  markdown_path = os.path.join(output_dir, output_filename)
298
 
 
301
 
302
  markdown_files.append(markdown_path)
303
 
 
304
  if "_part" in file_name:
305
  try:
306
  os.remove(file_path)
 
307
  except Exception as e:
308
  print(f"Warning: Could not delete chunk {file_name}: {e}")
309
 
 
310
  if len(markdown_files) == 1:
 
311
  return markdown_files[0], "False"
312
  else:
 
313
  if progress_callback:
314
  progress_callback("📦 Creating ZIP file...", 0.9)
315
 
316
+ # 3. FIX: Use absolute zip path
 
 
317
  zip_filename = f"{base_name}_transcripts.zip"
318
  zip_path = os.path.join(output_dir, zip_filename)
319
 
320
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
321
  for md_file in markdown_files:
 
322
  basename = os.path.basename(md_file)
323
  zipf.write(md_file, basename)
 
324
  try:
325
  os.remove(md_file)
326
  except Exception as e:
327
  print(f"Warning: Could not delete {md_file}: {e}")
328
 
329
+ return zip_path, "True"