Pragmaticl committed on
Commit
63396da
·
verified ·
1 Parent(s): 930eb08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -12
app.py CHANGED
@@ -172,7 +172,11 @@ def process_audio_background(task_id: str, input_file: str, original_filename: s
172
  """Xử lý audio trong background"""
173
  try:
174
  update_task_status(task_id, TaskStatus.PROCESSING, {
175
- 'progress': 'Đang giải nén và phát hiện file audio...'
 
 
 
 
176
  })
177
 
178
  task_dir = os.path.join(STORAGE_DIR, task_id)
@@ -191,21 +195,34 @@ def process_audio_background(task_id: str, input_file: str, original_filename: s
191
 
192
  if not audio_files:
193
  update_task_status(task_id, TaskStatus.ERROR, {
194
- 'error': 'Không tìm thấy file audio nào trong file tải lên!'
 
195
  })
196
  return
197
 
198
  update_task_status(task_id, TaskStatus.PROCESSING, {
199
- 'progress': f'Tìm thấy {len(audio_files)} file audio. Đang transcribe...',
200
- 'total_files': len(audio_files)
 
 
 
 
201
  })
202
 
203
  all_records = []
204
 
205
  # Xử lý từng file audio
206
  for idx, audio_file in enumerate(audio_files):
 
 
207
  update_task_status(task_id, TaskStatus.PROCESSING, {
208
- 'progress': f'Đang xử file {idx+1}/{len(audio_files)}: {Path(audio_file).name}'
 
 
 
 
 
 
209
  })
210
 
211
  base_name = Path(audio_file).stem
@@ -213,6 +230,17 @@ def process_audio_background(task_id: str, input_file: str, original_filename: s
213
  # Transcribe
214
  segments = transcribe_with_timestamps(audio_file)
215
 
 
 
 
 
 
 
 
 
 
 
 
216
  # Gộp các segment ngắn
217
  merged_segments = merge_short_segments(segments, min_duration=2.0)
218
 
@@ -227,12 +255,25 @@ def process_audio_background(task_id: str, input_file: str, original_filename: s
227
  all_records.extend(records)
228
 
229
  update_task_status(task_id, TaskStatus.PROCESSING, {
230
- 'progress': f'Đã tạo {len(all_records)} segments. Đang lưu vào Parquet...'
 
 
 
 
 
231
  })
232
 
233
  # Lưu vào parquet
234
  parquet_files = save_to_parquet(all_records, final_output_dir)
235
 
 
 
 
 
 
 
 
 
236
  # Copy audio folder vào output
237
  final_audio_dir = os.path.join(final_output_dir, 'audio')
238
  shutil.copytree(audio_output_dir, final_audio_dir)
@@ -256,17 +297,22 @@ def process_audio_background(task_id: str, input_file: str, original_filename: s
256
 
257
  update_task_status(task_id, TaskStatus.SUCCESS, {
258
  'progress': 'Hoàn thành!',
 
 
 
259
  'input_files': len(audio_files),
260
  'total_segments': len(all_records),
261
  'parquet_files': len(parquet_files),
262
  'output_zip': zip_path,
263
- 'zip_size_mb': round(zip_size_mb, 2)
 
264
  })
265
 
266
  except Exception as e:
267
  error_msg = f"{str(e)}\n\n{traceback.format_exc()}"
268
  update_task_status(task_id, TaskStatus.ERROR, {
269
- 'error': error_msg
 
270
  })
271
 
272
  def submit_task(input_file):
@@ -333,6 +379,10 @@ def get_task_info(task_id):
333
 
334
  task = TASKS[task_id]
335
 
 
 
 
 
336
  info = f"""
337
  ## 📋 Thông tin Task: {task_id}
338
 
@@ -343,30 +393,77 @@ def get_task_info(task_id):
343
 
344
  ---
345
 
346
- ### 📊 Chi tiết
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
 
348
- **Tiến trình:** {task.get('progress', 'N/A')}
349
  """
 
 
 
 
 
350
 
351
  if task['status'] == TaskStatus.SUCCESS:
 
 
 
 
 
352
  info += f"""
353
  **Số file audio đầu vào:** {task.get('input_files', 'N/A')}
354
- **Tổng số segments:** {task.get('total_segments', 'N/A')}
355
  **Số file Parquet:** {task.get('parquet_files', 'N/A')}
356
  **Kích thước ZIP:** {task.get('zip_size_mb', 'N/A')} MB
 
 
357
  """
358
  zip_path = task.get('output_zip')
359
  if zip_path and os.path.exists(zip_path):
360
  return info, zip_path
361
 
362
  elif task['status'] == TaskStatus.ERROR:
 
 
 
 
 
363
  info += f"""
364
- **Lỗi:**
365
  ```
366
  {task.get('error', 'Unknown error')}
367
  ```
368
  """
369
 
 
 
 
370
  return info, None
371
 
372
  def refresh_task_list():
 
172
  """Xử lý audio trong background"""
173
  try:
174
  update_task_status(task_id, TaskStatus.PROCESSING, {
175
+ 'progress': 'Đang giải nén và phát hiện file audio...',
176
+ 'step': 1,
177
+ 'total_steps': 5,
178
+ 'step_name': 'Giải nén file',
179
+ 'percent': 0
180
  })
181
 
182
  task_dir = os.path.join(STORAGE_DIR, task_id)
 
195
 
196
  if not audio_files:
197
  update_task_status(task_id, TaskStatus.ERROR, {
198
+ 'error': 'Không tìm thấy file audio nào trong file tải lên!',
199
+ 'percent': 0
200
  })
201
  return
202
 
203
  update_task_status(task_id, TaskStatus.PROCESSING, {
204
+ 'progress': f'Tìm thấy {len(audio_files)} file audio. Chuẩn bị transcribe...',
205
+ 'step': 2,
206
+ 'total_steps': 5,
207
+ 'step_name': 'Phát hiện file audio',
208
+ 'total_files': len(audio_files),
209
+ 'percent': 20
210
  })
211
 
212
  all_records = []
213
 
214
  # Xử lý từng file audio
215
  for idx, audio_file in enumerate(audio_files):
216
+ file_percent = 20 + int((idx / len(audio_files)) * 50)
217
+
218
  update_task_status(task_id, TaskStatus.PROCESSING, {
219
+ 'progress': f'Đang transcribe file {idx+1}/{len(audio_files)}: {Path(audio_file).name}',
220
+ 'step': 3,
221
+ 'total_steps': 5,
222
+ 'step_name': f'Transcribe audio ({idx+1}/{len(audio_files)})',
223
+ 'current_file': idx + 1,
224
+ 'total_files': len(audio_files),
225
+ 'percent': file_percent
226
  })
227
 
228
  base_name = Path(audio_file).stem
 
230
  # Transcribe
231
  segments = transcribe_with_timestamps(audio_file)
232
 
233
+ update_task_status(task_id, TaskStatus.PROCESSING, {
234
+ 'progress': f'Đang cắt audio file {idx+1}/{len(audio_files)}: {len(segments)} segments',
235
+ 'step': 3,
236
+ 'total_steps': 5,
237
+ 'step_name': f'Cắt audio ({idx+1}/{len(audio_files)})',
238
+ 'current_file': idx + 1,
239
+ 'total_files': len(audio_files),
240
+ 'segments_found': len(segments),
241
+ 'percent': file_percent + 2
242
+ })
243
+
244
  # Gộp các segment ngắn
245
  merged_segments = merge_short_segments(segments, min_duration=2.0)
246
 
 
255
  all_records.extend(records)
256
 
257
  update_task_status(task_id, TaskStatus.PROCESSING, {
258
+ 'progress': f'Đã tạo {len(all_records)} segments. Đang lưu vào Parquet...',
259
+ 'step': 4,
260
+ 'total_steps': 5,
261
+ 'step_name': 'Lưu Parquet',
262
+ 'total_segments': len(all_records),
263
+ 'percent': 75
264
  })
265
 
266
  # Lưu vào parquet
267
  parquet_files = save_to_parquet(all_records, final_output_dir)
268
 
269
+ update_task_status(task_id, TaskStatus.PROCESSING, {
270
+ 'progress': f'Đang tạo file ZIP...',
271
+ 'step': 5,
272
+ 'total_steps': 5,
273
+ 'step_name': 'Tạo file ZIP',
274
+ 'percent': 85
275
+ })
276
+
277
  # Copy audio folder vào output
278
  final_audio_dir = os.path.join(final_output_dir, 'audio')
279
  shutil.copytree(audio_output_dir, final_audio_dir)
 
297
 
298
  update_task_status(task_id, TaskStatus.SUCCESS, {
299
  'progress': 'Hoàn thành!',
300
+ 'step': 5,
301
+ 'total_steps': 5,
302
+ 'step_name': 'Hoàn thành',
303
  'input_files': len(audio_files),
304
  'total_segments': len(all_records),
305
  'parquet_files': len(parquet_files),
306
  'output_zip': zip_path,
307
+ 'zip_size_mb': round(zip_size_mb, 2),
308
+ 'percent': 100
309
  })
310
 
311
  except Exception as e:
312
  error_msg = f"{str(e)}\n\n{traceback.format_exc()}"
313
  update_task_status(task_id, TaskStatus.ERROR, {
314
+ 'error': error_msg,
315
+ 'percent': 0
316
  })
317
 
318
  def submit_task(input_file):
 
379
 
380
  task = TASKS[task_id]
381
 
382
+ # Progress bar
383
+ percent = task.get('percent', 0)
384
+ progress_bar = "🟦" * (percent // 5) + "⬜" * (20 - percent // 5)
385
+
386
  info = f"""
387
  ## 📋 Thông tin Task: {task_id}
388
 
 
393
 
394
  ---
395
 
396
+ ### 📊 Tiến độ tổng thể
397
+
398
+ {progress_bar} **{percent}%**
399
+
400
+ """
401
+
402
+ # Hiển thị steps
403
+ if 'step' in task and 'total_steps' in task:
404
+ step = task.get('step', 0)
405
+ total_steps = task.get('total_steps', 5)
406
+ step_name = task.get('step_name', 'N/A')
407
+
408
+ info += f"""
409
+ **Bước hiện tại:** {step}/{total_steps} - {step_name}
410
+
411
+ """
412
+
413
+ # Hiển thị tiến trình con
414
+ if task['status'] == TaskStatus.PROCESSING:
415
+ info += "### 🔄 Chi tiết tiến trình\n\n"
416
+
417
+ if 'current_file' in task and 'total_files' in task:
418
+ current = task.get('current_file', 0)
419
+ total = task.get('total_files', 0)
420
+ file_progress = int((current / total) * 100) if total > 0 else 0
421
+ file_bar = "🟩" * (file_progress // 5) + "⬜" * (20 - file_progress // 5)
422
+
423
+ info += f"""
424
+ **Xử lý file:** {current}/{total}
425
+ {file_bar} {file_progress}%
426
 
 
427
  """
428
+
429
+ if 'segments_found' in task:
430
+ info += f"**Segments phát hiện:** {task['segments_found']}\n"
431
+
432
+ info += f"\n**Trạng thái:** {task.get('progress', 'Đang xử lý...')}\n"
433
 
434
  if task['status'] == TaskStatus.SUCCESS:
435
+ info += """
436
+ ---
437
+
438
+ ### ✅ Kết quả
439
+ """
440
  info += f"""
441
  **Số file audio đầu vào:** {task.get('input_files', 'N/A')}
442
+ **Tổng số segments đã tạo:** {task.get('total_segments', 'N/A')}
443
  **Số file Parquet:** {task.get('parquet_files', 'N/A')}
444
  **Kích thước ZIP:** {task.get('zip_size_mb', 'N/A')} MB
445
+
446
+ 💾 **File ZIP sẵn sàng tải về bên dưới!**
447
  """
448
  zip_path = task.get('output_zip')
449
  if zip_path and os.path.exists(zip_path):
450
  return info, zip_path
451
 
452
  elif task['status'] == TaskStatus.ERROR:
453
+ info += """
454
+ ---
455
+
456
+ ### ❌ Lỗi
457
+ """
458
  info += f"""
 
459
  ```
460
  {task.get('error', 'Unknown error')}
461
  ```
462
  """
463
 
464
+ elif task['status'] == TaskStatus.WAITING:
465
+ info += "\n⏳ Task đang trong hàng đợi...\n"
466
+
467
  return info, None
468
 
469
  def refresh_task_list():