jmisak committed on
Commit
27f0acd
·
verified ·
1 Parent(s): 1afb380

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -56
app.py CHANGED
@@ -9,7 +9,7 @@ from llm import query_llm, extract_structured_data
9
  from reporting import generate_enhanced_csv, generate_enhanced_pdf
10
  from dashboard import generate_comprehensive_dashboard
11
  from validation import validate_transcript_quality, check_data_completeness
12
- from audio_transcriber import transcribe_with_diarization_streaming
13
 
14
  # HuggingFace Spaces Configuration
15
  import os
@@ -18,27 +18,7 @@ os.environ["LLM_TIMEOUT"] = "25"
18
  os.environ["MAX_TOKENS_PER_REQUEST"] = "100"
19
  print("🚀 Running on HuggingFace Spaces - Optimized Configuration Loaded")
20
 
21
- def preprocess_audio(audio_files, num_speakers):
22
- """Convert audio to transcripts"""
23
- if not audio_files:
24
- return None, "No audio files provided"
25
-
26
- transcript_paths = []
27
- status = ""
28
-
29
- for audio in audio_files:
30
- try:
31
- # Get the actual file path
32
- audio_path = audio.name if hasattr(audio, 'name') else str(audio)
33
-
34
- transcript_path = transcribe_with_diarization(audio_path, num_speakers)
35
- transcript_paths.append(transcript_path)
36
- status += f"✓ {os.path.basename(audio_path)} → {transcript_path}\n"
37
- except Exception as e:
38
- status += f"✗ {os.path.basename(audio_path)}: {str(e)}\n"
39
-
40
- # Return list of paths for file component
41
- return transcript_paths if transcript_paths else None, status
42
 
43
 
44
  def analyze(files, file_type, user_comments, role_hint, debug_mode, interviewee_type, progress=gr.Progress()):
@@ -510,40 +490,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
510
 
511
  with gr.Tabs():
512
 
513
- with gr.TabItem("🎀 Audio Preprocessing"):
514
- gr.Markdown("""
515
- Upload audio interviews to auto-transcribe with speaker identification.
516
- Outputs DOCX files ready for analysis.
517
- """)
518
-
519
- with gr.Row():
520
- audio_input = gr.File(
521
- label="Upload Audio Files",
522
- file_types=[".mp3", ".wav", ".m4a", ".flac"],
523
- file_count="multiple"
524
- )
525
- num_speakers_input = gr.Slider(
526
- minimum=1,
527
- maximum=5,
528
- value=2,
529
- step=1,
530
- label="Number of Speakers"
531
- )
532
-
533
- transcribe_btn = gr.Button("🎙️ Transcribe Audio", variant="primary")
534
- transcribe_status = gr.Textbox(label="Status", lines=10)
535
- transcript_files = gr.File(label="Download Transcripts", file_count="multiple")
536
-
537
- transcribe_btn.click(
538
- fn=preprocess_audio,
539
- inputs=[audio_input, num_speakers_input],
540
- outputs=[transcript_files, transcribe_status]
541
- )
542
-
543
- gr.Markdown("""
544
- **Next:** Download transcripts, then go to "Transcript Analysis" tab to analyze them.
545
- """)
546
-
547
 
548
 
549
  with gr.TabItem("📊 Transcript Analysis"):
 
9
  from reporting import generate_enhanced_csv, generate_enhanced_pdf
10
  from dashboard import generate_comprehensive_dashboard
11
  from validation import validate_transcript_quality, check_data_completeness
12
+
13
 
14
  # HuggingFace Spaces Configuration
15
  import os
 
18
  os.environ["MAX_TOKENS_PER_REQUEST"] = "100"
19
  print("🚀 Running on HuggingFace Spaces - Optimized Configuration Loaded")
20
 
21
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
  def analyze(files, file_type, user_comments, role_hint, debug_mode, interviewee_type, progress=gr.Progress()):
 
490
 
491
  with gr.Tabs():
492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
494
 
495
  with gr.TabItem("📊 Transcript Analysis"):