norhan12 committed on
Commit
83e778c
·
verified ·
1 Parent(s): 07e46f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -128
app.py CHANGED
@@ -10,149 +10,92 @@ from process_interview import process_interview
10
  from typing import Tuple, Optional, List, Dict
11
  from concurrent.futures import ThreadPoolExecutor
12
 
13
- # Setup logging
14
- logging.basicConfig(
15
- level=logging.INFO,
16
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
17
- logger = logging.getLogger(__name__)
18
- logging.getLogger("nemo_logging").setLevel(logging.ERROR)
19
- logging.getLogger("nemo").setLevel(logging.ERROR)
20
-
21
- # Configuration
22
- OUTPUT_DIR = "./processed_audio"
23
- os.makedirs(OUTPUT_DIR, exist_ok=True)
24
-
25
- # Constants
26
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
27
- MAX_FILE_SIZE = 300 * 1024 * 1024 # 300MB
28
 
 
29
  def check_health() -> str:
30
- """Check system health, similar to FastAPI /health endpoint"""
31
  try:
32
- for directory in [OUTPUT_DIR]:
33
- if not os.path.exists(directory):
34
- raise Exception(f"Directory {directory} does not exist")
35
  return "System is healthy"
36
  except Exception as e:
37
- logger.error(f"Health check failed: {str(e)}")
38
- return f"System is unhealthy: {str(e)}"
39
 
40
- # A helper function to process a single audio file
41
- def process_single_audio(file_path_or_url: str) -> Dict:
42
- """Processes a single audio file and returns its analysis."""
43
  try:
44
- if not file_path_or_url:
45
- return {"error": "No audio provided for processing."}
46
-
47
- temp_audio_path = Path(file_path_or_url)
48
- file_ext = temp_audio_path.suffix.lower()
49
-
 
 
 
 
 
 
50
  if file_ext not in VALID_EXTENSIONS:
51
- return {"error": f"Invalid file format: {file_ext}. Supported formats: {', '.join(VALID_EXTENSIONS)}"}
52
-
53
- file_size = os.path.getsize(temp_audio_path)
54
- if file_size > MAX_FILE_SIZE:
55
- return {
56
- "error": f"File too large: {file_size / (1024 * 1024):.2f}MB. Max size: {MAX_FILE_SIZE // (1024 * 1024)}MB"}
57
-
58
- logger.info(f"Processing audio from: {temp_audio_path}")
59
- result = process_interview(str(temp_audio_path))
60
-
61
- if not result or 'pdf_path' not in result or 'json_path' not in result:
62
- return {"error": "Processing failed - invalid result format."}
63
-
64
- pdf_path = Path(result['pdf_path'])
65
- json_path = Path(result['json_path'])
66
-
67
- if not pdf_path.exists() or not json_path.exists():
68
- return {"error": "Processing failed - output files not found."}
69
-
70
- with json_path.open('r') as f:
71
  analysis_data = json.load(f)
72
-
73
- voice_analysis = analysis_data.get('voice_analysis', {})
 
74
  summary = (
 
75
  f"Speakers: {', '.join(analysis_data['speakers'])}\n"
76
- f"Interview Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds\n"
77
- f"Confidence Level: {voice_analysis.get('interpretation', {}).get('confidence_level', 'Unknown')}\n"
78
- f"Anxiety Level: {voice_analysis.get('interpretation', {}).get('anxiety_level', 'Unknown')}"
79
  )
 
80
  json_data = json.dumps(analysis_data, indent=2)
81
-
82
- return {
83
- "summary": summary,
84
- "json_data": json_data,
85
- "pdf_path": str(pdf_path),
86
- "original_input": file_path_or_url
87
- }
88
  except Exception as e:
89
- logger.error(f"Error processing single audio: {str(e)}", exc_info=True)
90
- return {"error": f"Error during processing: {str(e)}"}
91
-
92
- # Main function to handle multiple audio files/URLs
93
- def analyze_multiple_audios(file_paths_or_urls: List[str]) -> Tuple[str, str, List[str]]:
94
- """
95
- Analyzes multiple interview audio files/URLs in parallel.
96
- Returns combined summary, combined JSON, and a list of PDF paths.
97
- """
98
- if not file_paths_or_urls:
99
- return "No audio files/URLs provided.", "[]", []
100
-
101
- all_summaries = []
102
- all_json_data = []
103
- all_pdf_paths = []
104
-
105
- with ThreadPoolExecutor(max_workers=5) as executor:
106
- futures = {executor.submit(process_single_audio, item): item for item in file_paths_or_urls}
107
- for future in futures:
108
- item = futures[future]
109
- try:
110
- result = future.result()
111
- if "error" in result:
112
- all_summaries.append(f"Error processing {item}: {result['error']}")
113
- all_json_data.append(json.dumps({"input": item, "error": result['error']}, indent=2))
114
- else:
115
- all_summaries.append(f"Analysis for {os.path.basename(item)}:\n{result['summary']}")
116
- all_json_data.append(result['json_data'])
117
- all_pdf_paths.append(result['pdf_path'])
118
- except Exception as exc:
119
- logger.error(f"Item {item} generated an unexpected exception: {exc}", exc_info=True)
120
- all_summaries.append(f"Error processing {item}: An unexpected error occurred.")
121
- all_json_data.append(json.dumps({"input": item, "error": str(exc)}, indent=2))
122
-
123
- combined_summary = "\n\n---\n\n".join(all_summaries)
124
- combined_json_list = "[\n" + ",\n".join(all_json_data) + "\n]"
125
-
126
- return combined_summary, combined_json_list, all_pdf_paths
127
-
128
- # Gradio interface
129
- with gr.Blocks(title="EvalBot Interview Analysis System", theme=gr.themes.Soft()) as demo:
130
- gr.Markdown("""
131
- # 🎤 EvalBot: Automated Interview Analysis System
132
- Provide multiple audio file URLs or upload multiple audio files to analyze speaker performance.
133
- Supported formats: WAV, MP3, M4A, FLAC (max 300MB per file).
134
- """)
135
- with gr.Row():
136
- with gr.Column():
137
- health_status = gr.Textbox(label="System Status", value=check_health(), interactive=False)
138
- audio_inputs = gr.File(
139
- label="Provide Audio URLs or Upload Files (Multiple allowed)",
140
- type="filepath",
141
- file_count="multiple"
142
- )
143
- submit_btn = gr.Button("Start Analysis", variant="primary")
144
- with gr.Column():
145
- output_summary = gr.Textbox(label="Combined Analysis Summary", interactive=False, lines=10)
146
- output_json = gr.Textbox(label="Detailed Analysis (JSON Array)", interactive=False, lines=20)
147
- pdf_outputs = gr.File(label="Download All Reports", type="filepath", file_count="multiple")
148
-
149
- submit_btn.click(
150
- fn=analyze_multiple_audios,
151
- inputs=audio_inputs,
152
- outputs=[output_summary, output_json, pdf_outputs],
153
- api_name="analyze_multiple_audios"
154
  )
155
 
156
- # Run the interface
157
  if __name__ == "__main__":
158
- demo.launch(server_port=7860, server_name="0.0.0.0")
 
10
  from typing import Tuple, Optional, List, Dict
11
  from concurrent.futures import ThreadPoolExecutor
12
 
13
+ logging.basicConfig(level=logging.INFO)
14
+
15
+ # Set up a temporary directory for downloads
16
+ TEMP_DIR = "./temp_files"
17
+ os.makedirs(TEMP_DIR, exist_ok=True)
18
+
 
 
 
 
 
 
 
19
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
20
+ MAX_FILE_SIZE_MB = 300
21
 
22
# Health check
def check_health() -> str:
    """Report whether the temporary download directory is usable.

    Ensures ``TEMP_DIR`` exists (creating it if necessary) and returns a
    human-readable status string.

    Returns:
        ``"System is healthy"`` when the directory exists or could be
        created, ``"System unhealthy"`` otherwise.
    """
    try:
        # exist_ok=True avoids the check-then-create race of
        # `if not exists: makedirs`, and still raises if TEMP_DIR exists
        # but is not a directory (which the exists() check reported as healthy).
        os.makedirs(TEMP_DIR, exist_ok=True)
    except OSError as e:
        logging.error("Health check failed: %s", e)
        return "System unhealthy"
    return "System is healthy"
31
 
32
+ # Main Processing Function
33
def _safe_filename(user_id: str, file_url: str) -> str:
    """Return a flat, separator-free file name built from *user_id* and the URL path."""
    import re
    from urllib.parse import unquote, urlparse

    # Use only the URL *path* so "?token=..." or "#fragment" never leak into
    # the file name or its suffix.
    url_name = os.path.basename(unquote(urlparse(file_url).path))
    # Both parts are caller-controlled: replace anything that is not a word
    # character, dot or dash (notably "/" and "\\") so the result cannot
    # escape TEMP_DIR once joined to it.
    flat = re.sub(r"[^\w.-]", "_", f"{user_id}_{url_name}")
    return flat.lstrip(".")


def process_audio(file_url: str, user_id: str) -> Tuple[str, str, str]:
    """Download one audio file, run the interview analysis and summarise it.

    Args:
        file_url: HTTP(S) URL of the audio file. Its path must end in one of
            ``VALID_EXTENSIONS``.
        user_id: Identifier of the requesting user; it prefixes the temporary
            file name and is echoed in the summary.

    Returns:
        ``(summary, json_data, pdf_path)`` on success, where ``json_data`` is
        the analysis JSON re-serialised with 2-space indentation and
        ``pdf_path`` is the value of ``result['pdf_path']``. On any failure
        the first element is an error message and the other two are empty
        strings; the function does not raise.
    """
    connect_timeout_s, read_timeout_s = 10, 60
    local_path = None
    try:
        # Validate the extension *before* downloading anything.
        filename = _safe_filename(user_id, file_url)
        file_ext = Path(filename).suffix.lower()
        if file_ext not in VALID_EXTENSIONS:
            return f" Invalid file type: {file_ext}", "", ""

        # Local import: the file's import header is outside the visible
        # hunk, so make sure the name is in scope for this function.
        import requests

        os.makedirs(TEMP_DIR, exist_ok=True)
        local_path = os.path.join(TEMP_DIR, filename)
        max_bytes = MAX_FILE_SIZE_MB * 1024 * 1024

        # Download the file, enforcing the size limit while streaming so an
        # oversized body is never fully written to disk.
        logging.info("Downloading file: %s", file_url)
        downloaded = 0
        with requests.get(
            file_url, stream=True, timeout=(connect_timeout_s, read_timeout_s)
        ) as response:
            # Fail on 4xx/5xx instead of saving an error page as "audio".
            response.raise_for_status()
            with open(local_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    downloaded += len(chunk)
                    if downloaded > max_bytes:
                        break
                    f.write(chunk)

        if downloaded > max_bytes:
            return f"❌ File too large: >{MAX_FILE_SIZE_MB}MB", "", ""

        # Start processing.
        result = process_interview(local_path)
        if not result:
            return "❌ Processing failed.", "", ""

        pdf_path = result['pdf_path']
        with open(result['json_path'], 'r', encoding='utf-8') as f:
            analysis_data = json.load(f)

        # Build the summary.
        voice = analysis_data['voice_analysis']['interpretation']
        summary = (
            f"User ID: {user_id}\n"
            f"Speakers: {', '.join(analysis_data['speakers'])}\n"
            f"Duration: {analysis_data['text_analysis']['total_duration']:.2f} sec\n"
            f"Confidence: {voice['confidence_level']}\n"
            f"Anxiety: {voice['anxiety_level']}"
        )

        json_data = json.dumps(analysis_data, indent=2)
        return summary, json_data, pdf_path

    except Exception as e:
        # Top-level boundary for the UI/API: log with traceback, return a message.
        logging.error("Error processing audio: %s", e, exc_info=True)
        return f" Internal Error: {str(e)}", "", ""
    finally:
        # The downloaded audio is only an input; remove it so repeated
        # requests do not fill the disk.
        # NOTE(review): assumes process_interview does not need the source
        # file after it returns — confirm.
        if local_path is not None:
            try:
                os.remove(local_path)
            except OSError:
                pass
79
+
80
+ # Gradio Interface
81
+ with gr.Blocks(title="EvalBot Audio Analysis") as demo:
82
+ gr.Markdown("# 🎙️ EvalBot Audio Analysis (Single URL Mode)")
83
+
84
+ file_url = gr.Textbox(label="Audio File URL (single)")
85
+ user_id = gr.Textbox(label="User ID")
86
+
87
+ analyze_btn = gr.Button("Analyze Audio")
88
+
89
+ output_summary = gr.Textbox(label="Summary")
90
+ output_json = gr.Textbox(label="JSON Output")
91
+ output_pdf = gr.Textbox(label="PDF Path")
92
+
93
+ analyze_btn.click(
94
+ fn=process_audio,
95
+ inputs=[file_url, user_id],
96
+ outputs=[output_summary, output_json, output_pdf],
97
+ api_name="analyze_single_audio"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  )
99
 
 
100
  if __name__ == "__main__":
101
+ demo.launch(server_port=7860, server_name="0.0.0.0")