norhan12 committed on
Commit
f233230
·
verified ·
1 Parent(s): 79c5881

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -63
app.py CHANGED
@@ -4,12 +4,11 @@ import shutil
4
  import json
5
  import requests
6
  import logging
7
-
8
  from fastapi import FastAPI, HTTPException, Body
9
  from fastapi.staticfiles import StaticFiles
10
  from fastapi.responses import FileResponse
11
  from pydantic import BaseModel, HttpUrl
12
- from process_interview import process_interview
13
 
14
  # Logging setup
15
  logging.basicConfig(level=logging.INFO)
@@ -22,129 +21,183 @@ app = FastAPI()
22
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
23
  TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
24
  STATIC_DIR = os.path.join(BASE_DIR, "static")
25
- OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
26
  JSON_DIR = os.path.join(OUTPUT_DIR, "json")
27
  PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
28
 
 
29
  for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
30
  os.makedirs(folder, exist_ok=True)
31
 
32
- # Mount static files
33
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
34
 
35
- # Config
36
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
37
  MAX_FILE_SIZE_MB = 300
38
- BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space") # بدون /static في النهاية
39
 
40
- # Models
 
 
 
 
 
 
41
  class ProcessResponse(BaseModel):
 
42
  summary: str
43
  json_url: str
44
  pdf_url: str
45
 
46
  class ProcessAudioRequest(BaseModel):
47
- file_url: HttpUrl
48
- user_id: str
 
49
 
50
- # Helpers
51
  def download_file(file_url: str, dest_path: str):
52
- logger.info(f"Downloading file from {file_url}")
 
53
  try:
54
- resp = requests.get(file_url, stream=True, timeout=60)
55
- resp.raise_for_status()
 
 
 
 
56
  with open(dest_path, "wb") as f:
57
  for chunk in resp.iter_content(chunk_size=8192):
58
- if chunk:
59
  f.write(chunk)
 
 
 
 
60
  except Exception as e:
61
- logger.error(f"Error downloading file: {e}")
62
- raise HTTPException(status_code=400, detail="Failed to download file")
63
 
64
  def validate_file_size(file_path: str):
 
65
  file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
66
  if file_size_mb > MAX_FILE_SIZE_MB:
67
- logger.warning(f"File too large: {file_size_mb} MB")
68
- os.remove(file_path)
69
- raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB")
70
-
71
- def generate_public_url(sub_path: str) -> str:
72
- # هنا مهم جداً ما تضيفش /static مرتين
73
- return f"{BASE_URL}/static/{sub_path}"
74
-
75
- # Main endpoint
 
 
 
 
 
 
 
 
 
 
 
 
76
  @app.post("/process-audio", response_model=ProcessResponse)
77
  async def process_audio(request: ProcessAudioRequest = Body(...)):
 
 
 
 
 
78
  file_url = str(request.file_url)
79
  user_id = request.user_id
80
-
 
81
  file_ext = os.path.splitext(file_url)[1].lower()
82
  if file_ext not in VALID_EXTENSIONS:
83
- logger.error("Invalid file extension")
84
- raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}")
85
-
 
86
  temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
87
  temp_path = os.path.join(TEMP_DIR, temp_filename)
88
-
89
  try:
 
90
  download_file(file_url, temp_path)
 
 
91
  validate_file_size(temp_path)
92
-
93
- logger.info("Processing interview")
94
- result = process_interview(temp_path)
95
-
 
 
 
96
  if not result:
97
- raise HTTPException(status_code=500, detail="Processing failed")
 
98
 
 
99
  json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
100
  pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
101
-
102
- json_dest = os.path.join(JSON_DIR, json_filename)
103
- pdf_dest = os.path.join(PDF_DIR, pdf_filename)
104
-
105
- shutil.copyfile(result['json_path'], json_dest)
106
- shutil.copyfile(result['pdf_path'], pdf_dest)
107
-
108
- with open(result['json_path'], "r") as jf:
 
 
109
  analysis_data = json.load(jf)
110
-
111
- voice = analysis_data.get('voice_analysis', {}).get('interpretation', {})
112
- speakers = analysis_data.get('speakers', [])
113
  total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
114
-
115
- summary = (
116
  f"User ID: {user_id}\n"
117
- f"Speakers: {', '.join(speakers)}\n"
118
  f"Duration: {total_duration:.2f} sec\n"
119
- f"Confidence: {voice.get('confidence_level', 'N/A')}\n"
120
- f"Anxiety: {voice.get('anxiety_level', 'N/A')}"
121
  )
122
-
123
- json_url = generate_public_url(f"outputs/json/{json_filename}")
124
- pdf_url = generate_public_url(f"outputs/pdf/{pdf_filename}")
125
-
126
- logger.info("Processing completed successfully")
127
- return ProcessResponse(summary=summary, json_url=json_url, pdf_url=pdf_url)
128
 
129
  except HTTPException as e:
 
130
  raise e
131
  except Exception as e:
132
- logger.exception("Unexpected error during processing")
133
- raise HTTPException(status_code=500, detail="Internal server error")
 
134
  finally:
 
135
  if os.path.exists(temp_path):
136
  os.remove(temp_path)
137
-
138
- # Serve output files (optional if using StaticFiles)
139
- @app.get("/outputs/json/{filename}")
 
 
140
  async def get_json_file(filename: str):
 
141
  file_path = os.path.join(JSON_DIR, filename)
142
  if not os.path.exists(file_path):
143
  raise HTTPException(status_code=404, detail="JSON file not found")
144
  return FileResponse(file_path, media_type="application/json", filename=filename)
145
 
146
- @app.get("/outputs/pdf/{filename}")
147
  async def get_pdf_file(filename: str):
 
148
  file_path = os.path.join(PDF_DIR, filename)
149
  if not os.path.exists(file_path):
150
  raise HTTPException(status_code=404, detail="PDF file not found")
 
4
  import json
5
  import requests
6
  import logging
 
7
  from fastapi import FastAPI, HTTPException, Body
8
  from fastapi.staticfiles import StaticFiles
9
  from fastapi.responses import FileResponse
10
  from pydantic import BaseModel, HttpUrl
11
+ from process_interview import process_interview # Assuming process_interview is in a separate file
12
 
13
  # Logging setup
14
  logging.basicConfig(level=logging.INFO)
 
21
# Filesystem layout, resolved relative to the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
TEMP_DIR = os.path.join(BASE_DIR, "temp_files")  # Downloaded audio; process_audio deletes each file when done
STATIC_DIR = os.path.join(BASE_DIR, "static")
OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")  # Outputs are within static to be servable
JSON_DIR = os.path.join(OUTPUT_DIR, "json")
PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")

# Create necessary directories. os.makedirs also creates the missing parents
# (STATIC_DIR and OUTPUT_DIR), so this runs before the StaticFiles mount below.
for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
    os.makedirs(folder, exist_ok=True)

# Mount static files directory to be accessible via /static URL
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")

# Configuration Constants
VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')  # Lower-case; compared against the lower-cased URL extension
MAX_FILE_SIZE_MB = 300  # Upper bound enforced by validate_file_size

# Base URL for the deployed application (e.g., from Hugging Face Space or ngrok)
# This should NOT include /static or any subpaths; generate_public_url appends
# "/static/<relative path>" itself.
# Example: https://evalbot-audio-evalbot.hf.space
# Example: https://your-ngrok-url.ngrok-free.app
BASE_URL = os.getenv("BASE_URL", "http://localhost:7860")  # Default for local testing
44
+
45
+ # Pydantic Models for Request/Response validation
46
class ProcessResponse(BaseModel):
    """Response model for the /process-audio endpoint."""

    summary: str  # Multi-line plain-text summary (user id, speakers, duration, confidence, anxiety)
    json_url: str  # Public URL of the JSON analysis copied under static/outputs/json
    pdf_url: str  # Public URL of the PDF report copied under static/outputs/pdf
51
 
52
class ProcessAudioRequest(BaseModel):
    """Request model for the /process-audio endpoint."""

    file_url: HttpUrl  # URL of the audio file to process
    # Identifier for the user submitting the audio. process_audio embeds it in
    # the temporary and output filenames and echoes it in the summary.
    user_id: str
56
 
57
+ # Helper Functions
58
def download_file(file_url: str, dest_path: str, max_bytes=None):
    """Stream the file at ``file_url`` to ``dest_path``.

    The body is written in 8 KiB chunks and the transfer is aborted as soon as
    more than ``max_bytes`` have been received, so an oversized (or endless)
    response cannot fill the disk before the size is checked.

    Args:
        file_url: URL to fetch. It is caller-supplied and fetched as-is.
        dest_path: Local path to write. Its parent directory is created if
            it has one and it does not exist.
        max_bytes: Maximum number of bytes to accept. ``None`` (the default)
            means ``MAX_FILE_SIZE_MB`` megabytes.

    Raises:
        HTTPException: 400 if the request fails, the server answers with a
            4xx/5xx status, or the body exceeds ``max_bytes``; 500 for any
            other error (for example a filesystem failure).

    On any failure a partially written ``dest_path`` is removed. Error details
    are logged but not echoed in the HTTP response, because ``requests``
    exception messages can reveal internal hosts and addresses.
    """
    # NOTE(review): file_url is untrusted input and no host allow-list is
    # applied here, so this can reach internal services (SSRF). Restrict the
    # allowed hosts/schemes at the caller or in a shared validator.
    if max_bytes is None:
        max_bytes = MAX_FILE_SIZE_MB * 1024 * 1024

    logger.info(f"Attempting to download file from {file_url}")
    file_created = False
    succeeded = False
    try:
        # The context manager releases the connection even if we stop reading early.
        with requests.get(file_url, stream=True, timeout=60) as resp:
            resp.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

            # os.makedirs("") raises, so only create a directory when there is one.
            dest_dir = os.path.dirname(dest_path)
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)

            received = 0
            with open(dest_path, "wb") as f:
                file_created = True
                for chunk in resp.iter_content(chunk_size=8192):
                    if not chunk:  # Filter out keep-alive new chunks
                        continue
                    received += len(chunk)
                    if received > max_bytes:
                        logger.warning(
                            f"Download from {file_url} exceeded {max_bytes} bytes; aborting"
                        )
                        raise HTTPException(
                            status_code=400,
                            detail=f"File too large. Max size: {max_bytes / (1024 * 1024):.0f} MB",
                        )
                    f.write(chunk)
        succeeded = True
        logger.info(f"File downloaded successfully to {dest_path}")
    except HTTPException:
        # Already carries the right status/detail; keep it out of the generic handler.
        raise
    except requests.exceptions.RequestException as e:
        logger.error(f"Error downloading file from {file_url}: {e}")
        raise HTTPException(status_code=400, detail="Failed to download file from URL") from e
    except Exception as e:
        logger.error(f"Unexpected error during file download: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during file download") from e
    finally:
        # Never leave a truncated file behind for later steps to pick up.
        if file_created and not succeeded:
            try:
                os.remove(dest_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial download {dest_path}: {cleanup_error}")
79
 
80
def validate_file_size(file_path: str):
    """Reject a file whose size exceeds ``MAX_FILE_SIZE_MB``.

    Returns ``None`` when the file is within the limit. Otherwise the file is
    deleted and an ``HTTPException`` with status 400 is raised.
    """
    bytes_per_mb = 1024 * 1024
    size_in_mb = os.stat(file_path).st_size / bytes_per_mb

    # Guard clause: nothing to do for files within the limit.
    if size_in_mb <= MAX_FILE_SIZE_MB:
        return

    logger.warning(f"File too large: {size_in_mb:.2f} MB. Max allowed: {MAX_FILE_SIZE_MB} MB")
    # Clean up the oversized file before reporting the error.
    os.remove(file_path)
    raise HTTPException(
        status_code=400,
        detail=f"File too large: {size_in_mb:.2f} MB. Max size: {MAX_FILE_SIZE_MB} MB",
    )
87
+
88
def generate_public_url(full_local_path: str) -> str:
    """Return the public URL for a file stored under ``STATIC_DIR``.

    The path relative to ``STATIC_DIR`` is converted to forward slashes,
    percent-encoded, and appended to ``BASE_URL`` plus the ``/static`` mount
    prefix. For example, with ``STATIC_DIR`` of ``/app/static``, the path
    ``/app/static/outputs/json/file.json`` maps to
    ``<BASE_URL>/static/outputs/json/file.json``.

    Args:
        full_local_path: Local path of a file inside ``STATIC_DIR``.

    Returns:
        The absolute URL under which the static mount serves the file.

    Raises:
        ValueError: If ``full_local_path`` is ``STATIC_DIR`` itself or lies
            outside it; such a path has no URL under the ``/static`` mount.
    """
    from urllib.parse import quote

    relative_path = os.path.relpath(
        os.path.abspath(full_local_path), os.path.abspath(STATIC_DIR)
    )

    # A result of ".", "..", or "../something" means the path is not a file
    # inside STATIC_DIR, so no valid public URL exists for it.
    if (
        relative_path in (os.curdir, os.pardir)
        or relative_path.startswith(os.pardir + os.sep)
    ):
        raise ValueError(f"Path is not inside the static directory: {full_local_path}")

    # Replace the OS separator with forward slashes (Windows paths), then
    # percent-encode so spaces and other reserved characters stay valid in a URL.
    web_path = quote(relative_path.replace(os.sep, "/"))

    # Tolerate a BASE_URL configured with a trailing slash.
    return f"{BASE_URL.rstrip('/')}/static/{web_path}"
103
+
104
+ # Main API Endpoint
105
def _safe_filename_part(value: str, max_length: int = 64) -> str:
    """Reduce ``value`` to ASCII letters, digits, ``_`` and ``-`` so it cannot alter a path."""
    import re

    cleaned = re.sub(r"[^A-Za-z0-9_-]", "_", value)[:max_length]
    return cleaned or "user"


def _run_audio_pipeline(file_url: str, user_id: str, temp_path: str) -> ProcessResponse:
    """Download, analyse and publish one audio file (blocking; run it off the event loop)."""
    # 1. Download the audio file
    download_file(file_url, temp_path)

    # 2. Validate downloaded file size
    validate_file_size(temp_path)

    logger.info(f"Starting interview processing for user: {user_id} from {temp_path}")

    # 3. Run the analysis. The result is read below as a mapping with
    #    'json_path' and 'pdf_path' entries pointing at the generated files.
    result = process_interview(temp_path)
    if not result:
        logger.error(f"process_interview returned no result for {user_id}")
        raise HTTPException(status_code=500, detail="Audio processing failed: No result from analysis pipeline.")

    # 4. Copy the outputs under the static directory with unique, path-safe names
    safe_user_id = _safe_filename_part(user_id)
    json_filename = f"{safe_user_id}_{uuid.uuid4().hex}.json"
    pdf_filename = f"{safe_user_id}_{uuid.uuid4().hex}.pdf"

    json_dest_path = os.path.join(JSON_DIR, json_filename)
    pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)

    shutil.copyfile(result['json_path'], json_dest_path)
    shutil.copyfile(result['pdf_path'], pdf_dest_path)
    logger.info(f"Analysis outputs copied to: {json_dest_path} and {pdf_dest_path}")

    # 5. Load the copied analysis to build the summary
    with open(json_dest_path, "r", encoding="utf-8") as jf:
        analysis_data = json.load(jf)

    # "or" fallbacks also cover keys that are present but null in the JSON.
    voice_interpretation = (analysis_data.get('voice_analysis') or {}).get('interpretation') or {}
    speakers_list = analysis_data.get('speakers') or []
    total_duration = (analysis_data.get('text_analysis') or {}).get('total_duration') or 0.0

    summary_text = (
        f"User ID: {user_id}\n"
        # str() so non-string speaker labels do not break the join.
        f"Speakers: {', '.join(str(speaker) for speaker in speakers_list)}\n"
        f"Duration: {total_duration:.2f} sec\n"
        f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
        f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
    )

    json_public_url = generate_public_url(json_dest_path)
    pdf_public_url = generate_public_url(pdf_dest_path)

    logger.info("Audio processing and URL generation completed successfully.")
    return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)


@app.post("/process-audio", response_model=ProcessResponse)
async def process_audio(request: ProcessAudioRequest = Body(...)):
    """
    Process an audio file referenced by URL.

    Downloads the audio, runs it through the interview analysis pipeline, and
    returns a text summary plus public URLs for the generated JSON analysis
    and PDF report.

    Raises:
        HTTPException: 400 for an unsupported extension, a failed download or
            an oversized file; 500 when the analysis yields no result or any
            unexpected error occurs. Details of unexpected errors are logged,
            not returned to the client.
    """
    import asyncio
    from urllib.parse import urlparse

    file_url = str(request.file_url)
    user_id = request.user_id

    # Take the extension from the URL *path* only, so a query string or
    # fragment (e.g. ".wav?token=...") does not end up in the extension.
    file_ext = os.path.splitext(urlparse(file_url).path)[1].lower()
    if file_ext not in VALID_EXTENSIONS:
        logger.error(f"Invalid file extension: {file_ext}. Supported: {VALID_EXTENSIONS}")
        raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}. Supported formats: {', '.join(VALID_EXTENSIONS)}")

    # Unique temporary path. user_id is untrusted, so it is sanitised before
    # being used in a filename (otherwise "../" could escape TEMP_DIR).
    temp_filename = f"{_safe_filename_part(user_id)}_{uuid.uuid4().hex}{file_ext}"
    temp_path = os.path.join(TEMP_DIR, temp_filename)

    try:
        # The download and analysis are blocking; run them in the default
        # executor so other requests are served while this one is in progress.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _run_audio_pipeline, file_url, user_id, temp_path)
    except HTTPException:
        # Already contains the appropriate status/detail.
        raise
    except Exception:
        logger.exception(f"Unexpected error during audio processing for user {user_id}")
        raise HTTPException(status_code=500, detail="Internal server error during processing")
    finally:
        # Clean up the temporary downloaded audio file
        if os.path.exists(temp_path):
            os.remove(temp_path)
            logger.info(f"Cleaned up temporary file: {temp_path}")
187
+
188
+ # Routes to serve output files directly if needed (though /static mount handles this)
189
+ # These are redundant if /static mount works correctly, but can be kept for explicit control or debugging.
190
@app.get("/outputs/json/{filename}", response_class=FileResponse)
async def get_json_file(filename: str):
    """Serve a JSON analysis file from ``JSON_DIR``.

    Args:
        filename: Bare name of a file inside ``JSON_DIR``.

    Raises:
        HTTPException: 404 if ``filename`` is not a plain file name or no
            regular file with that name exists in ``JSON_DIR``.
    """
    # Accept bare file names only: anything containing a path separator, or
    # "." / "..", must not be resolved relative to JSON_DIR.
    is_bare_name = (
        filename == os.path.basename(filename)
        and filename not in (os.curdir, os.pardir)
    )
    file_path = os.path.join(JSON_DIR, filename)
    # isfile (not exists) so that directories are reported as missing instead
    # of being handed to FileResponse.
    if not is_bare_name or not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="JSON file not found")
    return FileResponse(file_path, media_type="application/json", filename=filename)
197
 
198
+ @app.get("/outputs/pdf/{filename}", response_class=FileResponse)
199
  async def get_pdf_file(filename: str):
200
+ """Serves a PDF report file from the outputs directory."""
201
  file_path = os.path.join(PDF_DIR, filename)
202
  if not os.path.exists(file_path):
203
  raise HTTPException(status_code=404, detail="PDF file not found")