norhan12 committed on
Commit
e707fd7
·
verified ·
1 Parent(s): 3cd4231

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -96
app.py CHANGED
@@ -8,7 +8,7 @@ from fastapi import FastAPI, HTTPException, Body
8
  from fastapi.staticfiles import StaticFiles
9
  from fastapi.responses import FileResponse
10
  from pydantic import BaseModel, HttpUrl
11
- from process_interview import process_interview # Assuming process_interview is in a separate file
12
 
13
  # Logging setup
14
  logging.basicConfig(level=logging.INFO)
@@ -21,7 +21,7 @@ app = FastAPI()
21
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
22
  TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
23
  STATIC_DIR = os.path.join(BASE_DIR, "static")
24
- OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs") # Outputs are within static to be servable
25
  JSON_DIR = os.path.join(OUTPUT_DIR, "json")
26
  PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
27
 
@@ -29,135 +29,94 @@ PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
29
  for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
30
  os.makedirs(folder, exist_ok=True)
31
 
32
- # Mount static files directory to be accessible via /static URL
33
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
34
 
35
  # Configuration Constants
36
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
37
  MAX_FILE_SIZE_MB = 300
38
 
39
- # Base URL for the deployed application (e.g., from Hugging Face Space or ngrok)
40
- # This should NOT include /static or any subpaths.
41
- # Example: https://evalbot-audio-evalbot.hf.space
42
- # Example: https://your-ngrok-url.ngrok-free.app
43
- BASE_URL = os.getenv("BASE_URL", "http://localhost:7860") # Default for local testing
44
 
45
- # Pydantic Models for Request/Response validation
46
  class ProcessResponse(BaseModel):
47
- """Response model for the /process-audio endpoint."""
48
  summary: str
49
  json_url: str
50
  pdf_url: str
51
 
52
  class ProcessAudioRequest(BaseModel):
53
- """Request model for the /process-audio endpoint."""
54
- file_url: HttpUrl # URL of the audio file to process
55
- user_id: str # Identifier for the user submitting the audio
56
 
57
  # Helper Functions
58
  def download_file(file_url: str, dest_path: str):
59
- """Downloads a file from a given URL to a specified destination path."""
60
- logger.info(f"Attempting to download file from {file_url}")
61
  try:
62
- resp = requests.get(file_url, stream=True, timeout=60) # Increased timeout
63
- resp.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
64
-
65
- # Ensure the destination directory exists
66
  os.makedirs(os.path.dirname(dest_path), exist_ok=True)
67
-
68
  with open(dest_path, "wb") as f:
69
  for chunk in resp.iter_content(chunk_size=8192):
70
- if chunk: # Filter out keep-alive new chunks
71
  f.write(chunk)
72
- logger.info(f"File downloaded successfully to {dest_path}")
73
  except requests.exceptions.RequestException as e:
74
- logger.error(f"Error downloading file from {file_url}: {e}")
75
- raise HTTPException(status_code=400, detail=f"Failed to download file from URL: {e}")
76
  except Exception as e:
77
- logger.error(f"Unexpected error during file download: {e}", exc_info=True)
78
- raise HTTPException(status_code=500, detail="Internal server error during file download")
79
 
80
  def validate_file_size(file_path: str):
81
- """Validates the size of a file against MAX_FILE_SIZE_MB."""
82
  file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
83
  if file_size_mb > MAX_FILE_SIZE_MB:
84
- logger.warning(f"File too large: {file_size_mb:.2f} MB. Max allowed: {MAX_FILE_SIZE_MB} MB")
85
- os.remove(file_path) # Clean up the oversized file
86
- raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB. Max size: {MAX_FILE_SIZE_MB} MB")
87
-
88
- def generate_public_url(full_local_path: str) -> str:
89
- """
90
- Generates a public URL for a locally stored file.
91
- Assumes the file is within the STATIC_DIR.
92
- """
93
- # Calculate the path relative to STATIC_DIR
94
- # Example: if STATIC_DIR is /app/static and full_local_path is /app/static/outputs/json/file.json
95
- # relative_path will be outputs/json/file.json
96
- relative_path = os.path.relpath(full_local_path, STATIC_DIR)
97
-
98
- # Replace backslashes with forward slashes for web compatibility (Windows paths)
99
- web_path = relative_path.replace(os.path.sep, "/")
100
-
101
- # Construct the full public URL using the BASE_URL and the mounted static path
102
- return f"{BASE_URL}/static/{web_path}"
103
-
104
- # Main API Endpoint
105
  @app.post("/process-audio", response_model=ProcessResponse)
106
  async def process_audio(request: ProcessAudioRequest = Body(...)):
107
- """
108
- Endpoint to process an audio file from a given URL.
109
- Downloads the audio, processes it through the interview analysis pipeline,
110
- and returns URLs for the generated JSON analysis and PDF report.
111
- """
112
  file_url = str(request.file_url)
113
  user_id = request.user_id
114
-
115
- # Validate file extension based on URL
116
  file_ext = os.path.splitext(file_url)[1].lower()
117
  if file_ext not in VALID_EXTENSIONS:
118
- logger.error(f"Invalid file extension: {file_ext}. Supported: {VALID_EXTENSIONS}")
119
- raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}. Supported formats: {', '.join(VALID_EXTENSIONS)}")
120
-
121
- # Create a unique temporary path for the downloaded audio file
122
  temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
123
  temp_path = os.path.join(TEMP_DIR, temp_filename)
124
-
125
  try:
126
- # 1. Download the audio file
127
  download_file(file_url, temp_path)
128
-
129
- # 2. Validate downloaded file size
130
  validate_file_size(temp_path)
131
-
132
- logger.info(f"Starting interview processing for user: {user_id} from {temp_path}")
133
-
134
- # 3. Process the interview audio using the external process_interview module
135
- # process_interview returns a dictionary with local paths to the generated JSON and PDF
136
- result = process_interview(temp_path)
137
-
138
  if not result:
139
- logger.error(f"process_interview returned no result for {user_id}")
140
- raise HTTPException(status_code=500, detail="Audio processing failed: No result from analysis pipeline.")
141
 
142
- # 4. Generate unique filenames for outputs and copy them to static outputs directory
143
  json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
144
  pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
145
-
146
  json_dest_path = os.path.join(JSON_DIR, json_filename)
147
  pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
148
-
149
  shutil.copyfile(result['json_path'], json_dest_path)
150
  shutil.copyfile(result['pdf_path'], pdf_dest_path)
151
- logger.info(f"Analysis outputs copied to: {json_dest_path} and {pdf_dest_path}")
152
 
153
- # 5. Load analysis data for summary and generate public URLs
154
- with open(json_dest_path, "r") as jf: # Use json_dest_path to read the *copied* file
155
  analysis_data = json.load(jf)
156
-
157
  voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
158
  speakers_list = analysis_data.get('speakers', [])
159
  total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
160
-
161
  summary_text = (
162
  f"User ID: {user_id}\n"
163
  f"Speakers: {', '.join(speakers_list)}\n"
@@ -165,31 +124,26 @@ async def process_audio(request: ProcessAudioRequest = Body(...)):
165
  f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
166
  f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
167
  )
168
-
169
- json_public_url = generate_public_url(json_dest_path)
170
- pdf_public_url = generate_public_url(pdf_dest_path)
171
-
172
- logger.info("Audio processing and URL generation completed successfully.")
173
  return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)
174
 
175
  except HTTPException as e:
176
- # Re-raise HTTPException directly as it already contains appropriate status/detail
177
  raise e
178
  except Exception as e:
179
- # Catch any other unexpected errors during the process
180
- logger.exception(f"Unexpected error during audio processing for user {user_id}: {e}")
181
- raise HTTPException(status_code=500, detail=f"Internal server error during processing: {e}")
182
  finally:
183
- # Clean up the temporary downloaded audio file
184
  if os.path.exists(temp_path):
185
  os.remove(temp_path)
186
- logger.info(f"Cleaned up temporary file: {temp_path}")
187
-
188
- # Routes to serve output files directly if needed (though /static mount handles this)
189
- # These are redundant if /static mount works correctly, but can be kept for explicit control or debugging.
190
  @app.get("/outputs/json/{filename}", response_class=FileResponse)
191
  async def get_json_file(filename: str):
192
- """Serves a JSON analysis file from the outputs directory."""
193
  file_path = os.path.join(JSON_DIR, filename)
194
  if not os.path.exists(file_path):
195
  raise HTTPException(status_code=404, detail="JSON file not found")
@@ -197,7 +151,6 @@ async def get_json_file(filename: str):
197
 
198
  @app.get("/outputs/pdf/{filename}", response_class=FileResponse)
199
  async def get_pdf_file(filename: str):
200
- """Serves a PDF report file from the outputs directory."""
201
  file_path = os.path.join(PDF_DIR, filename)
202
  if not os.path.exists(file_path):
203
  raise HTTPException(status_code=404, detail="PDF file not found")
 
8
  from fastapi.staticfiles import StaticFiles
9
  from fastapi.responses import FileResponse
10
  from pydantic import BaseModel, HttpUrl
11
+ from process_interview import process_interview # Assuming process_interview is in a separate file
12
 
13
  # Logging setup
14
  logging.basicConfig(level=logging.INFO)
 
21
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
22
  TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
23
  STATIC_DIR = os.path.join(BASE_DIR, "static")
24
+ OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
25
  JSON_DIR = os.path.join(OUTPUT_DIR, "json")
26
  PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
27
 
 
29
  for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
30
  os.makedirs(folder, exist_ok=True)
31
 
32
+ # Mount static files directory
33
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
34
 
35
  # Configuration Constants
36
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
37
  MAX_FILE_SIZE_MB = 300
38
 
39
+ # Public base URL of this deployment, without a trailing slash or sub-path (override via the BASE_URL environment variable)
40
+ BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")
 
 
 
41
 
42
+ # Pydantic Models
43
  class ProcessResponse(BaseModel):
 
44
  summary: str
45
  json_url: str
46
  pdf_url: str
47
 
48
  class ProcessAudioRequest(BaseModel):
49
+ file_url: HttpUrl
50
+ user_id: str
 
51
 
52
  # Helper Functions
53
  def download_file(file_url: str, dest_path: str):
54
+ logger.info(f"Downloading file from {file_url}")
 
55
  try:
56
+ resp = requests.get(file_url, stream=True, timeout=60)
57
+ resp.raise_for_status()
 
 
58
  os.makedirs(os.path.dirname(dest_path), exist_ok=True)
 
59
  with open(dest_path, "wb") as f:
60
  for chunk in resp.iter_content(chunk_size=8192):
61
+ if chunk:
62
  f.write(chunk)
63
+ logger.info(f"File downloaded to {dest_path}")
64
  except requests.exceptions.RequestException as e:
65
+ logger.error(f"Download failed: {e}")
66
+ raise HTTPException(status_code=400, detail=f"Failed to download file: {e}")
67
  except Exception as e:
68
+ logger.error(f"Unexpected download error: {e}", exc_info=True)
69
+ raise HTTPException(status_code=500, detail="Internal server error during download")
70
 
71
  def validate_file_size(file_path: str):
 
72
  file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
73
  if file_size_mb > MAX_FILE_SIZE_MB:
74
+ os.remove(file_path)
75
+ logger.warning(f"File too large: {file_size_mb:.2f} MB")
76
+ raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB (max: {MAX_FILE_SIZE_MB} MB)")
77
+
78
+ # Main Endpoint
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  @app.post("/process-audio", response_model=ProcessResponse)
80
  async def process_audio(request: ProcessAudioRequest = Body(...)):
 
 
 
 
 
81
  file_url = str(request.file_url)
82
  user_id = request.user_id
83
+
 
84
  file_ext = os.path.splitext(file_url)[1].lower()
85
  if file_ext not in VALID_EXTENSIONS:
86
+ logger.error(f"Invalid file extension: {file_ext}")
87
+ raise HTTPException(status_code=400, detail=f"Invalid extension: {file_ext}. Supported: {', '.join(VALID_EXTENSIONS)}")
88
+
 
89
  temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
90
  temp_path = os.path.join(TEMP_DIR, temp_filename)
91
+
92
  try:
 
93
  download_file(file_url, temp_path)
 
 
94
  validate_file_size(temp_path)
95
+
96
+ logger.info(f"Processing interview for user: {user_id}")
97
+
98
+ result = process_interview(temp_path)
 
 
 
99
  if not result:
100
+ logger.error("process_interview returned no result")
101
+ raise HTTPException(status_code=500, detail="Audio processing failed")
102
 
 
103
  json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
104
  pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
105
+
106
  json_dest_path = os.path.join(JSON_DIR, json_filename)
107
  pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
108
+
109
  shutil.copyfile(result['json_path'], json_dest_path)
110
  shutil.copyfile(result['pdf_path'], pdf_dest_path)
111
+ logger.info(f"Files saved: {json_dest_path}, {pdf_dest_path}")
112
 
113
+ with open(json_dest_path, "r") as jf:
 
114
  analysis_data = json.load(jf)
115
+
116
  voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
117
  speakers_list = analysis_data.get('speakers', [])
118
  total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
119
+
120
  summary_text = (
121
  f"User ID: {user_id}\n"
122
  f"Speakers: {', '.join(speakers_list)}\n"
 
124
  f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
125
  f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
126
  )
127
+
128
+ json_public_url = f"{BASE_URL}/static/outputs/json/{json_filename}"
129
+ pdf_public_url = f"{BASE_URL}/static/outputs/pdf/{pdf_filename}"
130
+
131
+ logger.info("Processing completed successfully.")
132
  return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)
133
 
134
  except HTTPException as e:
 
135
  raise e
136
  except Exception as e:
137
+ logger.exception(f"Unexpected error: {e}")
138
+ raise HTTPException(status_code=500, detail=f"Internal server error: {e}")
 
139
  finally:
 
140
  if os.path.exists(temp_path):
141
  os.remove(temp_path)
142
+ logger.info(f"Temporary file cleaned: {temp_path}")
143
+
144
+ # Serve files directly (optional explicit routes)
 
145
  @app.get("/outputs/json/{filename}", response_class=FileResponse)
146
  async def get_json_file(filename: str):
 
147
  file_path = os.path.join(JSON_DIR, filename)
148
  if not os.path.exists(file_path):
149
  raise HTTPException(status_code=404, detail="JSON file not found")
 
151
 
152
  @app.get("/outputs/pdf/{filename}", response_class=FileResponse)
153
  async def get_pdf_file(filename: str):
 
154
  file_path = os.path.join(PDF_DIR, filename)
155
  if not os.path.exists(file_path):
156
  raise HTTPException(status_code=404, detail="PDF file not found")