norhan12 committed on
Commit
93878a6
·
verified ·
1 Parent(s): e707fd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -109
app.py CHANGED
@@ -1,157 +1,192 @@
 
 
1
  import os
2
  import uuid
3
  import shutil
4
  import json
 
 
 
5
  import requests
6
- import logging
7
- from fastapi import FastAPI, HTTPException, Body
8
- from fastapi.staticfiles import StaticFiles
9
- from fastapi.responses import FileResponse
10
- from pydantic import BaseModel, HttpUrl
11
- from process_interview import process_interview # Assuming process_interview is in a separate file
12
 
13
- # Logging setup
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger("EvalBot-Audio-Processor")
16
 
17
  # Initialize FastAPI app
18
  app = FastAPI()
19
 
20
- # Directories
21
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
22
- TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
23
- STATIC_DIR = os.path.join(BASE_DIR, "static")
24
- OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
25
- JSON_DIR = os.path.join(OUTPUT_DIR, "json")
26
- PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
27
 
28
- # Create necessary directories
29
- for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
30
- os.makedirs(folder, exist_ok=True)
31
 
32
- # Mount static files directory
 
33
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
34
 
35
- # Configuration Constants
36
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
37
  MAX_FILE_SIZE_MB = 300
38
 
39
- # Base URL (set your domain or huggingface space URL here)
 
 
 
40
  BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")
41
 
42
- # Pydantic Models
43
  class ProcessResponse(BaseModel):
 
 
 
 
44
  summary: str
45
  json_url: str
46
  pdf_url: str
47
 
48
- class ProcessAudioRequest(BaseModel):
49
- file_url: HttpUrl
50
- user_id: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- # Helper Functions
53
- def download_file(file_url: str, dest_path: str):
54
- logger.info(f"Downloading file from {file_url}")
55
- try:
56
- resp = requests.get(file_url, stream=True, timeout=60)
57
- resp.raise_for_status()
58
- os.makedirs(os.path.dirname(dest_path), exist_ok=True)
59
- with open(dest_path, "wb") as f:
60
- for chunk in resp.iter_content(chunk_size=8192):
61
- if chunk:
62
- f.write(chunk)
63
- logger.info(f"File downloaded to {dest_path}")
64
- except requests.exceptions.RequestException as e:
65
- logger.error(f"Download failed: {e}")
66
- raise HTTPException(status_code=400, detail=f"Failed to download file: {e}")
67
- except Exception as e:
68
- logger.error(f"Unexpected download error: {e}", exc_info=True)
69
- raise HTTPException(status_code=500, detail="Internal server error during download")
70
-
71
- def validate_file_size(file_path: str):
72
- file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
73
- if file_size_mb > MAX_FILE_SIZE_MB:
74
- os.remove(file_path)
75
- logger.warning(f"File too large: {file_size_mb:.2f} MB")
76
- raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB (max: {MAX_FILE_SIZE_MB} MB)")
77
-
78
- # Main Endpoint
79
  @app.post("/process-audio", response_model=ProcessResponse)
80
- async def process_audio(request: ProcessAudioRequest = Body(...)):
81
- file_url = str(request.file_url)
82
- user_id = request.user_id
83
-
84
- file_ext = os.path.splitext(file_url)[1].lower()
85
- if file_ext not in VALID_EXTENSIONS:
86
- logger.error(f"Invalid file extension: {file_ext}")
87
- raise HTTPException(status_code=400, detail=f"Invalid extension: {file_ext}. Supported: {', '.join(VALID_EXTENSIONS)}")
88
-
89
- temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
90
- temp_path = os.path.join(TEMP_DIR, temp_filename)
91
-
92
  try:
93
- download_file(file_url, temp_path)
94
- validate_file_size(temp_path)
95
-
96
- logger.info(f"Processing interview for user: {user_id}")
97
-
98
- result = process_interview(temp_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  if not result:
100
- logger.error("process_interview returned no result")
101
- raise HTTPException(status_code=500, detail="Audio processing failed")
102
-
103
- json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
104
- pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
105
-
106
- json_dest_path = os.path.join(JSON_DIR, json_filename)
107
- pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
108
-
109
- shutil.copyfile(result['json_path'], json_dest_path)
110
- shutil.copyfile(result['pdf_path'], pdf_dest_path)
111
- logger.info(f"Files saved: {json_dest_path}, {pdf_dest_path}")
112
-
113
- with open(json_dest_path, "r") as jf:
 
 
 
 
 
 
 
 
 
 
 
114
  analysis_data = json.load(jf)
115
 
116
- voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
117
- speakers_list = analysis_data.get('speakers', [])
118
  total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
119
 
120
- summary_text = (
121
  f"User ID: {user_id}\n"
122
- f"Speakers: {', '.join(speakers_list)}\n"
123
  f"Duration: {total_duration:.2f} sec\n"
124
- f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
125
- f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
126
  )
127
 
128
- json_public_url = f"{BASE_URL}/static/outputs/json/{json_filename}"
129
- pdf_public_url = f"{BASE_URL}/static/outputs/pdf/{pdf_filename}"
 
 
 
 
130
 
131
- logger.info("Processing completed successfully.")
132
- return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)
133
 
134
  except HTTPException as e:
 
135
  raise e
136
  except Exception as e:
137
- logger.exception(f"Unexpected error: {e}")
138
- raise HTTPException(status_code=500, detail=f"Internal server error: {e}")
139
- finally:
140
- if os.path.exists(temp_path):
141
- os.remove(temp_path)
142
- logger.info(f"Temporary file cleaned: {temp_path}")
143
-
144
- # Serve files directly (optional explicit routes)
145
- @app.get("/outputs/json/{filename}", response_class=FileResponse)
146
- async def get_json_file(filename: str):
147
- file_path = os.path.join(JSON_DIR, filename)
 
 
 
 
 
 
148
  if not os.path.exists(file_path):
149
  raise HTTPException(status_code=404, detail="JSON file not found")
150
  return FileResponse(file_path, media_type="application/json", filename=filename)
151
 
152
- @app.get("/outputs/pdf/{filename}", response_class=FileResponse)
153
- async def get_pdf_file(filename: str):
154
- file_path = os.path.join(PDF_DIR, filename)
 
 
 
 
155
  if not os.path.exists(file_path):
156
  raise HTTPException(status_code=404, detail="PDF file not found")
157
  return FileResponse(file_path, media_type="application/pdf", filename=filename)
 
1
+
2
+
3
  import os
4
  import uuid
5
  import shutil
6
  import json
7
+ from fastapi import FastAPI, HTTPException, Query
8
+ from fastapi.responses import JSONResponse, FileResponse
9
+ from pydantic import BaseModel
10
  import requests
11
+ from process_interview import process_interview # Ensure process_interview function is available
 
 
 
 
 
12
 
13
+ from fastapi.staticfiles import StaticFiles
 
 
14
 
15
  # Initialize FastAPI app
16
  app = FastAPI()
17
 
18
+ # --- Directory Setup ---
19
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # Get the directory of the current script
20
+ TEMP_DIR = os.path.join(BASE_DIR, "temp_files") # Temporary storage for downloaded audio files
21
+ STATIC_DIR = os.path.join(BASE_DIR, "static") # Root directory for all static assets
22
+ OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs") # Subdirectory within static for generated reports
 
 
23
 
24
+ # Create directories if they don't exist
25
+ os.makedirs(TEMP_DIR, exist_ok=True)
26
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
27
 
28
+ # Mount the 'static' directory to be served at the '/static' URL path.
29
+ # This means files in ./static/outputs/filename will be accessible at /static/outputs/filename
30
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
31
 
32
+ # --- Configuration Constants ---
33
  VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
34
  MAX_FILE_SIZE_MB = 300
35
 
36
+ # BASE_URL should be the root URL of your deployed application.
37
+ # Example: "https://evalbot-audio-evalbot.hf.space" when deployed on Hugging Face.
38
+ # Example: "http://localhost:8000" when running locally with Uvicorn.
39
+ # It should NOT include "/static" or any subpaths.
40
  BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")
41
 
42
+ # --- Pydantic Models ---
43
  class ProcessResponse(BaseModel):
44
+ """
45
+ Defines the structure of the response returned by the /process-audio endpoint.
46
+ Includes a summary of the analysis and public URLs to the detailed JSON and PDF reports.
47
+ """
48
  summary: str
49
  json_url: str
50
  pdf_url: str
51
 
52
+ # --- Helper Function for URL Generation ---
53
+ def generate_public_output_url(filename: str, file_type: str) -> str:
54
+ """
55
+ Constructs a public URL for a generated output file (JSON or PDF).
56
+ The files are stored in STATIC_DIR/outputs/.
57
+ """
58
+ # The path relative to the mounted /static directory will be "outputs/json/filename" or "outputs/pdf/filename"
59
+ # We join it directly to BASE_URL/static/
60
+ return f"{BASE_URL}/static/outputs/{file_type}/{filename}"
61
+
62
+ # --- API Endpoints ---
63
+ @app.get("/")
64
+ async def root():
65
+ """
66
+ Root endpoint for the API. Returns a simple message to confirm the API is running.
67
+ """
68
+ return {"message": "EvalBot API is running"}
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  @app.post("/process-audio", response_model=ProcessResponse)
71
+ async def process_audio(
72
+ file_url: str = Query(..., description="URL of the audio file to be processed"),
73
+ user_id: str = Query(..., description="Unique identifier for the user")
74
+ ):
75
+ """
76
+ Main endpoint to process an audio file.
77
+ Downloads the audio, performs interview analysis using `process_interview`,
78
+ saves the generated reports, and returns their public URLs.
79
+ """
 
 
 
80
  try:
81
+ # Validate audio file extension
82
+ file_ext = os.path.splitext(file_url)[1].lower()
83
+ if file_ext not in VALID_EXTENSIONS:
84
+ raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}. Supported: {', '.join(VALID_EXTENSIONS)}")
85
+
86
+ # Generate a unique temporary filename for the downloaded audio
87
+ local_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
88
+ local_path = os.path.join(TEMP_DIR, local_filename)
89
+
90
+ # Download the audio file from the provided URL
91
+ try:
92
+ resp = requests.get(file_url, stream=True, timeout=30)
93
+ resp.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx)
94
+ with open(local_path, "wb") as f:
95
+ for chunk in resp.iter_content(chunk_size=8192):
96
+ if chunk: # Write only non-empty chunks
97
+ f.write(chunk)
98
+ except requests.exceptions.RequestException as e:
99
+ raise HTTPException(status_code=400, detail=f"Failed to download the file from URL: {e}")
100
+
101
+ # Validate the size of the downloaded file
102
+ file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
103
+ if file_size_mb > MAX_FILE_SIZE_MB:
104
+ os.remove(local_path) # Clean up the oversized file
105
+ raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB. Max allowed: {MAX_FILE_SIZE_MB} MB")
106
+
107
+ # Process the interview audio using the `process_interview` module
108
+ result = process_interview(local_path)
109
  if not result:
110
+ os.remove(local_path) # Clean up temporary audio file
111
+ raise HTTPException(status_code=500, detail="Audio processing failed: `process_interview` returned no result.")
112
+
113
+ # Get the source paths of the generated JSON and PDF reports from `process_interview`
114
+ json_src_path = result['json_path']
115
+ pdf_src_path = result['pdf_path']
116
+
117
+ # Generate unique destination filenames for the reports within the public 'static/outputs' directory
118
+ json_dest_name = f"{user_id}_{uuid.uuid4().hex}.json"
119
+ pdf_dest_name = f"{user_id}_{uuid.uuid4().hex}.pdf"
120
+
121
+ # Define the full destination paths for copying the reports
122
+ json_dest_full_path = os.path.join(OUTPUT_DIR, "json", json_dest_name) # Ensure sub-folders
123
+ pdf_dest_full_path = os.path.join(OUTPUT_DIR, "pdf", pdf_dest_name) # Ensure sub-folders
124
+
125
+ # Create subdirectories if they don't exist
126
+ os.makedirs(os.path.dirname(json_dest_full_path), exist_ok=True)
127
+ os.makedirs(os.path.dirname(pdf_dest_full_path), exist_ok=True)
128
+
129
+ # Copy the generated reports to their final public locations
130
+ shutil.copyfile(json_src_path, json_dest_full_path)
131
+ shutil.copyfile(pdf_src_path, pdf_dest_full_path)
132
+
133
+ # Read analysis data from the JSON report to create the summary for the response
134
+ with open(json_src_path, "r") as jf: # Read from the source path, as it's guaranteed to be complete
135
  analysis_data = json.load(jf)
136
 
137
+ voice = analysis_data.get('voice_analysis', {}).get('interpretation', {})
138
+ speakers = analysis_data.get('speakers', [])
139
  total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
140
 
141
+ summary = (
142
  f"User ID: {user_id}\n"
143
+ f"Speakers: {', '.join(speakers)}\n"
144
  f"Duration: {total_duration:.2f} sec\n"
145
+ f"Confidence: {voice.get('confidence_level', 'N/A')}\n"
146
+ f"Anxiety: {voice.get('anxiety_level', 'N/A')}"
147
  )
148
 
149
+ # Generate the public URLs using the helper function
150
+ json_url = generate_public_output_url(json_dest_name, "json")
151
+ pdf_url = generate_public_output_url(pdf_dest_name, "pdf")
152
+
153
+ # Clean up the temporary downloaded audio file
154
+ os.remove(local_path)
155
 
156
+ return ProcessResponse(summary=summary, json_url=json_url, pdf_url=pdf_url)
 
157
 
158
  except HTTPException as e:
159
+ # Re-raise explicit HTTPExceptions (e.g., 400 for bad requests)
160
  raise e
161
  except Exception as e:
162
+ # Catch any other unexpected errors and return a 500 Internal Server Error
163
+ # Log the full traceback for debugging
164
+ import traceback
165
+ traceback.print_exc()
166
+ raise HTTPException(status_code=500, detail=f"An internal server error occurred: {str(e)}")
167
+
168
+ # --- GET Endpoints for Direct File Access ---
169
+ # These endpoints allow direct access to the generated JSON and PDF reports.
170
+ # The URL paths are designed to match how files are mounted by StaticFiles.
171
+
172
+ @app.get("/static/outputs/json/{filename}")
173
+ async def get_json_report(filename: str):
174
+ """
175
+ Serves a JSON analysis file directly from the 'static/outputs/json' directory.
176
+ Example URL: https://evalbot-audio-evalbot.hf.space/static/outputs/json/candidate-123_uuid.json
177
+ """
178
+ file_path = os.path.join(OUTPUT_DIR, "json", filename) # Construct full path
179
  if not os.path.exists(file_path):
180
  raise HTTPException(status_code=404, detail="JSON file not found")
181
  return FileResponse(file_path, media_type="application/json", filename=filename)
182
 
183
+ @app.get("/static/outputs/pdf/{filename}")
184
+ async def get_pdf_report(filename: str):
185
+ """
186
+ Serves a PDF report file directly from the 'static/outputs/pdf' directory.
187
+ Example URL: https://evalbot-audio-evalbot.hf.space/static/outputs/pdf/candidate-123_uuid.pdf
188
+ """
189
+ file_path = os.path.join(OUTPUT_DIR, "pdf", filename) # Construct full path
190
  if not os.path.exists(file_path):
191
  raise HTTPException(status_code=404, detail="PDF file not found")
192
  return FileResponse(file_path, media_type="application/pdf", filename=filename)