Spaces:

EvalBot
/

Audio-EvalBot

Sleeping

App Files Community

norhan12 committed on Jun 12, 2025

Commit

e707fd7

verified ·

1 Parent(s): 3cd4231

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -96

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from fastapi import FastAPI, HTTPException, Body
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, HttpUrl
-from process_interview import process_interview # Assuming process_interview is in a separate file
 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -21,7 +21,7 @@ app = FastAPI()
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
 STATIC_DIR = os.path.join(BASE_DIR, "static")
-OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs") # Outputs are within static to be servable
 JSON_DIR = os.path.join(OUTPUT_DIR, "json")
 PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
@@ -29,135 +29,94 @@ PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
 for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
     os.makedirs(folder, exist_ok=True)
-# Mount static files directory to be accessible via /static URL
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
 # Configuration Constants
 VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
 MAX_FILE_SIZE_MB = 300
-# Base URL for the deployed application (e.g., from Hugging Face Space or ngrok)
-# This should NOT include /static or any subpaths.
-# Example: https://evalbot-audio-evalbot.hf.space
-# Example: https://your-ngrok-url.ngrok-free.app
-BASE_URL = os.getenv("BASE_URL", "http://localhost:7860") # Default for local testing
-# Pydantic Models for Request/Response validation
 class ProcessResponse(BaseModel):
-    """Response model for the /process-audio endpoint."""
     summary: str
     json_url: str
     pdf_url: str
 class ProcessAudioRequest(BaseModel):
-    """Request model for the /process-audio endpoint."""
-    file_url: HttpUrl # URL of the audio file to process
-    user_id: str      # Identifier for the user submitting the audio
 # Helper Functions
 def download_file(file_url: str, dest_path: str):
-    """Downloads a file from a given URL to a specified destination path."""
-    logger.info(f"Attempting to download file from {file_url}")
     try:
-        resp = requests.get(file_url, stream=True, timeout=60) # Increased timeout
-        resp.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
-        # Ensure the destination directory exists
         os.makedirs(os.path.dirname(dest_path), exist_ok=True)
         with open(dest_path, "wb") as f:
             for chunk in resp.iter_content(chunk_size=8192):
-                if chunk: # Filter out keep-alive new chunks
                     f.write(chunk)
-        logger.info(f"File downloaded successfully to {dest_path}")
     except requests.exceptions.RequestException as e:
-        logger.error(f"Error downloading file from {file_url}: {e}")
-        raise HTTPException(status_code=400, detail=f"Failed to download file from URL: {e}")
     except Exception as e:
-        logger.error(f"Unexpected error during file download: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail="Internal server error during file download")
 def validate_file_size(file_path: str):
-    """Validates the size of a file against MAX_FILE_SIZE_MB."""
     file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
     if file_size_mb > MAX_FILE_SIZE_MB:
-        logger.warning(f"File too large: {file_size_mb:.2f} MB. Max allowed: {MAX_FILE_SIZE_MB} MB")
-        os.remove(file_path) # Clean up the oversized file
-        raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB. Max size: {MAX_FILE_SIZE_MB} MB")
-def generate_public_url(full_local_path: str) -> str:
-    """
-    Generates a public URL for a locally stored file.
-    Assumes the file is within the STATIC_DIR.
-    """
-    # Calculate the path relative to STATIC_DIR
-    # Example: if STATIC_DIR is /app/static and full_local_path is /app/static/outputs/json/file.json
-    # relative_path will be outputs/json/file.json
-    relative_path = os.path.relpath(full_local_path, STATIC_DIR)
-    # Replace backslashes with forward slashes for web compatibility (Windows paths)
-    web_path = relative_path.replace(os.path.sep, "/")
-    # Construct the full public URL using the BASE_URL and the mounted static path
-    return f"{BASE_URL}/static/{web_path}"
-# Main API Endpoint
 @app.post("/process-audio", response_model=ProcessResponse)
 async def process_audio(request: ProcessAudioRequest = Body(...)):
-    """
-    Endpoint to process an audio file from a given URL.
-    Downloads the audio, processes it through the interview analysis pipeline,
-    and returns URLs for the generated JSON analysis and PDF report.
-    """
     file_url = str(request.file_url)
     user_id = request.user_id
-    # Validate file extension based on URL
     file_ext = os.path.splitext(file_url)[1].lower()
     if file_ext not in VALID_EXTENSIONS:
-        logger.error(f"Invalid file extension: {file_ext}. Supported: {VALID_EXTENSIONS}")
-        raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}. Supported formats: {', '.join(VALID_EXTENSIONS)}")
-    # Create a unique temporary path for the downloaded audio file
     temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
     temp_path = os.path.join(TEMP_DIR, temp_filename)
     try:
-        # 1. Download the audio file
         download_file(file_url, temp_path)
-        # 2. Validate downloaded file size
         validate_file_size(temp_path)
-        logger.info(f"Starting interview processing for user: {user_id} from {temp_path}")
-        # 3. Process the interview audio using the external process_interview module
-        # process_interview returns a dictionary with local paths to the generated JSON and PDF
-        result = process_interview(temp_path)
         if not result:
-            logger.error(f"process_interview returned no result for {user_id}")
-            raise HTTPException(status_code=500, detail="Audio processing failed: No result from analysis pipeline.")
-        # 4. Generate unique filenames for outputs and copy them to static outputs directory
         json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
         pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
         json_dest_path = os.path.join(JSON_DIR, json_filename)
         pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
         shutil.copyfile(result['json_path'], json_dest_path)
         shutil.copyfile(result['pdf_path'], pdf_dest_path)
-        logger.info(f"Analysis outputs copied to: {json_dest_path} and {pdf_dest_path}")
-        # 5. Load analysis data for summary and generate public URLs
-        with open(json_dest_path, "r") as jf: # Use json_dest_path to read the *copied* file
             analysis_data = json.load(jf)
         voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
         speakers_list = analysis_data.get('speakers', [])
         total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
         summary_text = (
             f"User ID: {user_id}\n"
             f"Speakers: {', '.join(speakers_list)}\n"
@@ -165,31 +124,26 @@ async def process_audio(request: ProcessAudioRequest = Body(...)):
             f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
             f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
         )
-        json_public_url = generate_public_url(json_dest_path)
-        pdf_public_url = generate_public_url(pdf_dest_path)
-        logger.info("Audio processing and URL generation completed successfully.")
         return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)
     except HTTPException as e:
-        # Re-raise HTTPException directly as it already contains appropriate status/detail
         raise e
     except Exception as e:
-        # Catch any other unexpected errors during the process
-        logger.exception(f"Unexpected error during audio processing for user {user_id}: {e}")
-        raise HTTPException(status_code=500, detail=f"Internal server error during processing: {e}")
     finally:
-        # Clean up the temporary downloaded audio file
         if os.path.exists(temp_path):
             os.remove(temp_path)
-            logger.info(f"Cleaned up temporary file: {temp_path}")
-# Routes to serve output files directly if needed (though /static mount handles this)
-# These are redundant if /static mount works correctly, but can be kept for explicit control or debugging.
 @app.get("/outputs/json/{filename}", response_class=FileResponse)
 async def get_json_file(filename: str):
-    """Serves a JSON analysis file from the outputs directory."""
     file_path = os.path.join(JSON_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="JSON file not found")
@@ -197,7 +151,6 @@ async def get_json_file(filename: str):
 @app.get("/outputs/pdf/{filename}", response_class=FileResponse)
 async def get_pdf_file(filename: str):
-    """Serves a PDF report file from the outputs directory."""
     file_path = os.path.join(PDF_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="PDF file not found")

 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, HttpUrl
+from process_interview import process_interview  # Assuming process_interview is in a separate file
 # Logging setup
 logging.basicConfig(level=logging.INFO)
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
 STATIC_DIR = os.path.join(BASE_DIR, "static")
+OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
 JSON_DIR = os.path.join(OUTPUT_DIR, "json")
 PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
 for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
     os.makedirs(folder, exist_ok=True)
+# Mount static files directory
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
 # Configuration Constants
 VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
 MAX_FILE_SIZE_MB = 300
+# Base URL (set your domain or huggingface space URL here)
+BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")
+# Pydantic Models
 class ProcessResponse(BaseModel):
     summary: str
     json_url: str
     pdf_url: str
 class ProcessAudioRequest(BaseModel):
+    file_url: HttpUrl
+    user_id: str
 # Helper Functions
 def download_file(file_url: str, dest_path: str):
+    logger.info(f"Downloading file from {file_url}")
     try:
+        resp = requests.get(file_url, stream=True, timeout=60)
+        resp.raise_for_status()
         os.makedirs(os.path.dirname(dest_path), exist_ok=True)
         with open(dest_path, "wb") as f:
             for chunk in resp.iter_content(chunk_size=8192):
+                if chunk:
                     f.write(chunk)
+        logger.info(f"File downloaded to {dest_path}")
     except requests.exceptions.RequestException as e:
+        logger.error(f"Download failed: {e}")
+        raise HTTPException(status_code=400, detail=f"Failed to download file: {e}")
     except Exception as e:
+        logger.error(f"Unexpected download error: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal server error during download")
 def validate_file_size(file_path: str):
     file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
     if file_size_mb > MAX_FILE_SIZE_MB:
+        os.remove(file_path)
+        logger.warning(f"File too large: {file_size_mb:.2f} MB")
+        raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB (max: {MAX_FILE_SIZE_MB} MB)")
+# Main Endpoint
 @app.post("/process-audio", response_model=ProcessResponse)
 async def process_audio(request: ProcessAudioRequest = Body(...)):
     file_url = str(request.file_url)
     user_id = request.user_id
     file_ext = os.path.splitext(file_url)[1].lower()
     if file_ext not in VALID_EXTENSIONS:
+        logger.error(f"Invalid file extension: {file_ext}")
+        raise HTTPException(status_code=400, detail=f"Invalid extension: {file_ext}. Supported: {', '.join(VALID_EXTENSIONS)}")
     temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
     temp_path = os.path.join(TEMP_DIR, temp_filename)
     try:
         download_file(file_url, temp_path)
         validate_file_size(temp_path)
+        logger.info(f"Processing interview for user: {user_id}")
+        result = process_interview(temp_path)
         if not result:
+            logger.error("process_interview returned no result")
+            raise HTTPException(status_code=500, detail="Audio processing failed")
         json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
         pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
         json_dest_path = os.path.join(JSON_DIR, json_filename)
         pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
         shutil.copyfile(result['json_path'], json_dest_path)
         shutil.copyfile(result['pdf_path'], pdf_dest_path)
+        logger.info(f"Files saved: {json_dest_path}, {pdf_dest_path}")
+        with open(json_dest_path, "r") as jf:
             analysis_data = json.load(jf)
         voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
         speakers_list = analysis_data.get('speakers', [])
         total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
         summary_text = (
             f"User ID: {user_id}\n"
             f"Speakers: {', '.join(speakers_list)}\n"
             f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
             f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
         )
+        json_public_url = f"{BASE_URL}/static/outputs/json/{json_filename}"
+        pdf_public_url = f"{BASE_URL}/static/outputs/pdf/{pdf_filename}"
+        logger.info("Processing completed successfully.")
         return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)
     except HTTPException as e:
         raise e
     except Exception as e:
+        logger.exception(f"Unexpected error: {e}")
+        raise HTTPException(status_code=500, detail=f"Internal server error: {e}")
     finally:
         if os.path.exists(temp_path):
             os.remove(temp_path)
+            logger.info(f"Temporary file cleaned: {temp_path}")
+# Serve files directly (optional explicit routes)
 @app.get("/outputs/json/{filename}", response_class=FileResponse)
 async def get_json_file(filename: str):
     file_path = os.path.join(JSON_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="JSON file not found")
 @app.get("/outputs/pdf/{filename}", response_class=FileResponse)
 async def get_pdf_file(filename: str):
     file_path = os.path.join(PDF_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="PDF file not found")