Spaces:

EvalBot
/

Audio-EvalBot

Sleeping

App Files Community

norhan12 committed on Jun 12, 2025

Commit

93878a6

verified ·

1 Parent(s): e707fd7

Update app.py

Browse files

Files changed (1) hide show

app.py +144 -109

app.py CHANGED Viewed

@@ -1,157 +1,192 @@
 import os
 import uuid
 import shutil
 import json
 import requests
-import logging
-from fastapi import FastAPI, HTTPException, Body
-from fastapi.staticfiles import StaticFiles
-from fastapi.responses import FileResponse
-from pydantic import BaseModel, HttpUrl
-from process_interview import process_interview  # Assuming process_interview is in a separate file
-# Logging setup
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("EvalBot-Audio-Processor")
 # Initialize FastAPI app
 app = FastAPI()
-# Directories
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
-STATIC_DIR = os.path.join(BASE_DIR, "static")
-OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
-JSON_DIR = os.path.join(OUTPUT_DIR, "json")
-PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
-# Create necessary directories
-for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
-    os.makedirs(folder, exist_ok=True)
-# Mount static files directory
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
-# Configuration Constants
 VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
 MAX_FILE_SIZE_MB = 300
-# Base URL (set your domain or huggingface space URL here)
 BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")
-# Pydantic Models
 class ProcessResponse(BaseModel):
     summary: str
     json_url: str
     pdf_url: str
-class ProcessAudioRequest(BaseModel):
-    file_url: HttpUrl
-    user_id: str
-# Helper Functions
-def download_file(file_url: str, dest_path: str):
-    logger.info(f"Downloading file from {file_url}")
-    try:
-        resp = requests.get(file_url, stream=True, timeout=60)
-        resp.raise_for_status()
-        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
-        with open(dest_path, "wb") as f:
-            for chunk in resp.iter_content(chunk_size=8192):
-                if chunk:
-                    f.write(chunk)
-        logger.info(f"File downloaded to {dest_path}")
-    except requests.exceptions.RequestException as e:
-        logger.error(f"Download failed: {e}")
-        raise HTTPException(status_code=400, detail=f"Failed to download file: {e}")
-    except Exception as e:
-        logger.error(f"Unexpected download error: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail="Internal server error during download")
-def validate_file_size(file_path: str):
-    file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
-    if file_size_mb > MAX_FILE_SIZE_MB:
-        os.remove(file_path)
-        logger.warning(f"File too large: {file_size_mb:.2f} MB")
-        raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB (max: {MAX_FILE_SIZE_MB} MB)")
-# Main Endpoint
 @app.post("/process-audio", response_model=ProcessResponse)
-async def process_audio(request: ProcessAudioRequest = Body(...)):
-    file_url = str(request.file_url)
-    user_id = request.user_id
-    file_ext = os.path.splitext(file_url)[1].lower()
-    if file_ext not in VALID_EXTENSIONS:
-        logger.error(f"Invalid file extension: {file_ext}")
-        raise HTTPException(status_code=400, detail=f"Invalid extension: {file_ext}. Supported: {', '.join(VALID_EXTENSIONS)}")
-    temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
-    temp_path = os.path.join(TEMP_DIR, temp_filename)
     try:
-        download_file(file_url, temp_path)
-        validate_file_size(temp_path)
-        logger.info(f"Processing interview for user: {user_id}")
-        result = process_interview(temp_path)
         if not result:
-            logger.error("process_interview returned no result")
-            raise HTTPException(status_code=500, detail="Audio processing failed")
-        json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
-        pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
-        json_dest_path = os.path.join(JSON_DIR, json_filename)
-        pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
-        shutil.copyfile(result['json_path'], json_dest_path)
-        shutil.copyfile(result['pdf_path'], pdf_dest_path)
-        logger.info(f"Files saved: {json_dest_path}, {pdf_dest_path}")
-        with open(json_dest_path, "r") as jf:
             analysis_data = json.load(jf)
-        voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
-        speakers_list = analysis_data.get('speakers', [])
         total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
-        summary_text = (
             f"User ID: {user_id}\n"
-            f"Speakers: {', '.join(speakers_list)}\n"
             f"Duration: {total_duration:.2f} sec\n"
-            f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
-            f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
         )
-        json_public_url = f"{BASE_URL}/static/outputs/json/{json_filename}"
-        pdf_public_url = f"{BASE_URL}/static/outputs/pdf/{pdf_filename}"
-        logger.info("Processing completed successfully.")
-        return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)
     except HTTPException as e:
         raise e
     except Exception as e:
-        logger.exception(f"Unexpected error: {e}")
-        raise HTTPException(status_code=500, detail=f"Internal server error: {e}")
-    finally:
-        if os.path.exists(temp_path):
-            os.remove(temp_path)
-            logger.info(f"Temporary file cleaned: {temp_path}")
-# Serve files directly (optional explicit routes)
-@app.get("/outputs/json/{filename}", response_class=FileResponse)
-async def get_json_file(filename: str):
-    file_path = os.path.join(JSON_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="JSON file not found")
     return FileResponse(file_path, media_type="application/json", filename=filename)
-@app.get("/outputs/pdf/{filename}", response_class=FileResponse)
-async def get_pdf_file(filename: str):
-    file_path = os.path.join(PDF_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="PDF file not found")
     return FileResponse(file_path, media_type="application/pdf", filename=filename)

 import os
 import uuid
 import shutil
 import json
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.responses import JSONResponse, FileResponse
+from pydantic import BaseModel
 import requests
+from process_interview import process_interview  # Ensure process_interview function is available
+from fastapi.staticfiles import StaticFiles
 # Initialize FastAPI app
 app = FastAPI()
+# --- Directory Setup ---
+BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # Get the directory of the current script
+TEMP_DIR = os.path.join(BASE_DIR, "temp_files") # Temporary storage for downloaded audio files
+STATIC_DIR = os.path.join(BASE_DIR, "static") # Root directory for all static assets
+OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs") # Subdirectory within static for generated reports
+# Create directories if they don't exist
+os.makedirs(TEMP_DIR, exist_ok=True)
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+# Mount the 'static' directory to be served at the '/static' URL path.
+# This means files in ./static/outputs/filename will be accessible at /static/outputs/filename
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
+# --- Configuration Constants ---
 VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
 MAX_FILE_SIZE_MB = 300
+# BASE_URL should be the root URL of your deployed application.
+# Example: "https://evalbot-audio-evalbot.hf.space" when deployed on Hugging Face.
+# Example: "http://localhost:8000" when running locally with Uvicorn.
+# It should NOT include "/static" or any subpaths.
 BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")
+# --- Pydantic Models ---
 class ProcessResponse(BaseModel):
+    """
+    Defines the structure of the response returned by the /process-audio endpoint.
+    Includes a summary of the analysis and public URLs to the detailed JSON and PDF reports.
+    """
     summary: str
     json_url: str
     pdf_url: str
+# --- Helper Function for URL Generation ---
+def generate_public_output_url(filename: str, file_type: str) -> str:
+    """
+    Constructs a public URL for a generated output file (JSON or PDF).
+    The files are stored in STATIC_DIR/outputs/<file_type>/ (i.e. outputs/json/ or outputs/pdf/).
+    """
+    # The path relative to the mounted /static directory will be "outputs/json/filename" or "outputs/pdf/filename"
+    # We join it directly to BASE_URL/static/
+    return f"{BASE_URL}/static/outputs/{file_type}/{filename}"
+# --- API Endpoints ---
+@app.get("/")
+async def root():
+    """
+    Root endpoint for the API. Returns a simple message to confirm the API is running.
+    """
+    return {"message": "EvalBot API is running"}
 @app.post("/process-audio", response_model=ProcessResponse)
+async def process_audio(
+    file_url: str = Query(..., description="URL of the audio file to be processed"),
+    user_id: str = Query(..., description="Unique identifier for the user")
+):
+    """
+    Main endpoint to process an audio file.
+    Downloads the audio, performs interview analysis using `process_interview`,
+    saves the generated reports, and returns their public URLs.
+    """
     try:
+        # Validate audio file extension
+        file_ext = os.path.splitext(file_url)[1].lower()
+        if file_ext not in VALID_EXTENSIONS:
+            raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}. Supported: {', '.join(VALID_EXTENSIONS)}")
+        # Generate a unique temporary filename for the downloaded audio
+        local_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
+        local_path = os.path.join(TEMP_DIR, local_filename)
+        # Download the audio file from the provided URL
+        try:
+            resp = requests.get(file_url, stream=True, timeout=30)
+            resp.raise_for_status() # Raises an HTTPError for bad responses (4xx or 5xx)
+            with open(local_path, "wb") as f:
+                for chunk in resp.iter_content(chunk_size=8192):
+                    if chunk: # Write only non-empty chunks
+                        f.write(chunk)
+        except requests.exceptions.RequestException as e:
+            raise HTTPException(status_code=400, detail=f"Failed to download the file from URL: {e}")
+        # Validate the size of the downloaded file
+        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
+        if file_size_mb > MAX_FILE_SIZE_MB:
+            os.remove(local_path) # Clean up the oversized file
+            raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB. Max allowed: {MAX_FILE_SIZE_MB} MB")
+        # Process the interview audio using the `process_interview` module
+        result = process_interview(local_path)
         if not result:
+            os.remove(local_path) # Clean up temporary audio file
+            raise HTTPException(status_code=500, detail="Audio processing failed: `process_interview` returned no result.")
+        # Get the source paths of the generated JSON and PDF reports from `process_interview`
+        json_src_path = result['json_path']
+        pdf_src_path = result['pdf_path']
+        # Generate unique destination filenames for the reports within the public 'static/outputs' directory
+        json_dest_name = f"{user_id}_{uuid.uuid4().hex}.json"
+        pdf_dest_name = f"{user_id}_{uuid.uuid4().hex}.pdf"
+        # Define the full destination paths for copying the reports
+        json_dest_full_path = os.path.join(OUTPUT_DIR, "json", json_dest_name) # Ensure sub-folders
+        pdf_dest_full_path = os.path.join(OUTPUT_DIR, "pdf", pdf_dest_name)   # Ensure sub-folders
+        # Create subdirectories if they don't exist
+        os.makedirs(os.path.dirname(json_dest_full_path), exist_ok=True)
+        os.makedirs(os.path.dirname(pdf_dest_full_path), exist_ok=True)
+        # Copy the generated reports to their final public locations
+        shutil.copyfile(json_src_path, json_dest_full_path)
+        shutil.copyfile(pdf_src_path, pdf_dest_full_path)
+        # Read analysis data from the JSON report to create the summary for the response
+        with open(json_src_path, "r") as jf: # Read from the source path, as it's guaranteed to be complete
             analysis_data = json.load(jf)
+        voice = analysis_data.get('voice_analysis', {}).get('interpretation', {})
+        speakers = analysis_data.get('speakers', [])
         total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
+        summary = (
             f"User ID: {user_id}\n"
+            f"Speakers: {', '.join(speakers)}\n"
             f"Duration: {total_duration:.2f} sec\n"
+            f"Confidence: {voice.get('confidence_level', 'N/A')}\n"
+            f"Anxiety: {voice.get('anxiety_level', 'N/A')}"
         )
+        # Generate the public URLs using the helper function
+        json_url = generate_public_output_url(json_dest_name, "json")
+        pdf_url = generate_public_output_url(pdf_dest_name, "pdf")
+        # Clean up the temporary downloaded audio file
+        os.remove(local_path)
+        return ProcessResponse(summary=summary, json_url=json_url, pdf_url=pdf_url)
     except HTTPException as e:
+        # Re-raise explicit HTTPExceptions (e.g., 400 for bad requests)
         raise e
     except Exception as e:
+        # Catch any other unexpected errors and return a 500 Internal Server Error
+        # Log the full traceback for debugging
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=f"An internal server error occurred: {str(e)}")
+# --- GET Endpoints for Direct File Access ---
+# These endpoints allow direct access to the generated JSON and PDF reports.
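+# The URL paths are designed to match how files are mounted by StaticFiles. NOTE(review): the "/static" mount is registered before these routes, so it likely handles these paths first - confirm these handlers are reachable.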
+@app.get("/static/outputs/json/{filename}")
+async def get_json_report(filename: str):
+    """
+    Serves a JSON analysis file directly from the 'static/outputs/json' directory.
+    Example URL: https://evalbot-audio-evalbot.hf.space/static/outputs/json/candidate-123_uuid.json
+    """
+    file_path = os.path.join(OUTPUT_DIR, "json", filename) # Construct full path
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="JSON file not found")
     return FileResponse(file_path, media_type="application/json", filename=filename)
+@app.get("/static/outputs/pdf/{filename}")
+async def get_pdf_report(filename: str):
+    """
+    Serves a PDF report file directly from the 'static/outputs/pdf' directory.
+    Example URL: https://evalbot-audio-evalbot.hf.space/static/outputs/pdf/candidate-123_uuid.pdf
+    """
+    file_path = os.path.join(OUTPUT_DIR, "pdf", filename) # Construct full path
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="PDF file not found")
     return FileResponse(file_path, media_type="application/pdf", filename=filename)