Spaces:

EvalBot
/

Audio-EvalBot

Sleeping

App Files Files Community

norhan12 committed on Jun 12, 2025

Commit

f233230

verified ·

1 Parent(s): 79c5881

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -63

app.py CHANGED Viewed

@@ -4,12 +4,11 @@ import shutil
 import json
 import requests
 import logging
 from fastapi import FastAPI, HTTPException, Body
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, HttpUrl
-from process_interview import process_interview
 # Logging setup
 logging.basicConfig(level=logging.INFO)
@@ -22,129 +21,183 @@ app = FastAPI()
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
 STATIC_DIR = os.path.join(BASE_DIR, "static")
-OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
 JSON_DIR = os.path.join(OUTPUT_DIR, "json")
 PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")
 for folder in [TEMP_DIR, JSON_DIR, PDF_DIR]:
     os.makedirs(folder, exist_ok=True)
-# Mount static files
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
-# Config
 VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
 MAX_FILE_SIZE_MB = 300
-BASE_URL = os.getenv("BASE_URL", "https://evalbot-audio-evalbot.hf.space")  # بدون /static في النهاية
-# Models
 class ProcessResponse(BaseModel):
     summary: str
     json_url: str
     pdf_url: str
 class ProcessAudioRequest(BaseModel):
-    file_url: HttpUrl
-    user_id: str
-# Helpers
 def download_file(file_url: str, dest_path: str):
-    logger.info(f"Downloading file from {file_url}")
     try:
-        resp = requests.get(file_url, stream=True, timeout=60)
-        resp.raise_for_status()
         with open(dest_path, "wb") as f:
             for chunk in resp.iter_content(chunk_size=8192):
-                if chunk:
                     f.write(chunk)
     except Exception as e:
-        logger.error(f"Error downloading file: {e}")
-        raise HTTPException(status_code=400, detail="Failed to download file")
 def validate_file_size(file_path: str):
     file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
     if file_size_mb > MAX_FILE_SIZE_MB:
-        logger.warning(f"File too large: {file_size_mb} MB")
-        os.remove(file_path)
-        raise HTTPException(status_code=400, detail=f"File too large: {file_size_mb:.2f} MB")
-def generate_public_url(sub_path: str) -> str:
-    # هنا مهم جداً ما تضيفش /static مرتين
-    return f"{BASE_URL}/static/{sub_path}"
-# Main endpoint
 @app.post("/process-audio", response_model=ProcessResponse)
 async def process_audio(request: ProcessAudioRequest = Body(...)):
     file_url = str(request.file_url)
     user_id = request.user_id
     file_ext = os.path.splitext(file_url)[1].lower()
     if file_ext not in VALID_EXTENSIONS:
-        logger.error("Invalid file extension")
-        raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}")
     temp_filename = f"{user_id}_{uuid.uuid4().hex}{file_ext}"
     temp_path = os.path.join(TEMP_DIR, temp_filename)
     try:
         download_file(file_url, temp_path)
         validate_file_size(temp_path)
-        logger.info("Processing interview")
-        result = process_interview(temp_path)
         if not result:
-            raise HTTPException(status_code=500, detail="Processing failed")
         json_filename = f"{user_id}_{uuid.uuid4().hex}.json"
         pdf_filename = f"{user_id}_{uuid.uuid4().hex}.pdf"
-        json_dest = os.path.join(JSON_DIR, json_filename)
-        pdf_dest = os.path.join(PDF_DIR, pdf_filename)
-        shutil.copyfile(result['json_path'], json_dest)
-        shutil.copyfile(result['pdf_path'], pdf_dest)
-        with open(result['json_path'], "r") as jf:
             analysis_data = json.load(jf)
-        voice = analysis_data.get('voice_analysis', {}).get('interpretation', {})
-        speakers = analysis_data.get('speakers', [])
         total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)
-        summary = (
             f"User ID: {user_id}\n"
-            f"Speakers: {', '.join(speakers)}\n"
             f"Duration: {total_duration:.2f} sec\n"
-            f"Confidence: {voice.get('confidence_level', 'N/A')}\n"
-            f"Anxiety: {voice.get('anxiety_level', 'N/A')}"
         )
-        json_url = generate_public_url(f"outputs/json/{json_filename}")
-        pdf_url = generate_public_url(f"outputs/pdf/{pdf_filename}")
-        logger.info("Processing completed successfully")
-        return ProcessResponse(summary=summary, json_url=json_url, pdf_url=pdf_url)
     except HTTPException as e:
         raise e
     except Exception as e:
-        logger.exception("Unexpected error during processing")
-        raise HTTPException(status_code=500, detail="Internal server error")
     finally:
         if os.path.exists(temp_path):
             os.remove(temp_path)
-# Serve output files (optional if using StaticFiles)
-@app.get("/outputs/json/{filename}")
 async def get_json_file(filename: str):
     file_path = os.path.join(JSON_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="JSON file not found")
     return FileResponse(file_path, media_type="application/json", filename=filename)
-@app.get("/outputs/pdf/{filename}")
 async def get_pdf_file(filename: str):
     file_path = os.path.join(PDF_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="PDF file not found")

 import json
 import requests
 import logging
 from fastapi import FastAPI, HTTPException, Body
 from fastapi.staticfiles import StaticFiles
 from fastapi.responses import FileResponse
 from pydantic import BaseModel, HttpUrl
+from process_interview import process_interview # Assuming process_interview is in a separate file
 # Logging setup
 logging.basicConfig(level=logging.INFO)
 # Filesystem layout: everything lives next to this file. Generated outputs sit
 # under the static directory so the /static mount can serve them.
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 TEMP_DIR = os.path.join(BASE_DIR, "temp_files")
 STATIC_DIR = os.path.join(BASE_DIR, "static")
 OUTPUT_DIR = os.path.join(STATIC_DIR, "outputs")
 JSON_DIR = os.path.join(OUTPUT_DIR, "json")
 PDF_DIR = os.path.join(OUTPUT_DIR, "pdf")

 # Creating the leaf directories also creates STATIC_DIR and OUTPUT_DIR,
 # so the mount below always has an existing directory to point at.
 for folder in (TEMP_DIR, JSON_DIR, PDF_DIR):
     os.makedirs(folder, exist_ok=True)

 # Expose STATIC_DIR under the /static URL prefix.
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")

 # Accepted audio extensions (compared in lower case) and the upload size cap.
 VALID_EXTENSIONS = ('.wav', '.mp3', '.m4a', '.flac')
 MAX_FILE_SIZE_MB = 300

 # Public origin of the deployed app, read from the BASE_URL environment
 # variable. It must not contain /static or any other sub-path, e.g.
 #   https://evalbot-audio-evalbot.hf.space
 #   https://your-ngrok-url.ngrok-free.app
 # The fallback is meant for local testing.
 BASE_URL = os.getenv("BASE_URL", "http://localhost:7860")
# Pydantic Models for Request/Response validation
class ProcessResponse(BaseModel):
    """Body returned by POST /process-audio on success."""

    # Multi-line plain-text digest built from the analysis JSON.
    summary: str
    # Public URL of the JSON analysis file.
    json_url: str
    # Public URL of the PDF report.
    pdf_url: str
 class ProcessAudioRequest(BaseModel):
     """Body expected by POST /process-audio."""

     # Where to fetch the audio from; validated as an HTTP(S) URL.
     file_url: HttpUrl
     # Caller-supplied identifier; used in output file names and the summary.
     user_id: str
# Helper Functions
def download_file(file_url: str, dest_path: str):
    """Stream the file at ``file_url`` to ``dest_path`` on local disk.

    Args:
        file_url: URL to fetch with an HTTP GET.
        dest_path: Local path to write. Its parent directory is created when
            it does not exist yet.

    Raises:
        HTTPException: 400 when the request itself fails (connection error,
            timeout, or a 4xx/5xx status); 500 for any other failure, such as
            being unable to write the destination file.
    """
    logger.info(f"Attempting to download file from {file_url}")
    try:
        # Using the response as a context manager releases the streamed
        # connection even when writing to disk fails part-way through.
        with requests.get(file_url, stream=True, timeout=60) as resp:
            resp.raise_for_status()  # Turn 4xx/5xx responses into exceptions

            # os.makedirs("") raises, so only create a directory when
            # dest_path actually has a directory component.
            dest_dir = os.path.dirname(dest_path)
            if dest_dir:
                os.makedirs(dest_dir, exist_ok=True)

            with open(dest_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:  # Skip empty keep-alive chunks
                        f.write(chunk)
        logger.info(f"File downloaded successfully to {dest_path}")
    except requests.exceptions.RequestException as e:
        logger.error(f"Error downloading file from {file_url}: {e}")
        raise HTTPException(status_code=400, detail=f"Failed to download file from URL: {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error during file download: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error during file download") from e
 def validate_file_size(file_path: str):
     """Delete ``file_path`` and raise when it is larger than MAX_FILE_SIZE_MB.

     Raises:
         HTTPException: 400 when the file exceeds the limit. The file has
             already been removed from disk by the time this is raised.
     """
     bytes_per_mb = 1024 * 1024
     size_mb = os.path.getsize(file_path) / bytes_per_mb
     if size_mb <= MAX_FILE_SIZE_MB:
         return

     logger.warning(f"File too large: {size_mb:.2f} MB. Max allowed: {MAX_FILE_SIZE_MB} MB")
     os.remove(file_path)  # Do not keep the oversized download around
     raise HTTPException(status_code=400, detail=f"File too large: {size_mb:.2f} MB. Max size: {MAX_FILE_SIZE_MB} MB")
def generate_public_url(full_local_path: str) -> str:
    """Return the public URL of a file stored under STATIC_DIR.

    Example: with STATIC_DIR ``/app/static`` and ``full_local_path``
    ``/app/static/outputs/json/file.json`` the result is
    ``<BASE_URL>/static/outputs/json/file.json``.

    Args:
        full_local_path: Path of a file located inside STATIC_DIR.

    Returns:
        BASE_URL joined with the ``/static`` mount point and the
        percent-encoded path of the file relative to STATIC_DIR.

    Raises:
        ValueError: when ``full_local_path`` is not inside STATIC_DIR, since
            such a file is not reachable through the /static mount.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import quote

    relative_path = os.path.relpath(full_local_path, STATIC_DIR)
    if relative_path == os.pardir or relative_path.startswith(os.pardir + os.path.sep):
        raise ValueError(f"{full_local_path} is not inside the static directory")

    # URLs always use forward slashes, whatever the local separator is.
    web_path = relative_path.replace(os.path.sep, "/")

    # File names embed caller-supplied text, so percent-encode the path, and
    # tolerate a BASE_URL configured with a trailing slash.
    return f"{BASE_URL.rstrip('/')}/static/{quote(web_path)}"
# Main API Endpoint
@app.post("/process-audio", response_model=ProcessResponse)
async def process_audio(request: ProcessAudioRequest = Body(...)):
    """Download an audio file, analyse it, and return links to the results.

    The audio at ``request.file_url`` is downloaded into TEMP_DIR,
    size-checked, and handed to ``process_interview``. The JSON and PDF files
    it reports are copied into the static output directories, and their public
    URLs are returned together with a short text summary read from the JSON.

    Raises:
        HTTPException: 400 for an unsupported file extension, a failed
            download, or an oversized file; 500 when the analysis returns no
            result or any unexpected error occurs.
    """
    # Function-scope imports so this block does not depend on the file header.
    import re
    from urllib.parse import urlsplit

    file_url = str(request.file_url)
    user_id = request.user_id

    # user_id is caller-supplied and becomes part of file names, so anything
    # that could act as a path separator or traversal sequence is replaced.
    # Identifiers made of letters, digits, "_" and "-" are left unchanged.
    safe_user_id = re.sub(r"[^A-Za-z0-9_-]", "_", user_id)

    # Take the extension from the URL path only, so a query string or
    # fragment (e.g. "audio.mp3?token=abc") does not break validation.
    file_ext = os.path.splitext(urlsplit(file_url).path)[1].lower()
    if file_ext not in VALID_EXTENSIONS:
        logger.error(f"Invalid file extension: {file_ext}. Supported: {VALID_EXTENSIONS}")
        raise HTTPException(status_code=400, detail=f"Invalid file extension: {file_ext}. Supported formats: {', '.join(VALID_EXTENSIONS)}")

    # Unique temporary path for the downloaded audio file
    temp_filename = f"{safe_user_id}_{uuid.uuid4().hex}{file_ext}"
    temp_path = os.path.join(TEMP_DIR, temp_filename)

    try:
        # NOTE(review): download_file and process_interview are synchronous
        # calls, so the event loop is blocked while they run. Consider a
        # threadpool once process_interview is confirmed safe to run
        # concurrently.

        # 1. Download the audio file
        download_file(file_url, temp_path)

        # 2. Validate downloaded file size (removes the file when too large)
        validate_file_size(temp_path)

        # 3. Run the analysis; the result is indexed below for the local
        #    paths of the generated JSON and PDF.
        logger.info(f"Starting interview processing for user: {user_id} from {temp_path}")
        result = process_interview(temp_path)
        if not result:
            logger.error(f"process_interview returned no result for {user_id}")
            raise HTTPException(status_code=500, detail="Audio processing failed: No result from analysis pipeline.")

        # 4. Copy the outputs under unique names into the static directories
        json_filename = f"{safe_user_id}_{uuid.uuid4().hex}.json"
        pdf_filename = f"{safe_user_id}_{uuid.uuid4().hex}.pdf"
        json_dest_path = os.path.join(JSON_DIR, json_filename)
        pdf_dest_path = os.path.join(PDF_DIR, pdf_filename)
        shutil.copyfile(result['json_path'], json_dest_path)
        shutil.copyfile(result['pdf_path'], pdf_dest_path)
        logger.info(f"Analysis outputs copied to: {json_dest_path} and {pdf_dest_path}")

        # 5. Build the summary from the copied JSON (read with an explicit
        #    encoding so the result does not depend on the platform default).
        with open(json_dest_path, "r", encoding="utf-8") as jf:
            analysis_data = json.load(jf)

        voice_interpretation = analysis_data.get('voice_analysis', {}).get('interpretation', {})
        speakers_list = analysis_data.get('speakers', [])
        total_duration = analysis_data.get('text_analysis', {}).get('total_duration', 0.0)

        summary_text = (
            f"User ID: {user_id}\n"
            # str() keeps the join from failing on non-string speaker labels.
            f"Speakers: {', '.join(str(speaker) for speaker in speakers_list)}\n"
            f"Duration: {total_duration:.2f} sec\n"
            f"Confidence: {voice_interpretation.get('confidence_level', 'N/A')}\n"
            f"Anxiety: {voice_interpretation.get('anxiety_level', 'N/A')}"
        )

        # 6. Turn the local output paths into public URLs
        json_public_url = generate_public_url(json_dest_path)
        pdf_public_url = generate_public_url(pdf_dest_path)
        logger.info("Audio processing and URL generation completed successfully.")
        return ProcessResponse(summary=summary_text, json_url=json_public_url, pdf_url=pdf_public_url)

    except HTTPException:
        # Already carries the right status and detail; pass it through as is.
        raise
    except Exception as e:
        logger.exception(f"Unexpected error during audio processing for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"Internal server error during processing: {e}")
    finally:
        # The downloaded audio is only needed while processing.
        if os.path.exists(temp_path):
            os.remove(temp_path)
            logger.info(f"Cleaned up temporary file: {temp_path}")
+# Routes to serve output files directly if needed (though /static mount handles this)
+# These are redundant if /static mount works correctly, but can be kept for explicit control or debugging.
@app.get("/outputs/json/{filename}", response_class=FileResponse)
async def get_json_file(filename: str):
    """Serve a JSON analysis file from JSON_DIR.

    Raises:
        HTTPException: 404 when ``filename`` is not a bare file name or no
            regular file with that name exists in JSON_DIR.
    """
    file_path = os.path.join(JSON_DIR, filename)
    # Require a bare file name and a regular file: os.path.exists() is also
    # true for directories (e.g. ".."), which FileResponse cannot serve.
    if os.path.basename(filename) != filename or not os.path.isfile(file_path):
        raise HTTPException(status_code=404, detail="JSON file not found")
    return FileResponse(file_path, media_type="application/json", filename=filename)
+@app.get("/outputs/pdf/{filename}", response_class=FileResponse)
 async def get_pdf_file(filename: str):
+    """Serves a PDF report file from the outputs directory."""
     file_path = os.path.join(PDF_DIR, filename)
     if not os.path.exists(file_path):
         raise HTTPException(status_code=404, detail="PDF file not found")