civic-ml-backend / app /main.py
mohxn49's picture
Upload folder using huggingface_hub
4e33b5f verified
from fastapi import FastAPI, HTTPException, Request, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from typing import Optional
import traceback
import os
import sys
import json
# Build the application object before anything else so that the service can
# boot even if the optional ML pipeline below cannot be imported.
app = FastAPI(title="Civic ML Backend API", version="1.0.0")

# The ML pipeline is optional. These placeholders stay in effect when the
# import fails, which lets the API process start without the pipeline.
classify_report = None
ml_available = False
try:
    from app.pipeline import classify_report
    ml_available = True
    print("✅ ML modules loaded successfully")
except Exception as import_error:
    print(f"⚠️ ML modules not available (non-critical): {import_error}")
    print("⚠️ API will return default responses")

# Startup banner: interpreter version, working directory and ML status.
print(
    "=" * 50,
    "ML Backend API Starting...",
    f"Python version: {sys.version}",
    f"Working directory: {os.getcwd()}",
    f"ML Available: {ml_available}",
    "=" * 50,
    sep="\n",
)

# CORS: wide open to every origin, method and header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
    allow_credentials=False,  # CRITICAL: must stay False while origins is "*"
    max_age=3600,  # preflight responses may be cached for one hour
)

# Echo the effective CORS settings into the startup log.
print(
    "=" * 50,
    "CORS Configuration:",
    " allow_origins: ['*'] (all origins)",
    " allow_credentials: False",
    "=" * 50,
    sep="\n",
)
@app.get("/")
def health():
    """Root endpoint: report that the API is running, its version and ML status."""
    payload = {"status": "ML API running", "version": "1.0.0"}
    # Read the module-level flag at request time, not at definition time.
    payload["ml_available"] = ml_available
    return payload
@app.get("/health")
def health_check():
    """Health check endpoint for Render."""
    return dict(
        status="healthy",
        service="ML Backend",
        ml_available=ml_available,
    )
@app.options("/submit")
async def submit_options():
    """Answer CORS preflight (OPTIONS) requests sent to ``/submit``."""
    preflight_ack = {"status": "ok"}
    return preflight_ack
# Upper bound for an uploaded image, in bytes (10MB).
_MAX_IMAGE_SIZE = 10 * 1024 * 1024

# Content types that are accepted without logging a warning.
_EXPECTED_IMAGE_TYPES = ('image/jpeg', 'image/jpg', 'image/png', 'image/gif', 'image/webp')


def _error_response(report_id: str, reason: str) -> dict:
    """Build the rejection payload returned when processing fails."""
    return {
        "report_id": report_id,
        "accept": False,
        "status": "error",
        "category": "Other",
        "confidence": 0.0,
        "reason": reason,
    }


def _parse_coordinate(raw: Optional[str], name: str, limit: int) -> Optional[float]:
    """Convert an optional form value to a float within ``[-limit, limit]``.

    Returns ``None`` when *raw* is missing or empty. Raises a 422
    ``HTTPException`` when the value is not numeric or is out of range
    (NaN fails the range comparison and is rejected as well).
    """
    if not raw:
        return None
    try:
        value = float(raw.strip())
    except (ValueError, TypeError) as exc:
        raise HTTPException(
            status_code=422,
            detail=f"Validation error: '{name}' must be a valid number, got '{raw}'"
        ) from exc
    if not (-limit <= value <= limit):
        raise HTTPException(
            status_code=422,
            detail=f"Validation error: '{name}' must be between -{limit} and {limit}, got {value}"
        )
    return value


async def _read_image(image: UploadFile) -> bytes:
    """Read an uploaded image and enforce the non-empty / maximum-size rules.

    Raises a 422 ``HTTPException`` when the file is empty, too large, or
    cannot be read.
    """
    try:
        # The content type is informational only: unexpected values are
        # logged but never rejected.
        if image.content_type and image.content_type not in _EXPECTED_IMAGE_TYPES:
            print(f"Warning: Unexpected content type '{image.content_type}', continuing anyway")

        image_bytes = await image.read()
        if len(image_bytes) > _MAX_IMAGE_SIZE:
            raise HTTPException(
                status_code=422,
                detail=f"Validation error: Image file too large. Maximum size is {_MAX_IMAGE_SIZE / (1024*1024):.1f}MB, got {len(image_bytes) / (1024*1024):.1f}MB"
            )
        if len(image_bytes) == 0:
            raise HTTPException(
                status_code=422,
                detail="Validation error: Image file is empty"
            )
        print(f"Received image: {len(image_bytes)} bytes, content_type: {image.content_type}")
        return image_bytes
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=422,
            detail=f"Validation error: Failed to read image file: {str(exc)}"
        ) from exc


@app.post("/submit")
async def submit_report(
    report_id: str = Form(..., description="Unique identifier for the report"),
    description: str = Form(..., description="Description of the issue"),
    user_id: Optional[str] = Form(None, description="Optional user identifier"),
    latitude: Optional[str] = Form(None, description="Optional latitude as string (e.g., '37.7749')"),
    longitude: Optional[str] = Form(None, description="Optional longitude as string (e.g., '-122.4194')"),
    image: Optional[UploadFile] = File(None, description="Optional image file (JPEG, PNG, etc.)")
):
    """
    Submit a report for ML validation and classification.

    Accepts multipart/form-data with the following fields:
    - report_id (required, string): Unique identifier for the report
    - description (required, string): Description of the issue
    - user_id (optional, string): User identifier
    - latitude (optional, string): Latitude coordinate (-90 to 90), will be converted to float
    - longitude (optional, string): Longitude coordinate (-180 to 180), will be converted to float
    - image (optional, file): Image file (JPEG, PNG, etc.)

    Returns a JSON response with classification results. Input validation
    failures raise HTTP 422. Failures inside the ML step, including the ML
    pipeline not being loaded, are reported as a 200 response whose
    ``status`` is ``"error"`` so the frontend can handle them.
    """
    try:
        print(f"Received ML validation request: report_id={report_id}, description_length={len(description or '')}")

        # Required fields must contain more than whitespace.
        if not report_id or not report_id.strip():
            raise HTTPException(
                status_code=422,
                detail="Validation error: 'report_id' is required and cannot be empty"
            )
        if not description or not description.strip():
            raise HTTPException(
                status_code=422,
                detail="Validation error: 'description' is required and cannot be empty"
            )

        # Clean up string fields
        report_id = report_id.strip()
        description = description.strip()
        user_id = user_id.strip() if user_id else None

        latitude_float = _parse_coordinate(latitude, "latitude", 90)
        longitude_float = _parse_coordinate(longitude, "longitude", 180)

        image_bytes = await _read_image(image) if image else None

        report_data = {
            "report_id": report_id,
            "description": description,
            "user_id": user_id,
            "image_bytes": image_bytes,
            "latitude": latitude_float,
            "longitude": longitude_float
        }

        print("Starting ML classification...")
        print(f"Report data keys: {list(report_data.keys())}")
        print(f"Has image_bytes: {bool(image_bytes)}")
        if image_bytes:
            print(f"Image bytes size: {len(image_bytes)} bytes")

        try:
            # The pipeline import is optional at startup; fail with a clear
            # reason instead of "'NoneType' object is not callable".
            if classify_report is None:
                raise RuntimeError("ML pipeline is not available")

            result = classify_report(report_data)

            # Check the type before touching the result so a bad return
            # value produces the intended message.
            if not isinstance(result, dict):
                raise ValueError(f"classify_report returned non-dict: {type(result)}")
            print(f"ML classification complete: status={result.get('status')}, category={result.get('category')}, confidence={result.get('confidence')}")

            # Fill in any required key the classifier left out.
            for key, fallback in (
                ('report_id', report_id),
                ('accept', False),
                ('status', 'error'),
                ('category', 'Other'),
                ('confidence', 0.0),
            ):
                result.setdefault(key, fallback)
            return result
        except Exception as ml_error:
            print(f"ERROR in classify_report: {str(ml_error)}")
            print(traceback.format_exc())
            # Return error response with 200 status (not 500) so frontend can handle it
            return _error_response(report_id, f"ML classification error: {str(ml_error)}")
    except HTTPException:
        raise
    except Exception as e:
        print(f"ERROR in submit_report: {str(e)}")
        print(traceback.format_exc())
        # Return error response with 200 status (not 500) so frontend can handle it
        return _error_response(report_id, f"ML processing error: {str(e)}")
@app.post("/remove")
async def remove_report_endpoint(
    report_id: Optional[str] = Form(None, description="Report ID to remove from dataset"),
    description: Optional[str] = Form(None, description="Description to match (fallback)"),
    user_id: Optional[str] = Form(None, description="User ID to match (fallback)")
):
    """
    Remove a report from dataset.jsonl by report_id, or by description/user_id as fallback.
    Called when an issue is resolved by an employee.

    Raises HTTP 422 when neither ``report_id`` nor the ``description`` +
    ``user_id`` pair is supplied, and HTTP 500 when the removal itself fails.
    A report that is not found is reported with ``success: False``.
    """
    try:
        from app import dataset

        def _stripped(value):
            # Missing and empty values both become None.
            return value.strip() if value else None

        rid = _stripped(report_id)
        desc = _stripped(description)
        uid = _stripped(user_id)

        # Either the report id, or the description/user pair, must be present.
        if not (rid or (desc and uid)):
            raise HTTPException(
                status_code=422,
                detail="Validation error: Must provide either 'report_id' OR both 'description' and 'user_id'"
            )

        desc_preview = desc[:50] if desc else None
        print(f"[REMOVE] Attempting to remove report. report_id={rid}, description={desc_preview}..., user_id={uid}")

        was_removed = dataset.remove_report(
            report_id=rid,
            description=desc,
            user_id=uid
        )

        if not was_removed:
            return {
                "success": False,
                "message": "Report not found in dataset",
                "report_id": rid or "unknown"
            }
        return {
            "success": True,
            "message": "Report removed from dataset",
            "report_id": rid or "matched_by_description"
        }
    except HTTPException:
        # Validation errors keep their own status code.
        raise
    except Exception as e:
        print(f"ERROR in remove_report_endpoint: {str(e)}")
        print(traceback.format_exc())
        raise HTTPException(
            status_code=500,
            detail=f"Failed to remove report from dataset: {str(e)}"
        )