# Hosting-page residue captured with this file (not part of the module):
# Hamza4100's picture
# Upload 23 files
# aa8e38b verified
"""
FastAPI route handlers for PDF processing API.
"""
from pathlib import Path
from typing import Optional
from fastapi import APIRouter, UploadFile, File, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse
from app.api.schemas import (
ProcessingRequest,
ProcessingResult,
UploadResponse,
BatchProcessingRequest,
BatchProcessingResult,
MappingConfig,
MappingEntry,
TemplateValidation,
FormFieldInfo,
ExtractedData,
HealthResponse
)
from app.api.processor_service import processor_service
from app.config import settings
from app.utils.logging import get_logger
from app.utils.exceptions import PDFProcessorError, FileValidationError
# Logger obtained from the project's get_logger helper, keyed by this module's name.
logger = get_logger(__name__)

# Router that every endpoint below registers on; all paths are served under
# the "/api/v1" prefix and carry the "pdf" tag.
router = APIRouter(prefix="/api/v1", tags=["pdf"])
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """Report a static "healthy" status plus version and feature availability.

    The OCR flag is read from the processor service. The Gemini flag only
    reflects whether ``settings.google_api_key`` is set (i.e. not ``None``).
    """
    app_version = settings.app_version
    ocr_ready = processor_service.ocr_available
    gemini_configured = settings.google_api_key is not None

    return HealthResponse(
        status="healthy",
        version=app_version,
        ocr_available=ocr_ready,
        gemini_available=gemini_configured,
    )
@router.post("/upload", response_model=UploadResponse)
async def upload_files(
    source_pdf: UploadFile = File(..., description="T1 tax return PDF to extract data from"),
    template_pdf: UploadFile = File(..., description="Target PDF form to fill")
):
    """
    Store a source T1 PDF and a target template PDF under a new session.

    Each upload is read in full and handed to the processor service, source
    first and template second. The response carries the new session ID and
    both original filenames.

    Raises:
        HTTPException: 400 when a ``FileValidationError`` is raised,
            500 for any other failure.
    """
    try:
        session = processor_service.create_session()

        # Order matters: the source is read and saved before the template
        # is read at all.
        uploads = ((source_pdf, "source"), (template_pdf, "template"))
        for upload, role in uploads:
            payload = await upload.read()
            processor_service.save_uploaded_file(
                session, payload, upload.filename, role
            )

        return UploadResponse(
            session_id=session.session_id,
            source_filename=source_pdf.filename,
            template_filename=template_pdf.filename,
            message="Files uploaded successfully. Use /process to extract and fill data.",
        )
    except FileValidationError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Upload failed: {exc}")
        raise HTTPException(status_code=500, detail=f"Upload failed: {exc!s}")
@router.post("/process", response_model=ProcessingResult)
async def process_pdfs(request: ProcessingRequest):
    """
    Run extraction and template filling for a single uploaded session.

    Looks up the session named in the request, passes it to the processor
    service together with the requested line numbers, OCR flag and flatten
    flag, and wraps the returned data in a ``ProcessingResult``.

    Raises:
        HTTPException: 400 for ``FileValidationError``, 422 for
            ``PDFProcessorError``, 500 for anything else.
    """
    try:
        session = processor_service.get_session(request.session_id)
        outcome = processor_service.process(
            session=session,
            line_numbers=request.line_numbers,
            use_ocr=request.use_ocr,
            flatten=request.flatten_output,
        )

        data = outcome["extracted_data"]
        extraction = ExtractedData(
            line_values=data["line_values"],
            extraction_method=data["extraction_method"],
            has_text_content=data["has_text"],
            # The preview is optional in the service payload.
            raw_text_preview=data.get("text_preview"),
        )

        return ProcessingResult(
            session_id=session.session_id,
            status="success",
            extracted_data=extraction,
            mapped_fields=outcome["mapped_fields"],
            output_filename=outcome["output_filename"],
            errors=session.errors,
            warnings=session.warnings,
        )
    # Handler order is significant if FileValidationError derives from
    # PDFProcessorError (not visible here): the more specific one goes first.
    except FileValidationError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except PDFProcessorError as exc:
        logger.error(f"Processing error: {exc}")
        raise HTTPException(status_code=422, detail=str(exc))
    except Exception as exc:
        logger.error(f"Unexpected error: {exc}")
        raise HTTPException(status_code=500, detail=f"Processing failed: {exc!s}")
@router.get("/download/{session_id}")
async def download_result(session_id: str):
    """
    Download the filled PDF for a completed session.

    Args:
        session_id: Identifier of the session whose output is requested.

    Returns:
        A ``FileResponse`` streaming the output PDF, with a download name
        built from the first eight characters of the session ID.

    Raises:
        HTTPException: 404 when the processor service reports no output
            path for the session or raises ``FileValidationError``;
            500 for any other failure.
    """
    try:
        session = processor_service.get_session(session_id)
        output_path = processor_service.get_output_path(session)

        if not output_path:
            raise HTTPException(
                status_code=404,
                detail="Output file not found. Ensure processing is complete."
            )

        return FileResponse(
            path=output_path,
            filename=f"filled_t1_{session_id[:8]}.pdf",
            media_type="application/pdf"
        )
    except HTTPException:
        # HTTPException is an Exception subclass, so without this clause the
        # deliberate 404 above would be caught by the generic handler below
        # and reported to the client as a 500.
        raise
    except FileValidationError as e:
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        logger.error(f"Download error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/batch/process", response_model=BatchProcessingResult)
async def batch_process(request: BatchProcessingRequest):
    """
    Process every session listed in the request, one after another.

    The same line numbers, OCR flag and flatten flag are applied to each
    session. A failure in one session does not stop the batch: it is logged
    and recorded as a ``"failed"`` entry whose only error is the exception
    text, and the loop moves on.
    """
    outcomes = []
    ok_count = 0

    for sid in request.session_ids:
        try:
            session = processor_service.get_session(sid)
            processed = processor_service.process(
                session=session,
                line_numbers=request.line_numbers,
                use_ocr=request.use_ocr,
                flatten=request.flatten_output,
            )

            data = processed["extracted_data"]
            extraction = ExtractedData(
                line_values=data["line_values"],
                extraction_method=data["extraction_method"],
                has_text_content=data["has_text"],
                raw_text_preview=data.get("text_preview"),
            )
            entry = ProcessingResult(
                session_id=sid,
                status="success",
                extracted_data=extraction,
                mapped_fields=processed["mapped_fields"],
                output_filename=processed["output_filename"],
                errors=session.errors,
                warnings=session.warnings,
            )
        except Exception as exc:
            logger.error(f"Batch processing failed for {sid}: {exc}")
            entry = ProcessingResult(
                session_id=sid,
                status="failed",
                errors=[str(exc)],
            )
        else:
            ok_count += 1

        outcomes.append(entry)

    # Exactly one entry is appended per session, so whatever did not
    # succeed is a failure.
    return BatchProcessingResult(
        total=len(request.session_ids),
        successful=ok_count,
        failed=len(outcomes) - ok_count,
        results=outcomes,
    )
@router.get("/template/fields/{session_id}", response_model=TemplateValidation)
async def get_template_fields(session_id: str):
    """
    List the template's form fields and check them against the mappings.

    Every field reported by the processor service is described with its
    name, stringified type and (when truthy) stringified current value.
    The template counts as valid when every field name referenced by the
    current mappings exists among the template's fields.

    Raises:
        HTTPException: 400 for ``FileValidationError``, 500 otherwise.
    """
    try:
        session = processor_service.get_session(session_id)
        fields = processor_service.get_template_fields(session)

        described = []
        for field_name, info in fields.items():
            type_label = str(info.get("type", "Unknown"))
            raw_value = info.get("value")
            # Falsy values (missing, empty string, 0, ...) are reported as None.
            shown_value = str(raw_value) if raw_value else None
            described.append(
                FormFieldInfo(
                    name=field_name,
                    field_type=type_label,
                    current_value=shown_value,
                )
            )

        # Field names the mappings expect but the template does not provide.
        mappings = processor_service.get_all_mappings()
        expected_names = {mapping["field"] for mapping in mappings.values()}
        present_names = set(fields.keys())
        missing = list(expected_names - present_names)

        return TemplateValidation(
            valid=not missing,
            total_fields=len(fields),
            fields=described,
            missing_fields=missing,
        )
    except FileValidationError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    except Exception as exc:
        logger.error(f"Template fields error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.get("/mappings")
async def get_mappings():
    """
    Return the processor service's current T1 line to field mappings as-is.
    """
    current_mappings = processor_service.get_all_mappings()
    return current_mappings
@router.post("/mappings")
async def update_mappings(config: MappingConfig):
    """
    Apply each mapping entry in the request, then save the mapping config.

    Entries are applied in request order; a missing or empty description is
    passed on as an empty string. The response message reports how many
    entries were submitted.

    Raises:
        HTTPException: 500 when applying or saving the mappings fails.
    """
    try:
        entries = config.mappings
        for item in entries:
            description = item.description or ""
            processor_service.update_mapping(
                item.line_number, item.field_name, description
            )

        # Saved once, after all entries have been applied.
        processor_service.save_mapping_config()

        summary = f"Updated {len(config.mappings)} mappings"
        return {"message": summary}
    except Exception as exc:
        logger.error(f"Mapping update error: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.delete("/session/{session_id}")
async def cleanup_session(session_id: str, background_tasks: BackgroundTasks):
    """
    Queue removal of a session's files and data as a background task.

    The handler only schedules the cleanup and confirms that it did so; the
    cleanup itself is carried out by the processor service when the
    background task runs.

    Raises:
        HTTPException: 500 when scheduling the task fails.
    """
    try:
        background_tasks.add_task(processor_service.cleanup_session, session_id)
        confirmation = {"message": f"Session {session_id} cleanup scheduled"}
        return confirmation
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))