|
|
|
|
|
""" |
|
|
Enhanced DOCX to PDF Converter |
|
|
Professional FastAPI Backend with Docker Support |
|
|
""" |
|
|
|
|
|
import os |
|
|
import logging |
|
|
import uuid |
|
|
from pathlib import Path |
|
|
from typing import Optional, List |
|
|
import base64 |
|
|
import json |
|
|
|
|
|
from fastapi import FastAPI, File, UploadFile, Form, HTTPException, BackgroundTasks |
|
|
from fastapi.responses import FileResponse, JSONResponse |
|
|
from fastapi.middleware.cors import CORSMiddleware |
|
|
from fastapi.staticfiles import StaticFiles |
|
|
from pydantic import BaseModel |
|
|
|
|
|
|
|
|
from src.utils.config import Config |
|
|
from src.utils.file_handler import FileHandler |
|
|
from src.utils.converter import DocumentConverter |
|
|
|
|
|
|
|
|
# Configure process-wide logging once at import time: INFO and above, with a
# timestamp, the emitting logger's name and the level in front of each message.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by every handler defined in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
# Shared service objects, built once at import time and reused by all routes.
# The file handler is bound to the configured temporary directory.
file_handler = FileHandler(Config.TEMP_DIR)

# Performs base64 decoding, LibreOffice validation and DOCX -> PDF conversion
# for the handlers below.
converter = DocumentConverter()
|
|
|
|
|
# The ASGI application. Title, description and version all come from Config.
app = FastAPI(
    version=Config.API_VERSION,
    title=Config.API_TITLE,
    description=Config.API_DESCRIPTION,
)

# Cross-origin policy is taken entirely from Config; nothing is hard-coded here.
app.add_middleware(
    CORSMiddleware,
    allow_headers=Config.CORS_HEADERS,
    allow_methods=Config.CORS_METHODS,
    allow_credentials=Config.CORS_CREDENTIALS,
    allow_origins=Config.CORS_ORIGINS,
)
|
|
|
|
|
|
|
|
# Serve the contents of ./templates under /static, but only when that path
# exists (relative to the current working directory) at import time.
if os.path.exists("templates"):
    app.mount(
        path="/static",
        app=StaticFiles(directory="templates"),
        name="static",
    )
|
|
|
|
|
|
|
|
# The landing page route is registered only when templates/index.html exists
# at import time; otherwise "/" is simply not defined by this block.
if os.path.exists("templates/index.html"):
    from fastapi.responses import HTMLResponse

    # The file is re-read on every request, so edits show up without a restart.
    # (Deliberately no docstring: FastAPI would publish it in the OpenAPI schema.)
    @app.get("/", response_class=HTMLResponse)
    async def read_index():
        return Path("templates/index.html").read_text(encoding="utf-8")
|
|
|
|
|
|
|
|
class ConversionRequest(BaseModel):
    """Request model for base64 conversion"""
    # The docstring above is kept verbatim: pydantic exposes it as the schema
    # description, so rewording it would change the generated API docs.

    # Base64-encoded document payload; the batch handler in this file passes it
    # to converter.decode_base64_content.
    file_content: str

    # Name of the document; the batch handler checks its extension against
    # Config.ALLOWED_EXTENSIONS and derives the PDF name from it.
    filename: str
|
|
|
|
|
class BatchConversionRequest(BaseModel):
    """Request model for batch conversion"""
    # Docstring kept verbatim because pydantic publishes it in the schema.

    # One entry per document to convert; the batch handler iterates over this
    # list in order and produces one result per entry.
    files: List[ConversionRequest]
|
|
|
|
|
class ConversionResponse(BaseModel):
    """Response model for conversion results"""
    # Docstring kept verbatim because pydantic publishes it in the schema.

    # True when a PDF was produced, False for every failure reported in-band.
    success: bool

    # Relative download link ("/download/<temp dir name>/<pdf name>"), set by
    # the handlers in this file only on success.
    pdf_url: Optional[str] = None

    # Human-readable status text; the handlers set it on success.
    message: Optional[str] = None

    # Failure description; the batch handler sets it when success is False.
    error: Optional[str] = None
|
|
|
|
|
@app.on_event("startup")
async def startup_event():
    """Run one-time startup work: probe LibreOffice and create the temp dir.

    A failed LibreOffice probe is only logged as a warning; startup continues
    so the rest of the API (for example the health check) stays reachable.
    """
    logger.info("Starting Enhanced DOCX to PDF Converter...")

    libreoffice_ok = converter.validate_libreoffice()
    if not libreoffice_ok:
        logger.warning("LibreOffice validation failed - conversions may not work")

    # Make sure the working directory exists; an existing one is left as is.
    os.makedirs(Config.TEMP_DIR, exist_ok=True)

    logger.info("Application started successfully")
|
|
|
|
|
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Docstring kept verbatim: FastAPI publishes it as the operation description.
    # The payload is static apart from the configured API version.
    payload = dict(status="healthy", version=Config.API_VERSION)
    return payload
|
|
|
|
|
@app.post("/convert", response_model=ConversionResponse)
async def convert_docx(
    background_tasks: BackgroundTasks,
    file: Optional[UploadFile] = File(None),
    file_content: Optional[str] = Form(None),
    filename: Optional[str] = Form(None)
):
    """
    Convert DOCX to PDF

    Supports two input methods:
    1. Multipart file upload (file parameter)
    2. Base64 encoded content (file_content and filename parameters)

    On success the response carries a relative `pdf_url` pointing at the
    download endpoint.

    Errors:
    - 400: no input supplied, disallowed file extension, or invalid base64
    - 413: the document is larger than the configured maximum size
    - 500: the conversion itself failed
    """
    # Function-scope import keeps this block self-contained; shutil is only
    # needed for the failure clean-up below.
    import shutil

    # NOTE: background_tasks is part of the public signature but is not used
    # by this handler; successful outputs stay on disk for the download route.
    temp_dir = None
    succeeded = False

    try:
        temp_dir = file_handler.create_temp_directory()

        if file and file.filename:
            # Cheap early rejection when the framework already knows the size.
            if file.size and file.size > Config.MAX_FILE_SIZE:
                raise HTTPException(status_code=413, detail="File too large")

            if not file_handler.validate_file_extension(file.filename, Config.ALLOWED_EXTENSIONS):
                raise HTTPException(status_code=400, detail="Invalid file type")

            content = await file.read()
            # file.size may be unset, so enforce the limit on the real payload.
            if len(content) > Config.MAX_FILE_SIZE:
                raise HTTPException(status_code=413, detail="File too large")

            input_path = file_handler.save_uploaded_file(temp_dir, file.filename, content)

        elif file_content and filename:
            if not file_handler.validate_file_extension(filename, Config.ALLOWED_EXTENSIONS):
                raise HTTPException(status_code=400, detail="Filename must have .docx extension")

            file_data = converter.decode_base64_content(file_content)
            if file_data is None:
                raise HTTPException(status_code=400, detail="Invalid base64 content")

            # Apply the same limit as the multipart path.
            # Assumes the decoder returns a bytes-like object (it is handed to
            # save_uploaded_file exactly like raw upload bytes) - TODO confirm.
            if len(file_data) > Config.MAX_FILE_SIZE:
                raise HTTPException(status_code=413, detail="File too large")

            input_path = file_handler.save_uploaded_file(temp_dir, filename, file_data)

        else:
            raise HTTPException(status_code=400, detail="Either file or file_content+filename must be provided")

        # The PDF lives next to the input, with the same stem.
        output_filename = os.path.splitext(os.path.basename(input_path))[0] + ".pdf"
        output_path = os.path.join(temp_dir, output_filename)

        if not converter.convert_docx_to_pdf(input_path, output_path):
            raise HTTPException(status_code=500, detail="Conversion failed")

        pdf_url = f"/download/{os.path.basename(temp_dir)}/{output_filename}"
        succeeded = True
        return ConversionResponse(
            success=True,
            pdf_url=pdf_url,
            message="Conversion successful"
        )

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Conversion error: %s", e)
        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}") from e
    finally:
        # A failed request must not leave its working directory behind; on
        # success the directory is kept because the PDF is served from it.
        if temp_dir is not None and not succeeded:
            shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
|
@app.get("/download/{temp_id}/{filename}")
async def download_pdf(temp_id: str, filename: str):
    """
    Download converted PDF file

    Returns 404 when the file does not exist or when the requested path does
    not point at a regular file directly inside a conversion directory.
    """
    try:
        base_dir = Path(Config.TEMP_DIR).resolve()

        # temp_id and filename come straight from the URL, so normalise the
        # joined path before trusting it. Values that cannot be resolved at
        # all (e.g. an embedded NUL byte) are treated as "not found".
        try:
            file_path = (base_dir / temp_id / filename).resolve()
        except (OSError, ValueError):
            raise HTTPException(status_code=404, detail="File not found")

        # A legitimate target sits exactly at <TEMP_DIR>/<temp_id>/<filename>.
        # Anything else ("..", ".", symlink escapes) resolves to a different
        # depth or a different tree and is rejected. is_file() also rejects
        # directories, which exists() used to let through.
        if file_path.parent.parent != base_dir or not file_path.is_file():
            raise HTTPException(status_code=404, detail="File not found")

        return FileResponse(
            path=str(file_path),
            filename=filename,
            media_type='application/pdf'
        )
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Download error: %s", e)
        raise HTTPException(status_code=500, detail="Download failed")
|
|
|
|
|
@app.post("/convert/batch", response_model=List[ConversionResponse])
async def batch_convert(request: BatchConversionRequest):
    """
    Batch convert multiple DOCX files to PDF

    Every entry is processed independently and yields exactly one result, in
    request order. Failures are reported in-band (`success=false` plus an
    `error` text) and never abort the remaining entries.
    """
    # Function-scope import keeps this block self-contained; shutil is only
    # needed for the failure clean-up below.
    import shutil

    results = []

    for file_req in request.files:
        temp_dir = None
        converted = False
        try:
            # Validate the name BEFORE anything is written to disk; previously
            # the file was saved first and only then checked.
            if not file_handler.validate_file_extension(file_req.filename, Config.ALLOWED_EXTENSIONS):
                results.append(ConversionResponse(
                    success=False,
                    # Same wording as the single-file endpoint; the old text
                    # ("Invalid file content") misdescribed an extension check.
                    error="Invalid file type"
                ))
                continue

            file_data = converter.decode_base64_content(file_req.file_content)
            if file_data is None:
                results.append(ConversionResponse(
                    success=False,
                    error="Invalid base64 content"
                ))
                continue

            # Enforce the configured size limit on the decoded payload.
            # Assumes the decoder returns a bytes-like object - TODO confirm.
            if len(file_data) > Config.MAX_FILE_SIZE:
                results.append(ConversionResponse(
                    success=False,
                    error="File too large"
                ))
                continue

            # Only entries that passed validation get a working directory.
            temp_dir = file_handler.create_temp_directory()
            input_path = file_handler.save_uploaded_file(temp_dir, file_req.filename, file_data)

            output_filename = os.path.splitext(os.path.basename(input_path))[0] + ".pdf"
            output_path = os.path.join(temp_dir, output_filename)

            if converter.convert_docx_to_pdf(input_path, output_path):
                converted = True
                pdf_url = f"/download/{os.path.basename(temp_dir)}/{output_filename}"
                results.append(ConversionResponse(
                    success=True,
                    pdf_url=pdf_url,
                    message="Conversion successful"
                ))
            else:
                results.append(ConversionResponse(
                    success=False,
                    error="Conversion failed"
                ))

        except Exception as e:
            # Best-effort per entry: record the failure and move on.
            logger.exception("Batch conversion error: %s", e)
            results.append(ConversionResponse(
                success=False,
                error=str(e)
            ))
        finally:
            # Keep the directory only when it holds a PDF to download.
            if temp_dir is not None and not converted:
                shutil.rmtree(temp_dir, ignore_errors=True)

    return results