pdf1 / app /main.py
kalhdrawi's picture
Upload main.py
7355768 verified
import os
import shutil
import tempfile
import uuid
from pathlib import Path
from urllib.parse import quote

from fastapi import FastAPI, File, UploadFile, Request, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

from app.config import is_allowed_extension
from app.converters import convert_to_pdf

# Application instance
app = FastAPI(title="Word to PDF Converter")

# Filesystem layout, resolved relative to the directory containing this module
BASE_DIR = Path(__file__).resolve().parent
TEMPLATES_DIR = BASE_DIR.joinpath("templates")
STATIC_DIR = BASE_DIR.joinpath("static")
PDF_DIR = STATIC_DIR.joinpath("pdfs")

# Serve the bundled static assets under /static
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")

# Make sure the PDF output directory exists, then expose it under /pdfs
PDF_DIR.mkdir(parents=True, exist_ok=True)
app.mount("/pdfs", StaticFiles(directory=str(PDF_DIR)), name="pdfs")

# Jinja2 templates are loaded from TEMPLATES_DIR
templates = Jinja2Templates(directory=str(TEMPLATES_DIR))
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the homepage by rendering the ``index.html`` template."""
    # The request object is passed to the template under the "request" key.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.post("/api/convert")
async def convert_file(request: Request, file: UploadFile = File(...)):
    """
    Handle a document upload and convert it to PDF.

    The upload is saved into a private temporary directory (created under
    /dev/shm when that location is writable), passed to ``convert_to_pdf``,
    and the resulting file is moved into ``PDF_DIR``, which is served under
    ``/pdfs``. The temporary directory is always removed afterwards.

    Args:
        request: Incoming request; its ``base_url`` is used to build the
            absolute download URL.
        file: The uploaded document.

    Returns:
        JSONResponse with ``status``, ``url`` (absolute, percent-encoded link
        to the PDF) and ``filename`` (name of the stored PDF) keys.

    Raises:
        HTTPException: 400 when the upload has no usable filename or its
            extension is rejected by ``is_allowed_extension``; 500 when
            saving, converting or publishing the file fails.
    """
    # The filename is client-controlled: keep only its final path component so
    # that names such as "../../x.docx" or "/etc/x.docx" cannot escape the
    # temporary directory. Backslashes are normalised first so Windows-style
    # paths are reduced the same way.
    raw_name = (file.filename or "").replace("\\", "/")
    safe_name = os.path.basename(raw_name)

    if safe_name in ("", ".", "..") or not is_allowed_extension(safe_name):
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Allowed: .doc, .docx, .odt, .rtf",
        )

    # Unique ID for this request; prefixes the stored PDF name
    request_id = str(uuid.uuid4())

    # Prefer /dev/shm for the scratch directory when it is writable so the
    # input file is not written to the regular temp location; otherwise let
    # tempfile pick its default.
    ram_disk = "/dev/shm"
    use_ram_disk = os.path.isdir(ram_disk) and os.access(ram_disk, os.W_OK)
    tmp_dir = tempfile.mkdtemp(dir=ram_disk if use_ram_disk else None)

    try:
        # Save the uploaded file into the scratch directory
        input_path = os.path.join(tmp_dir, safe_name)
        with open(input_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # NOTE(review): the copy above and this conversion are blocking calls
        # inside an async handler — confirm whether convert_to_pdf is safe to
        # run in a worker thread before offloading it.
        pdf_path = convert_to_pdf(input_path, tmp_dir)

        # Move the finished PDF into the publicly served directory
        final_filename = f"{request_id}_{os.path.basename(pdf_path)}"
        final_path = PDF_DIR / final_filename
        shutil.move(pdf_path, str(final_path))

        # Build the absolute URL; force HTTPS on Hugging Face Spaces hosts.
        # Only the scheme prefix is rewritten.
        base_url = str(request.base_url).rstrip("/")
        if "hf.space" in base_url and base_url.startswith("http://"):
            base_url = "https://" + base_url[len("http://"):]

        # Percent-encode the filename so spaces, '#', '?' etc. in the original
        # document name do not break the link.
        full_url = f"{base_url}/pdfs/{quote(final_filename)}"

        return JSONResponse({
            "status": "success",
            "url": full_url,
            "filename": final_filename,
        })
    except Exception as e:
        print(f"Conversion Error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Cleanup temp directory (input file and any leftovers)
        shutil.rmtree(tmp_dir, ignore_errors=True)