# PD-Tools / routers / pdf_compressor.py
# um41r's picture
# Update routers/pdf_compressor.py
# 223ca22 verified
import logging
import os
import shutil
import subprocess
import tempfile

from fastapi import APIRouter, File, UploadFile, HTTPException, Query, BackgroundTasks
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse
# Set up logging
# NOTE: basicConfig configures the root logger at import time (INFO level).
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router on which this module's endpoints are registered.
router = APIRouter()
# Directory that holds the temporary input/output PDFs for each request.
TEMP_DIR = "/tmp/conversions"
# Create the directory on import; exist_ok avoids an error if it already exists.
os.makedirs(TEMP_DIR, exist_ok=True)
def cleanup_files(*paths):
    """Delete the given files on a best-effort basis.

    Args:
        *paths: File paths to remove. Falsy entries (``None``, ``""``) are
            skipped, and paths that no longer exist are ignored.

    Returns:
        None. This function never raises: it is used from background tasks
        and error handlers, where a failed delete must not mask the real
        outcome of the request. Failures are logged instead of being
        silently discarded.
    """
    for path in paths:
        if not path:
            continue
        try:
            # Attempt the removal directly instead of checking existence
            # first; the check-then-remove pattern races with other cleanups.
            os.remove(path)
        except FileNotFoundError:
            # Already gone (e.g. removed by an earlier cleanup) - nothing to do.
            continue
        except (OSError, TypeError, ValueError) as exc:
            logger.warning("Could not remove temporary file %r: %s", path, exc)
# Ghostscript preset and additional flags for each supported quality level.
# Explicit resolution flags are intentionally left out: they conflict with
# -dPDFSETTINGS in Ghostscript v10+ and cause 'rangecheck' errors.
_GS_PROFILES = {
    "low": {
        "pdf_settings": "/screen",  # Forces 72 DPI
        "extra_flags": [
            "-dDownsampleColorImages=true",
            "-dDownsampleGrayImages=true",
            "-dDownsampleMonoImages=true",
        ],
    },
    "medium": {
        "pdf_settings": "/ebook",  # Forces 150 DPI
        "extra_flags": [
            "-dDownsampleColorImages=true",
            "-dDownsampleGrayImages=true",
            "-dDownsampleMonoImages=true",
        ],
    },
    "high": {
        "pdf_settings": "/printer",  # Forces 300 DPI
        "extra_flags": [],
    },
}


def _build_gs_command(quality, input_path, output_path):
    """Return the Ghostscript argv that rewrites input_path into output_path."""
    profile = _GS_PROFILES[quality]
    return [
        "gs",
        "-dSAFER",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.4",
        f"-dPDFSETTINGS={profile['pdf_settings']}",
        "-dNOPAUSE",
        "-dBATCH",
        f"-sOutputFile={output_path}",
        *profile["extra_flags"],
        input_path,
    ]


@router.post("/compress")
async def compress_pdf(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    quality: str = Query("medium", enum=["low", "medium", "high"])
):
    """Compress an uploaded PDF with Ghostscript and return the result.

    Args:
        background_tasks: Used to delete the temporary files once the
            response has been sent.
        file: The uploaded PDF. Its filename must end in ``.pdf``
            (case-insensitive) and its content must be non-empty.
        quality: One of ``low``, ``medium`` or ``high``; selects the
            Ghostscript ``-dPDFSETTINGS`` preset.

    Returns:
        A ``FileResponse`` with the compressed PDF, or with the original
        upload when compression did not make the file smaller. The
        ``X-Original-Size``, ``X-Compressed-Size`` and ``X-Size-Reduction``
        headers describe the outcome.

    Raises:
        HTTPException: 400 for a non-PDF filename, an unknown quality value
            or an empty upload; 500 when Ghostscript is missing or the
            compression fails.
    """
    if not shutil.which("gs"):
        logger.error("Ghostscript not found! Please update Dockerfile.")
        raise HTTPException(status_code=500, detail="Server Configuration Error: Ghostscript is not installed.")

    # The filename is client-supplied and may be absent; compare the suffix
    # case-insensitively so "REPORT.PDF" is accepted as well.
    upload_name = file.filename or ""
    if not upload_name.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are allowed")

    # NOTE(review): the `enum=` hint on a plain `str` query parameter looks
    # like schema metadata only, so validate explicitly rather than failing
    # later with a TypeError on an unknown value.
    if quality not in _GS_PROFILES:
        raise HTTPException(
            status_code=400,
            detail="Invalid quality. Choose one of: low, medium, high.",
        )

    content = await file.read()
    if not content:
        # Also guarantees original_size > 0 for the reduction percentage below.
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    temp_paths = []  # Every temp file created so far, for cleanup on failure.
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
            input_path = tmp_in.name
            temp_paths.append(input_path)
            tmp_in.write(content)
        # Only the reserved name is needed here; Ghostscript writes the content.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_out:
            output_path = tmp_out.name
            temp_paths.append(output_path)

        cmd = _build_gs_command(quality, input_path, output_path)
        logger.info("Running command: %s", " ".join(cmd))

        # Ghostscript is CPU-bound and blocking; run it in a worker thread so
        # the event loop keeps serving other requests meanwhile.
        process = await run_in_threadpool(
            subprocess.run,
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # Merge output to catch errors
            text=True,
        )
        if process.returncode != 0:
            logger.error("Ghostscript Error Output:\n%s", process.stdout)
            raise RuntimeError("Ghostscript processing failed. Check logs.")

        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            raise RuntimeError("Compression resulted in an empty file.")

        original_size = os.path.getsize(input_path)
        compressed_size = os.path.getsize(output_path)

        # Drop any client-supplied directory components from the download name.
        base_name = os.path.splitext(os.path.basename(upload_name))[0]

        # Failsafe: if compression did not shrink the file, serve the original.
        if compressed_size >= original_size:
            logger.info(
                "Compression ineffective (%d >= %d). Returning original.",
                compressed_size,
                original_size,
            )
            final_path = input_path
            final_size = original_size
            output_filename = f"{base_name}_original.pdf"
        else:
            final_path = output_path
            final_size = compressed_size
            output_filename = f"{base_name}_compressed.pdf"

        # Background tasks run after the response has been sent, so one task
        # can safely remove both temp files, including the one being served.
        background_tasks.add_task(cleanup_files, input_path, output_path)

        reduction = ((original_size - final_size) / original_size) * 100
        return FileResponse(
            final_path,
            media_type="application/pdf",
            filename=output_filename,
            headers={
                "X-Original-Size": str(original_size),
                "X-Compressed-Size": str(final_size),
                "X-Size-Reduction": f"{reduction:.2f}%"
            }
        )
    except Exception as e:
        logger.exception("Server Error")
        cleanup_files(*temp_paths)
        raise HTTPException(status_code=500, detail=str(e)) from e