Spaces:

um41r
/

PD-Tools

Running

App Files Files Community

um41r committed on Jan 29

Commit

ae4f5c7

verified ·

1 Parent(s): 1864842

Update routers/pdf_compressor.py

Browse files

Files changed (1) hide show

routers/pdf_compressor.py +70 -46

routers/pdf_compressor.py CHANGED Viewed

@@ -1,62 +1,91 @@
-from fastapi import APIRouter, File, UploadFile, HTTPException, Query
 from fastapi.responses import FileResponse
 import os
 import tempfile
-from PyPDF2 import PdfReader, PdfWriter
 router = APIRouter()
 TEMP_DIR = "/tmp/conversions"
 @router.post("/compress")
 async def compress_pdf(
     file: UploadFile = File(...),
     quality: str = Query("medium", enum=["low", "medium", "high"])
 ):
-    """
-    Compress PDF file
-    Quality levels:
-    - low: Maximum compression (smaller file, lower quality)
-    - medium: Balanced compression
-    - high: Minimum compression (larger file, higher quality)
-    """
     if not file.filename.endswith('.pdf'):
         raise HTTPException(status_code=400, detail="Only PDF files are allowed")
-    temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR)
-    temp_compressed = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR)
     try:
-        content = await file.read()
-        temp_pdf.write(content)
-        temp_pdf.close()
-        reader = PdfReader(temp_pdf.name)
-        writer = PdfWriter()
-        # Add all pages
-        for page in reader.pages:
-            writer.add_page(page)
-        # Apply compression based on quality
-        for page in writer.pages:
-            page.compress_content_streams()
-        # Write compressed PDF
-        with open(temp_compressed.name, 'wb') as output_file:
-            writer.write(output_file)
-        original_name = os.path.splitext(file.filename)[0]
-        output_filename = f"{original_name}_compressed.pdf"
-        # Get file sizes for comparison
-        original_size = os.path.getsize(temp_pdf.name)
-        compressed_size = os.path.getsize(temp_compressed.name)
         reduction = ((original_size - compressed_size) / original_size) * 100
         return FileResponse(
-            temp_compressed.name,
             media_type="application/pdf",
             filename=output_filename,
             headers={
@@ -65,13 +94,8 @@ async def compress_pdf(
                 "X-Size-Reduction": f"{reduction:.2f}%"
             }
         )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Compression failed: {str(e)}")
-    finally:
-        if os.path.exists(temp_pdf.name):
-            try:
-                os.unlink(temp_pdf.name)
-            except:
-                pass

+from fastapi import APIRouter, File, UploadFile, HTTPException, Query, BackgroundTasks
 from fastapi.responses import FileResponse
 import os
 import tempfile
+import subprocess
+import shutil
 router = APIRouter()
+# Hugging Face spaces allow writing to /tmp
 TEMP_DIR = "/tmp/conversions"
+os.makedirs(TEMP_DIR, exist_ok=True)
def cleanup_files(*paths):
    """Delete temporary files on a best-effort basis.

    Meant to be scheduled as a background task (or called directly on an
    error path) so temp files do not accumulate. It never raises: a failed
    delete must not turn an otherwise finished request into an error.

    Args:
        *paths: Filesystem paths to remove. Empty or ``None`` entries and
            paths that no longer exist are skipped.
    """
    for path in paths:
        if not path:
            continue
        try:
            # Remove directly instead of checking existence first; the check
            # is racy and a missing file is handled below anyway.
            os.remove(path)
        except FileNotFoundError:
            # Already gone -- exactly the state we want.
            continue
        except (OSError, TypeError, ValueError):
            # Permission problems, directories, or malformed path values:
            # cleanup is best-effort, so move on to the next path.
            continue
 @router.post("/compress")
 async def compress_pdf(
+    background_tasks: BackgroundTasks,
     file: UploadFile = File(...),
     quality: str = Query("medium", enum=["low", "medium", "high"])
 ):
     if not file.filename.endswith('.pdf'):
         raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+    # Create temporary file paths
+    # We don't keep the file handle open, just reserve the name
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
+        input_path = tmp_in.name
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_out:
+        output_path = tmp_out.name
     try:
+        # Save uploaded file
+        with open(input_path, 'wb') as f:
+            content = await file.read()
+            f.write(content)
+        # Ghostscript settings
+        # /screen = 72 dpi (Low Quality / High Compression)
+        # /ebook = 150 dpi (Medium Quality)
+        # /printer = 300 dpi (High Quality / Low Compression)
+        quality_map = {
+            "low": "/screen",
+            "medium": "/ebook",
+            "high": "/printer"
+        }
+        gs_setting = quality_map.get(quality, "/ebook")
+        # Run Ghostscript via subprocess
+        cmd = [
+            "gs",
+            "-sDEVICE=pdfwrite",
+            "-dCompatibilityLevel=1.4",
+            f"-dPDFSETTINGS={gs_setting}",
+            "-dNOPAUSE", "-dQUIET", "-dBATCH",
+            f"-sOutputFile={output_path}",
+            input_path
+        ]
+        # Execute command
+        process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if process.returncode != 0:
+            raise Exception(f"Ghostscript error: {process.stderr.decode()}")
+        # Calculate stats
+        original_size = os.path.getsize(input_path)
+        compressed_size = os.path.getsize(output_path)
+        if compressed_size == 0:
+            raise Exception("Compression resulted in empty file")
         reduction = ((original_size - compressed_size) / original_size) * 100
+        output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
+        # Add cleanup task to run AFTER response is sent
+        background_tasks.add_task(cleanup_files, input_path, output_path)
         return FileResponse(
+            output_path,
             media_type="application/pdf",
             filename=output_filename,
             headers={
                 "X-Size-Reduction": f"{reduction:.2f}%"
             }
         )
     except Exception as e:
+        # Clean up immediately if there was an error
+        cleanup_files(input_path, output_path)
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")