Spaces:

um41r
/

PD-Tools

Running

App Files Files Community

um41r committed on Jan 29

Commit

efaec81

verified ·

1 Parent(s): 770891a

Update routers/pdf_compressor.py

Browse files

Files changed (1) hide show

routers/pdf_compressor.py +81 -30

routers/pdf_compressor.py CHANGED Viewed

@@ -7,12 +7,10 @@ import shutil
 router = APIRouter()
-# Hugging Face spaces allow writing to /tmp
 TEMP_DIR = "/tmp/conversions"
 os.makedirs(TEMP_DIR, exist_ok=True)
 def cleanup_files(*paths):
-    """Function to run in background to delete temp files"""
     for path in paths:
         try:
             if os.path.exists(path):
@@ -29,8 +27,7 @@ async def compress_pdf(
     if not file.filename.endswith('.pdf'):
         raise HTTPException(status_code=400, detail="Only PDF files are allowed")
-    # Create temporary file paths
-    # We don't keep the file handle open, just reserve the name
     with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
         input_path = tmp_in.name
@@ -43,59 +40,113 @@ async def compress_pdf(
             content = await file.read()
             f.write(content)
-        # Ghostscript settings
-        # /screen = 72 dpi (Low Quality / High Compression)
-        # /ebook = 150 dpi (Medium Quality)
-        # /printer = 300 dpi (High Quality / Low Compression)
-        quality_map = {
-            "low": "/screen",
-            "medium": "/ebook",
-            "high": "/printer"
         }
-        gs_setting = quality_map.get(quality, "/ebook")
-        # Run Ghostscript via subprocess
         cmd = [
             "gs",
             "-sDEVICE=pdfwrite",
             "-dCompatibilityLevel=1.4",
-            f"-dPDFSETTINGS={gs_setting}",
-            "-dNOPAUSE", "-dQUIET", "-dBATCH",
             f"-sOutputFile={output_path}",
-            input_path
         ]
-        # Execute command
         process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         if process.returncode != 0:
             raise Exception(f"Ghostscript error: {process.stderr.decode()}")
-        # Calculate stats
         original_size = os.path.getsize(input_path)
         compressed_size = os.path.getsize(output_path)
-        if compressed_size == 0:
-            raise Exception("Compression resulted in empty file")
-        reduction = ((original_size - compressed_size) / original_size) * 100
-        output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
-        # Add cleanup task to run AFTER response is sent
-        background_tasks.add_task(cleanup_files, input_path, output_path)
         return FileResponse(
-            output_path,
             media_type="application/pdf",
             filename=output_filename,
             headers={
                 "X-Original-Size": str(original_size),
-                "X-Compressed-Size": str(compressed_size),
                 "X-Size-Reduction": f"{reduction:.2f}%"
             }
         )
     except Exception as e:
-        # Clean up immediately if there was an error
         cleanup_files(input_path, output_path)
         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")

 router = APIRouter()
 TEMP_DIR = "/tmp/conversions"
 os.makedirs(TEMP_DIR, exist_ok=True)
 def cleanup_files(*paths):
     for path in paths:
         try:
             if os.path.exists(path):
     if not file.filename.endswith('.pdf'):
         raise HTTPException(status_code=400, detail="Only PDF files are allowed")
+    # Temp file setup
     with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
         input_path = tmp_in.name
             content = await file.read()
             f.write(content)
+        # === AGGRESSIVE SETTINGS ===
+        # We explicitly set DPI and JPEG Quality instead of relying on presets
+        settings = {
+            "low": {
+                "dpi": "72",
+                "pdf_settings": "/screen",
+                # Force heavier JPEG compression (Color and Grayscale)
+                "extra_flags": [
+                    "-dColorImageDownsampleType=/Bicubic",
+                    "-dColorImageResolution=72",
+                    "-dGrayImageDownsampleType=/Bicubic",
+                    "-dGrayImageResolution=72",
+                    "-dMonoImageDownsampleType=/Bicubic",
+                    "-dMonoImageResolution=72",
+                    "-dAutoFilterColorImages=false", # Disable auto filter to force JPEG
+                    "-dColorImageFilter=/DCTDecode", # Force JPEG
+                    "-dEncodeColorImages=true",
+                    "-dColorImageDepth=-1",
+                    "-dDownsampleColorImages=true"
+                ]
+            },
+            "medium": {
+                "dpi": "150",
+                "pdf_settings": "/ebook",
+                "extra_flags": [
+                    "-dColorImageDownsampleType=/Bicubic",
+                    "-dColorImageResolution=150",
+                    "-dGrayImageDownsampleType=/Bicubic",
+                    "-dGrayImageResolution=150",
+                    "-dDownsampleColorImages=true"
+                ]
+            },
+            "high": {
+                "dpi": "300",
+                "pdf_settings": "/printer",
+                "extra_flags": [] # High quality usually doesn't need forced downsampling beyond preset
+            }
         }
+        selected_setting = settings.get(quality)
+        # Base Command
         cmd = [
             "gs",
             "-sDEVICE=pdfwrite",
             "-dCompatibilityLevel=1.4",
+            f"-dPDFSETTINGS={selected_setting['pdf_settings']}",
+            "-dNOPAUSE",
+            "-dQUIET",
+            "-dBATCH",
             f"-sOutputFile={output_path}",
         ]
+        # Add Aggressive Flags
+        cmd.extend(selected_setting["extra_flags"])
+        # Add Input
+        cmd.append(input_path)
+        # Execute
         process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         if process.returncode != 0:
             raise Exception(f"Ghostscript error: {process.stderr.decode()}")
+        # === FAIL-SAFE CHECK ===
+        # If compression made it BIGGER (happens with already optimized files),
+        # return the original file instead.
         original_size = os.path.getsize(input_path)
         compressed_size = os.path.getsize(output_path)
+        final_path = output_path
+        final_size = compressed_size
+        # If compression failed to reduce size (or made it bigger), send original
+        is_smaller = compressed_size < original_size
+        if not is_smaller:
+            # If the "compressed" file is bigger, discard it and return original
+            final_path = input_path
+            final_size = original_size
+            reduction = 0.00
+            # Delete the useless "compressed" file
+            try:
+                os.remove(output_path)
+            except:
+                pass
+            # We don't delete input_path in background task yet, because we are sending it!
+            output_filename = f"{os.path.splitext(file.filename)[0]}_original.pdf"
+            background_tasks.add_task(cleanup_files, input_path)
+        else:
+            reduction = ((original_size - compressed_size) / original_size) * 100
+            output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
+            background_tasks.add_task(cleanup_files, input_path, output_path)
         return FileResponse(
+            final_path,
             media_type="application/pdf",
             filename=output_filename,
             headers={
                 "X-Original-Size": str(original_size),
+                "X-Compressed-Size": str(final_size),
                 "X-Size-Reduction": f"{reduction:.2f}%"
             }
         )
     except Exception as e:
         cleanup_files(input_path, output_path)
         raise HTTPException(status_code=500, detail=f"Error: {str(e)}")