um41r committed on
Commit
efaec81
·
verified ·
1 Parent(s): 770891a

Update routers/pdf_compressor.py

Browse files
Files changed (1) hide show
  1. routers/pdf_compressor.py +81 -30
routers/pdf_compressor.py CHANGED
@@ -7,12 +7,10 @@ import shutil
7
 
8
  router = APIRouter()
9
 
10
- # Hugging Face spaces allow writing to /tmp
11
  TEMP_DIR = "/tmp/conversions"
12
  os.makedirs(TEMP_DIR, exist_ok=True)
13
 
14
  def cleanup_files(*paths):
15
- """Function to run in background to delete temp files"""
16
  for path in paths:
17
  try:
18
  if os.path.exists(path):
@@ -29,8 +27,7 @@ async def compress_pdf(
29
  if not file.filename.endswith('.pdf'):
30
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
31
 
32
- # Create temporary file paths
33
- # We don't keep the file handle open, just reserve the name
34
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
35
  input_path = tmp_in.name
36
 
@@ -43,59 +40,113 @@ async def compress_pdf(
43
  content = await file.read()
44
  f.write(content)
45
 
46
- # Ghostscript settings
47
- # /screen = 72 dpi (Low Quality / High Compression)
48
- # /ebook = 150 dpi (Medium Quality)
49
- # /printer = 300 dpi (High Quality / Low Compression)
50
- quality_map = {
51
- "low": "/screen",
52
- "medium": "/ebook",
53
- "high": "/printer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
- gs_setting = quality_map.get(quality, "/ebook")
56
 
57
- # Run Ghostscript via subprocess
 
 
58
  cmd = [
59
  "gs",
60
  "-sDEVICE=pdfwrite",
61
  "-dCompatibilityLevel=1.4",
62
- f"-dPDFSETTINGS={gs_setting}",
63
- "-dNOPAUSE", "-dQUIET", "-dBATCH",
 
 
64
  f"-sOutputFile={output_path}",
65
- input_path
66
  ]
67
 
68
- # Execute command
 
 
 
 
 
 
69
  process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
70
 
71
  if process.returncode != 0:
72
  raise Exception(f"Ghostscript error: {process.stderr.decode()}")
73
 
74
- # Calculate stats
 
 
75
  original_size = os.path.getsize(input_path)
76
  compressed_size = os.path.getsize(output_path)
77
 
78
- if compressed_size == 0:
79
- raise Exception("Compression resulted in empty file")
80
-
81
- reduction = ((original_size - compressed_size) / original_size) * 100
82
- output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
83
-
84
- # Add cleanup task to run AFTER response is sent
85
- background_tasks.add_task(cleanup_files, input_path, output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  return FileResponse(
88
- output_path,
89
  media_type="application/pdf",
90
  filename=output_filename,
91
  headers={
92
  "X-Original-Size": str(original_size),
93
- "X-Compressed-Size": str(compressed_size),
94
  "X-Size-Reduction": f"{reduction:.2f}%"
95
  }
96
  )
97
 
98
  except Exception as e:
99
- # Clean up immediately if there was an error
100
  cleanup_files(input_path, output_path)
101
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
 
7
 
8
  router = APIRouter()
9
 
 
10
  TEMP_DIR = "/tmp/conversions"
11
  os.makedirs(TEMP_DIR, exist_ok=True)
12
 
13
  def cleanup_files(*paths):
 
14
  for path in paths:
15
  try:
16
  if os.path.exists(path):
 
27
  if not file.filename.endswith('.pdf'):
28
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
29
 
30
+ # Temp file setup
 
31
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
32
  input_path = tmp_in.name
33
 
 
40
  content = await file.read()
41
  f.write(content)
42
 
43
+ # === AGGRESSIVE SETTINGS ===
44
+ # We explicitly set image resolution, downsampling and filter flags on top of the -dPDFSETTINGS preset
45
+
46
+ settings = {
47
+ "low": {
48
+ "dpi": "72",
49
+ "pdf_settings": "/screen",
50
+ # Downsample color, gray and mono images to 72 dpi and force JPEG (DCT) encoding for color images
51
+ "extra_flags": [
52
+ "-dColorImageDownsampleType=/Bicubic",
53
+ "-dColorImageResolution=72",
54
+ "-dGrayImageDownsampleType=/Bicubic",
55
+ "-dGrayImageResolution=72",
56
+ "-dMonoImageDownsampleType=/Bicubic",
57
+ "-dMonoImageResolution=72",
58
+ "-dAutoFilterColorImages=false", # Disable auto filter to force JPEG
59
+ "-dColorImageFilter=/DCTDecode", # Force JPEG
60
+ "-dEncodeColorImages=true",
61
+ "-dColorImageDepth=-1",
62
+ "-dDownsampleColorImages=true"
63
+ ]
64
+ },
65
+ "medium": {
66
+ "dpi": "150",
67
+ "pdf_settings": "/ebook",
68
+ "extra_flags": [
69
+ "-dColorImageDownsampleType=/Bicubic",
70
+ "-dColorImageResolution=150",
71
+ "-dGrayImageDownsampleType=/Bicubic",
72
+ "-dGrayImageResolution=150",
73
+ "-dDownsampleColorImages=true"
74
+ ]
75
+ },
76
+ "high": {
77
+ "dpi": "300",
78
+ "pdf_settings": "/printer",
79
+ "extra_flags": [] # High quality usually doesn't need forced downsampling beyond preset
80
+ }
81
  }
 
82
 
83
+ selected_setting = settings.get(quality)
84
+
85
+ # Base Command
86
  cmd = [
87
  "gs",
88
  "-sDEVICE=pdfwrite",
89
  "-dCompatibilityLevel=1.4",
90
+ f"-dPDFSETTINGS={selected_setting['pdf_settings']}",
91
+ "-dNOPAUSE",
92
+ "-dQUIET",
93
+ "-dBATCH",
94
  f"-sOutputFile={output_path}",
 
95
  ]
96
 
97
+ # Add Aggressive Flags
98
+ cmd.extend(selected_setting["extra_flags"])
99
+
100
+ # Add Input
101
+ cmd.append(input_path)
102
+
103
+ # Execute
104
  process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
105
 
106
  if process.returncode != 0:
107
  raise Exception(f"Ghostscript error: {process.stderr.decode()}")
108
 
109
+ # === FAIL-SAFE CHECK ===
110
+ # If compression made it BIGGER (happens with already optimized files),
111
+ # return the original file instead.
112
  original_size = os.path.getsize(input_path)
113
  compressed_size = os.path.getsize(output_path)
114
 
115
+ final_path = output_path
116
+ final_size = compressed_size
117
+
118
+ # If compression failed to reduce size (or made it bigger), send original
119
+ is_smaller = compressed_size < original_size
120
+
121
+ if not is_smaller:
122
+ # If the "compressed" file is bigger, discard it and return original
123
+ final_path = input_path
124
+ final_size = original_size
125
+ reduction = 0.00
126
+ # Delete the useless "compressed" file
127
+ try:
128
+ os.remove(output_path)
129
+ except:
130
+ pass
131
+ # input_path is the file being sent, so it is only deleted by the background task (after the response is sent)
132
+ output_filename = f"{os.path.splitext(file.filename)[0]}_original.pdf"
133
+ background_tasks.add_task(cleanup_files, input_path)
134
+ else:
135
+ reduction = ((original_size - compressed_size) / original_size) * 100
136
+ output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
137
+ background_tasks.add_task(cleanup_files, input_path, output_path)
138
 
139
  return FileResponse(
140
+ final_path,
141
  media_type="application/pdf",
142
  filename=output_filename,
143
  headers={
144
  "X-Original-Size": str(original_size),
145
+ "X-Compressed-Size": str(final_size),
146
  "X-Size-Reduction": f"{reduction:.2f}%"
147
  }
148
  )
149
 
150
  except Exception as e:
 
151
  cleanup_files(input_path, output_path)
152
  raise HTTPException(status_code=500, detail=f"Error: {str(e)}")