um41r committed on
Commit
223ca22
·
verified ·
1 Parent(s): c51c2d8

Update routers/pdf_compressor.py

Browse files
Files changed (1) hide show
  1. routers/pdf_compressor.py +29 -47
routers/pdf_compressor.py CHANGED
@@ -12,7 +12,6 @@ logger = logging.getLogger(__name__)
12
 
13
  router = APIRouter()
14
 
15
- # Use standard /tmp directory which is always writable in Docker
16
  TEMP_DIR = "/tmp/conversions"
17
  os.makedirs(TEMP_DIR, exist_ok=True)
18
 
@@ -30,7 +29,6 @@ async def compress_pdf(
30
  file: UploadFile = File(...),
31
  quality: str = Query("medium", enum=["low", "medium", "high"])
32
  ):
33
- # 1. Check for Ghostscript
34
  if not shutil.which("gs"):
35
  logger.error("Ghostscript not found! Please update Dockerfile.")
36
  raise HTTPException(status_code=500, detail="Server Configuration Error: Ghostscript is not installed.")
@@ -38,7 +36,6 @@ async def compress_pdf(
38
  if not file.filename.endswith('.pdf'):
39
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
40
 
41
- # Create paths
42
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
43
  input_path = tmp_in.name
44
 
@@ -46,45 +43,40 @@ async def compress_pdf(
46
  output_path = tmp_out.name
47
 
48
  try:
49
- # Write input file
50
  content = await file.read()
51
  with open(input_path, 'wb') as f:
52
  f.write(content)
53
 
54
- # Settings
 
 
55
  settings = {
56
  "low": {
57
- "pdf_settings": "/screen",
58
  "extra_flags": [
59
- "-dColorImageDownsampleType=/Bicubic",
60
- "-dColorImageResolution=72",
61
- "-dGrayImageDownsampleType=/Bicubic",
62
- "-dGrayImageResolution=72",
63
- "-dAutoFilterColorImages=false",
64
- "-dColorImageFilter=/DCTDecode",
65
- "-dEncodeColorImages=true"
66
  ]
67
  },
68
  "medium": {
69
- "pdf_settings": "/ebook",
70
  "extra_flags": [
71
- "-dColorImageDownsampleType=/Bicubic",
72
- "-dColorImageResolution=150",
73
- "-dGrayImageDownsampleType=/Bicubic",
74
- "-dGrayImageResolution=150"
75
  ]
76
  },
77
  "high": {
78
- "pdf_settings": "/printer",
79
  "extra_flags": []
80
  }
81
  }
82
 
83
  selected = settings.get(quality)
84
 
85
- # Build Command
86
- # REMOVED "-dQUIET" so we can see errors
87
- # ADDED "-dSAFER" for security/compatibility
88
  cmd = [
89
  "gs",
90
  "-dSAFER",
@@ -96,57 +88,48 @@ async def compress_pdf(
96
  f"-sOutputFile={output_path}"
97
  ]
98
 
 
99
  cmd.extend(selected["extra_flags"])
100
  cmd.append(input_path)
101
 
102
  logger.info(f"Running command: {' '.join(cmd)}")
103
 
104
- # Run Subprocess
105
- # Combined stdout and stderr to capture the actual error
106
  process = subprocess.run(
107
  cmd,
108
  stdout=subprocess.PIPE,
109
- stderr=subprocess.STDOUT, # Merge stderr into stdout
110
  text=True
111
  )
112
 
113
- # Log the output regardless of success/failure for debugging
114
- if process.stdout:
115
- logger.info(f"GS Output: {process.stdout[:500]}...") # Log first 500 chars
116
-
117
  if process.returncode != 0:
118
- logger.error(f"Ghostscript Failed with code {process.returncode}")
119
- logger.error(f"Full Error Log: {process.stdout}")
120
- raise Exception(f"Ghostscript failed. Check server logs for details.")
121
 
122
- # Check File Sizes
123
  if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
124
- raise Exception("Ghostscript produced an empty file.")
125
 
126
  original_size = os.path.getsize(input_path)
127
  compressed_size = os.path.getsize(output_path)
128
 
129
- # Failsafe: Return original if compression failed to reduce size
 
130
  final_path = output_path
131
  final_size = compressed_size
132
  output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
133
 
134
  if compressed_size >= original_size:
135
- logger.info("Compression did not reduce size. Returning original.")
136
  final_path = input_path
137
  final_size = original_size
138
  output_filename = f"{os.path.splitext(file.filename)[0]}_original.pdf"
139
- background_tasks.add_task(cleanup_files, output_path)
140
  else:
141
- background_tasks.add_task(cleanup_files, input_path)
142
 
143
- # Ensure we clean up whatever we send eventually
144
- # Note: We can't delete the file we are sending immediately,
145
- # so we rely on background_tasks or OS cleanup
146
- if final_path == output_path:
147
- background_tasks.add_task(cleanup_files, input_path, output_path)
148
- else:
149
- background_tasks.add_task(cleanup_files, input_path, output_path)
150
 
151
  reduction = ((original_size - final_size) / original_size) * 100
152
 
@@ -162,8 +145,7 @@ async def compress_pdf(
162
  )
163
 
164
  except Exception as e:
165
- logger.exception("Compression Exception")
166
  cleanup_files(input_path, output_path)
167
- # Return the actual error message to the frontend for debugging
168
  raise HTTPException(status_code=500, detail=str(e))
169
 
 
12
 
13
  router = APIRouter()
14
 
 
15
  TEMP_DIR = "/tmp/conversions"
16
  os.makedirs(TEMP_DIR, exist_ok=True)
17
 
 
29
  file: UploadFile = File(...),
30
  quality: str = Query("medium", enum=["low", "medium", "high"])
31
  ):
 
32
  if not shutil.which("gs"):
33
  logger.error("Ghostscript not found! Please update Dockerfile.")
34
  raise HTTPException(status_code=500, detail="Server Configuration Error: Ghostscript is not installed.")
 
36
  if not file.filename.endswith('.pdf'):
37
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
38
 
 
39
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
40
  input_path = tmp_in.name
41
 
 
43
  output_path = tmp_out.name
44
 
45
  try:
 
46
  content = await file.read()
47
  with open(input_path, 'wb') as f:
48
  f.write(content)
49
 
50
+ # === FIXED SETTINGS ===
51
+ # We removed the manual 'Resolution' flags because they conflict
52
+ # with PDFSETTINGS in Ghostscript v10+, causing the crash.
53
  settings = {
54
  "low": {
55
+ "pdf_settings": "/screen", # Forces 72 DPI
56
  "extra_flags": [
57
+ "-dDownsampleColorImages=true",
58
+ "-dDownsampleGrayImages=true",
59
+ "-dDownsampleMonoImages=true",
60
+ # We remove the explicit resolution numbers to avoid 'rangecheck' errors
 
 
 
61
  ]
62
  },
63
  "medium": {
64
+ "pdf_settings": "/ebook", # Forces 150 DPI
65
  "extra_flags": [
66
+ "-dDownsampleColorImages=true",
67
+ "-dDownsampleGrayImages=true",
68
+ "-dDownsampleMonoImages=true",
 
69
  ]
70
  },
71
  "high": {
72
+ "pdf_settings": "/printer", # Forces 300 DPI
73
  "extra_flags": []
74
  }
75
  }
76
 
77
  selected = settings.get(quality)
78
 
79
+ # Construct Command
 
 
80
  cmd = [
81
  "gs",
82
  "-dSAFER",
 
88
  f"-sOutputFile={output_path}"
89
  ]
90
 
91
+ # Add extra flags
92
  cmd.extend(selected["extra_flags"])
93
  cmd.append(input_path)
94
 
95
  logger.info(f"Running command: {' '.join(cmd)}")
96
 
97
+ # Run Ghostscript
 
98
  process = subprocess.run(
99
  cmd,
100
  stdout=subprocess.PIPE,
101
+ stderr=subprocess.STDOUT, # Merge output to catch errors
102
  text=True
103
  )
104
 
 
 
 
 
105
  if process.returncode != 0:
106
+ logger.error(f"Ghostscript Error Output:\n{process.stdout}")
107
+ raise Exception("Ghostscript processing failed. Check logs.")
 
108
 
109
+ # Check Output
110
  if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
111
+ raise Exception("Compression resulted in an empty file.")
112
 
113
  original_size = os.path.getsize(input_path)
114
  compressed_size = os.path.getsize(output_path)
115
 
116
+ # === FAILSAFE ===
117
+ # If compression did not shrink the file (same size or larger), return the original file instead.
118
  final_path = output_path
119
  final_size = compressed_size
120
  output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
121
 
122
  if compressed_size >= original_size:
123
+ logger.info(f"Compression ineffective ({compressed_size} > {original_size}). Returning original.")
124
  final_path = input_path
125
  final_size = original_size
126
  output_filename = f"{os.path.splitext(file.filename)[0]}_original.pdf"
127
+ background_tasks.add_task(cleanup_files, output_path) # Delete the useless compressed file
128
  else:
129
+ background_tasks.add_task(cleanup_files, input_path) # Delete input file
130
 
131
+ # Ensure cleanup runs eventually
132
+ background_tasks.add_task(cleanup_files, input_path, output_path)
 
 
 
 
 
133
 
134
  reduction = ((original_size - final_size) / original_size) * 100
135
 
 
145
  )
146
 
147
  except Exception as e:
148
+ logger.exception("Server Error")
149
  cleanup_files(input_path, output_path)
 
150
  raise HTTPException(status_code=500, detail=str(e))
151