um41r committed on
Commit
ae4f5c7
·
verified ·
1 Parent(s): 1864842

Update routers/pdf_compressor.py

Browse files
Files changed (1) hide show
  1. routers/pdf_compressor.py +70 -46
routers/pdf_compressor.py CHANGED
@@ -1,62 +1,91 @@
1
- from fastapi import APIRouter, File, UploadFile, HTTPException, Query
2
  from fastapi.responses import FileResponse
3
  import os
4
  import tempfile
5
- from PyPDF2 import PdfReader, PdfWriter
 
6
 
7
  router = APIRouter()
8
 
 
9
  TEMP_DIR = "/tmp/conversions"
 
 
 
 
 
 
 
 
 
 
10
 
11
  @router.post("/compress")
12
  async def compress_pdf(
 
13
  file: UploadFile = File(...),
14
  quality: str = Query("medium", enum=["low", "medium", "high"])
15
  ):
16
- """
17
- Compress PDF file
18
-
19
- Quality levels:
20
- - low: Maximum compression (smaller file, lower quality)
21
- - medium: Balanced compression
22
- - high: Minimum compression (larger file, higher quality)
23
- """
24
  if not file.filename.endswith('.pdf'):
25
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
 
 
 
 
 
26
 
27
- temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR)
28
- temp_compressed = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR)
29
-
30
  try:
31
- content = await file.read()
32
- temp_pdf.write(content)
33
- temp_pdf.close()
34
-
35
- reader = PdfReader(temp_pdf.name)
36
- writer = PdfWriter()
37
-
38
- # Add all pages
39
- for page in reader.pages:
40
- writer.add_page(page)
41
-
42
- # Apply compression based on quality
43
- for page in writer.pages:
44
- page.compress_content_streams()
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Write compressed PDF
47
- with open(temp_compressed.name, 'wb') as output_file:
48
- writer.write(output_file)
49
 
50
- original_name = os.path.splitext(file.filename)[0]
51
- output_filename = f"{original_name}_compressed.pdf"
 
 
 
 
52
 
53
- # Get file sizes for comparison
54
- original_size = os.path.getsize(temp_pdf.name)
55
- compressed_size = os.path.getsize(temp_compressed.name)
56
  reduction = ((original_size - compressed_size) / original_size) * 100
57
-
 
 
 
 
58
  return FileResponse(
59
- temp_compressed.name,
60
  media_type="application/pdf",
61
  filename=output_filename,
62
  headers={
@@ -65,13 +94,8 @@ async def compress_pdf(
65
  "X-Size-Reduction": f"{reduction:.2f}%"
66
  }
67
  )
68
-
69
  except Exception as e:
70
- raise HTTPException(status_code=500, detail=f"Compression failed: {str(e)}")
71
-
72
- finally:
73
- if os.path.exists(temp_pdf.name):
74
- try:
75
- os.unlink(temp_pdf.name)
76
- except:
77
- pass
 
1
+ from fastapi import APIRouter, File, UploadFile, HTTPException, Query, BackgroundTasks
2
  from fastapi.responses import FileResponse
3
  import os
4
  import tempfile
5
+ import subprocess
6
+ import shutil
7
 
8
  router = APIRouter()
9
 
10
+ # Hugging Face spaces allow writing to /tmp
11
  TEMP_DIR = "/tmp/conversions"
12
+ os.makedirs(TEMP_DIR, exist_ok=True)
13
+
14
def cleanup_files(*paths):
    """Delete temporary files on a best-effort basis.

    Used both as a background task scheduled after the response is sent and
    as an immediate cleanup step on error paths.

    Args:
        *paths: Filesystem paths of the files to delete.

    Returns:
        None. This function never raises: a path that is already gone is
        skipped silently, and any other failure is logged as a warning so
        leaked temp files can be diagnosed instead of vanishing unnoticed.
    """
    # Imported locally so the helper does not rely on a module-level import.
    import logging

    logger = logging.getLogger(__name__)
    for path in paths:
        try:
            # Remove directly rather than checking os.path.exists() first:
            # the file can disappear between the check and the removal.
            os.remove(path)
        except FileNotFoundError:
            # Already gone -- nothing to clean up for this path.
            continue
        except Exception:
            # Cleanup must never break the request; record the failure and
            # carry on with the remaining paths.
            logger.warning("Could not remove temp file %r", path, exc_info=True)
22
 
23
  @router.post("/compress")
24
  async def compress_pdf(
25
+ background_tasks: BackgroundTasks,
26
  file: UploadFile = File(...),
27
  quality: str = Query("medium", enum=["low", "medium", "high"])
28
  ):
 
 
 
 
 
 
 
 
29
  if not file.filename.endswith('.pdf'):
30
  raise HTTPException(status_code=400, detail="Only PDF files are allowed")
31
+
32
+ # Create temporary file paths
33
+ # We don't keep the file handle open, just reserve the name
34
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_in:
35
+ input_path = tmp_in.name
36
 
37
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf', dir=TEMP_DIR) as tmp_out:
38
+ output_path = tmp_out.name
39
+
40
  try:
41
+ # Save uploaded file
42
+ with open(input_path, 'wb') as f:
43
+ content = await file.read()
44
+ f.write(content)
45
+
46
+ # Ghostscript settings
47
+ # /screen = 72 dpi (Low Quality / High Compression)
48
+ # /ebook = 150 dpi (Medium Quality)
49
+ # /printer = 300 dpi (High Quality / Low Compression)
50
+ quality_map = {
51
+ "low": "/screen",
52
+ "medium": "/ebook",
53
+ "high": "/printer"
54
+ }
55
+ gs_setting = quality_map.get(quality, "/ebook")
56
+
57
+ # Run Ghostscript via subprocess
58
+ cmd = [
59
+ "gs",
60
+ "-sDEVICE=pdfwrite",
61
+ "-dCompatibilityLevel=1.4",
62
+ f"-dPDFSETTINGS={gs_setting}",
63
+ "-dNOPAUSE", "-dQUIET", "-dBATCH",
64
+ f"-sOutputFile={output_path}",
65
+ input_path
66
+ ]
67
 
68
+ # Execute command
69
+ process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 
70
 
71
+ if process.returncode != 0:
72
+ raise Exception(f"Ghostscript error: {process.stderr.decode()}")
73
+
74
+ # Calculate stats
75
+ original_size = os.path.getsize(input_path)
76
+ compressed_size = os.path.getsize(output_path)
77
 
78
+ if compressed_size == 0:
79
+ raise Exception("Compression resulted in empty file")
80
+
81
  reduction = ((original_size - compressed_size) / original_size) * 100
82
+ output_filename = f"{os.path.splitext(file.filename)[0]}_compressed.pdf"
83
+
84
+ # Add cleanup task to run AFTER response is sent
85
+ background_tasks.add_task(cleanup_files, input_path, output_path)
86
+
87
  return FileResponse(
88
+ output_path,
89
  media_type="application/pdf",
90
  filename=output_filename,
91
  headers={
 
94
  "X-Size-Reduction": f"{reduction:.2f}%"
95
  }
96
  )
97
+
98
  except Exception as e:
99
+ # Clean up immediately if there was an error
100
+ cleanup_files(input_path, output_path)
101
+ raise HTTPException(status_code=500, detail=f"Error: {str(e)}")