Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,13 +4,7 @@ import requests
|
|
| 4 |
import io
|
| 5 |
import tempfile
|
| 6 |
import sys
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def compress_image(image, quality):
|
| 10 |
-
img_buffer = io.BytesIO()
|
| 11 |
-
image.save(img_buffer, format='JPEG', quality=quality)
|
| 12 |
-
img_buffer.seek(0)
|
| 13 |
-
return img_buffer
|
| 14 |
|
| 15 |
def compress_pdf(input_file, url, strength):
|
| 16 |
if input_file is None and (url is None or url.strip() == ""):
|
|
@@ -30,53 +24,64 @@ def compress_pdf(input_file, url, strength):
|
|
| 30 |
pdf_content = input_file
|
| 31 |
|
| 32 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
reader = PyPDF2.PdfReader(pdf_content)
|
| 34 |
writer = PyPDF2.PdfWriter()
|
| 35 |
|
| 36 |
if strength == "Low":
|
| 37 |
-
|
| 38 |
-
compression_level = 1
|
| 39 |
elif strength == "Medium":
|
| 40 |
-
|
| 41 |
-
compression_level = 2
|
| 42 |
else: # High
|
| 43 |
-
|
| 44 |
-
compression_level = 3
|
| 45 |
|
|
|
|
| 46 |
for page in reader.pages:
|
| 47 |
page.compress_content_streams() # Apply content stream compression
|
| 48 |
-
|
| 49 |
-
# Compress images on the page
|
| 50 |
-
for img_index, img in enumerate(page.images):
|
| 51 |
-
if img.image is not None:
|
| 52 |
-
try:
|
| 53 |
-
pil_image = Image.open(io.BytesIO(img.image))
|
| 54 |
-
compressed_image = compress_image(pil_image, image_quality)
|
| 55 |
-
page.replace_image(img_index, compressed_image)
|
| 56 |
-
except Exception as e:
|
| 57 |
-
print(f"Error compressing image: {e}")
|
| 58 |
-
|
| 59 |
writer.add_page(page)
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
| 62 |
writer.write(temp_file)
|
| 63 |
temp_file_path = temp_file.name
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
writer = PyPDF2.PdfWriter()
|
| 69 |
-
|
| 70 |
for page in reader.pages:
|
|
|
|
| 71 |
page.compress_content_streams()
|
| 72 |
writer.add_page(page)
|
| 73 |
-
|
| 74 |
-
writer.add_metadata(reader.metadata)
|
| 75 |
-
|
| 76 |
-
with open(temp_file_path, 'wb') as output_file:
|
| 77 |
-
writer.write(output_file)
|
| 78 |
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
except Exception as e:
|
| 81 |
return None, f"Error compressing PDF: {str(e)}"
|
| 82 |
|
|
|
|
| 4 |
import io
|
| 5 |
import tempfile
|
| 6 |
import sys
|
| 7 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def compress_pdf(input_file, url, strength):
|
| 10 |
if input_file is None and (url is None or url.strip() == ""):
|
|
|
|
| 24 |
pdf_content = input_file
|
| 25 |
|
| 26 |
try:
|
| 27 |
+
# Get the initial file size
|
| 28 |
+
pdf_content.seek(0, os.SEEK_END)
|
| 29 |
+
initial_size = pdf_content.tell()
|
| 30 |
+
pdf_content.seek(0)
|
| 31 |
+
|
| 32 |
reader = PyPDF2.PdfReader(pdf_content)
|
| 33 |
writer = PyPDF2.PdfWriter()
|
| 34 |
|
| 35 |
if strength == "Low":
|
| 36 |
+
target_ratio = 0.75 # 25% compression
|
|
|
|
| 37 |
elif strength == "Medium":
|
| 38 |
+
target_ratio = 0.50 # 50% compression
|
|
|
|
| 39 |
else: # High
|
| 40 |
+
target_ratio = 0.25 # 75% compression
|
|
|
|
| 41 |
|
| 42 |
+
# First pass: apply basic compression
|
| 43 |
for page in reader.pages:
|
| 44 |
page.compress_content_streams() # Apply content stream compression
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
writer.add_page(page)
|
| 46 |
|
| 47 |
+
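# Set compression flags on the writer (NOTE(review): confirm PdfWriter actually reads these attributes; otherwise these assignments have no effect)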
|
| 48 |
+
writer.compress = True
|
| 49 |
+
writer._compress_streams = True
|
| 50 |
+
writer._compress_pages = True
|
| 51 |
+
|
| 52 |
+
# Write the compressed PDF to a temporary file
|
| 53 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
| 54 |
writer.write(temp_file)
|
| 55 |
temp_file_path = temp_file.name
|
| 56 |
|
| 57 |
+
# Check the compression ratio achieved
|
| 58 |
+
compressed_size = os.path.getsize(temp_file_path)
|
| 59 |
+
current_ratio = compressed_size / initial_size
|
| 60 |
+
|
| 61 |
+
# If we haven't reached the target ratio, apply additional compression
|
| 62 |
+
if current_ratio > target_ratio:
|
| 63 |
+
reader = PyPDF2.PdfReader(temp_file_path)
|
| 64 |
writer = PyPDF2.PdfWriter()
|
| 65 |
+
|
| 66 |
for page in reader.pages:
|
| 67 |
+
# Second pass: re-run the same content-stream compression as the first pass (no stronger setting is applied here)
|
| 68 |
page.compress_content_streams()
|
| 69 |
writer.add_page(page)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
+
writer.compress = True
|
| 72 |
+
writer._compress_streams = True
|
| 73 |
+
writer._compress_pages = True
|
| 74 |
+
|
| 75 |
+
# Overwrite the temporary file with the more compressed version
|
| 76 |
+
with open(temp_file_path, 'wb') as temp_file:
|
| 77 |
+
writer.write(temp_file)
|
| 78 |
+
|
| 79 |
+
# Final compression ratio
|
| 80 |
+
final_size = os.path.getsize(temp_file_path)
|
| 81 |
+
final_ratio = final_size / initial_size
|
| 82 |
+
compression_percentage = (1 - final_ratio) * 100
|
| 83 |
+
|
| 84 |
+
return temp_file_path, f"PDF compressed successfully! Compression achieved: {compression_percentage:.2f}%"
|
| 85 |
except Exception as e:
|
| 86 |
return None, f"Error compressing PDF: {str(e)}"
|
| 87 |
|