"""Rebuild a simple PDF from the text and images extracted from another PDF."""

import io

import fitz  # PyMuPDF
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas

# Layout constants, in PDF points.
_MARGIN = 50
_TEXT_TOP_OFFSET = 100  # distance from the top edge to the first text line
_LINE_HEIGHT = 14
_IMAGE_SIZE = 200
_IMAGE_GAP = 10


def _extract_pages(input_pdf):
    """Return one ``(text, [png_bytes, ...])`` tuple per page of *input_pdf*."""
    pages = []
    # The context manager closes the document even if extraction fails.
    with fitz.open(input_pdf) as doc:
        for page in doc:
            text = page.get_text("text")
            # A fresh list per page: sharing one list across pages would make
            # every output page carry every image of the whole document.
            page_images = []
            for img in page.get_images(full=True):
                xref = img[0]
                image_bytes = doc.extract_image(xref)["image"]
                pix = fitz.Pixmap(image_bytes)
                # PNG cannot store CMYK; subtract the alpha channel so that
                # RGB+alpha (n == 4) is not mistaken for CMYK.
                if pix.n - pix.alpha >= 4:
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                page_images.append(pix.tobytes("png"))
            pages.append((text, page_images))
    return pages


def extract_and_recreate_pdf(input_pdf, output_pdf):
    """Extract text and images from *input_pdf* and write them to *output_pdf*.

    Each source page starts a new output page headed "Page N", followed by
    the page's text (one output line per extracted line) and then its images,
    each drawn 200x200 points. Content that does not fit continues on an
    additional output page instead of running off the bottom.

    Args:
        input_pdf: Path of the PDF to read.
        output_pdf: Path of the PDF to create.
    """
    pages = _extract_pages(input_pdf)

    c = canvas.Canvas(output_pdf, pagesize=letter)
    _, page_height = letter

    for page_num, (text, images) in enumerate(pages, start=1):
        c.drawString(_MARGIN, page_height - _MARGIN, f"Page {page_num}")

        # drawString does not break lines, so emit the text line by line.
        y = page_height - _TEXT_TOP_OFFSET
        for line in text.splitlines():
            if y < _MARGIN:
                c.showPage()
                y = page_height - _MARGIN
            c.drawString(_MARGIN, y, line)
            y -= _LINE_HEIGHT

        # Images go below the text; drawImage's (x, y) is the lower-left corner.
        for png_bytes in images:
            y -= _IMAGE_SIZE
            if y < _MARGIN:
                c.showPage()
                y = page_height - _MARGIN - _IMAGE_SIZE
            # ImageReader takes a file-like object, so no temp files are needed.
            c.drawImage(
                ImageReader(io.BytesIO(png_bytes)),
                _MARGIN,
                y,
                width=_IMAGE_SIZE,
                height=_IMAGE_SIZE,
            )
            y -= _IMAGE_GAP

        c.showPage()  # Finish this source page's output

    c.save()


# Specify input and output file paths
input_pdf_path = "input.pdf"  # Replace with your uploaded PDF file
output_pdf_path = "output_editable.pdf"

if __name__ == "__main__":
    extract_and_recreate_pdf(input_pdf_path, output_pdf_path)
    print("Editable PDF created successfully.")