SathvikGanta committed on
Commit
979e3c2
·
verified ·
1 Parent(s): 8db3ca1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -44
app.py CHANGED
@@ -51,7 +51,7 @@ def compare_images(img1, img2):
51
 
52
  return cleaned
53
 
54
- # Compare text and generate differences
55
  def generate_text_differences(orig_text, edit_text, start_position):
56
  diff = difflib.ndiff(orig_text.splitlines(), edit_text.splitlines())
57
  changes = []
@@ -64,11 +64,11 @@ def generate_text_differences(orig_text, edit_text, start_position):
64
  position_number += 1
65
  return changes, position_number
66
 
67
- # Highlight changes and generate visual summary
68
  def highlight_visual_changes(orig_img, edit_img, mask, start_position):
69
  overlay = edit_img.copy()
70
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
71
- visual_changes = [] # To store visual changes
72
  font = cv2.FONT_HERSHEY_SIMPLEX
73
  font_scale = 0.8
74
  thickness = 2
@@ -84,13 +84,8 @@ def highlight_visual_changes(orig_img, edit_img, mask, start_position):
84
 
85
  return overlay, visual_changes, position_counter
86
 
87
- # Sanitize text for PDF compatibility
88
- def sanitize_text(text):
89
- """Sanitize text for FPDF by replacing unsupported characters."""
90
- return text.encode('latin-1', errors='replace').decode('latin-1')
91
-
92
- # Generate comparison PDF with visual and text-based summaries
93
- def generate_comparison_pdf(original_pdf, edited_pdf):
94
  original_images = convert_pdf_to_images(original_pdf)
95
  edited_images = convert_pdf_to_images(edited_pdf)
96
  combined_images = []
@@ -116,48 +111,46 @@ def generate_comparison_pdf(original_pdf, edited_pdf):
116
  highlighted_img_resized = highlighted_img[:height]
117
  combined_images.append(np.hstack((orig_img_resized, highlighted_img_resized)))
118
 
119
- output_path = "outputs/comparison_result.pdf"
120
- pdf = FPDF()
121
-
122
- # Add each comparison image to the PDF
123
  for img in combined_images:
124
- temp_path = "temp_image.png"
125
  cv2.imwrite(temp_path, img)
126
- pdf.add_page()
127
- pdf.image(temp_path, x=10, y=10, w=190)
128
  os.remove(temp_path)
129
-
130
- # Add Visual Changes section
131
- pdf.add_page()
132
- pdf.set_font("Arial", size=12)
133
- pdf.cell(0, 10, sanitize_text("Visual Changes"), ln=True, align="C")
134
- pdf.ln(10) # Add a line break
135
  for _, change in visual_changes:
136
- pdf.cell(0, 10, sanitize_text(change), ln=True)
 
137
 
138
- # Add Text Changes section
139
- pdf.add_page()
140
- pdf.cell(0, 10, sanitize_text("Text Changes"), ln=True, align="C")
141
- pdf.ln(10) # Add a line break
 
 
 
 
 
 
 
 
 
142
  for _, change in text_changes:
143
- pdf.cell(0, 10, sanitize_text(change), ln=True)
 
144
 
145
- pdf.output(output_path)
146
- return output_path
147
 
148
  # Gradio interface function
149
  def pdf_comparison(original_pdf, edited_pdf):
150
- # Get the file size in bytes
151
- original_file_size = os.path.getsize(original_pdf.name)
152
- edited_file_size = os.path.getsize(edited_pdf.name)
153
-
154
- # Check if either file exceeds 50 MB (50 * 1024 * 1024 bytes)
155
- if original_file_size > 50 * 1024 * 1024 or edited_file_size > 50 * 1024 * 1024:
156
- return "Error: File size exceeds 50 MB. Please upload smaller files."
157
-
158
- # Proceed with PDF comparison
159
- result_path = generate_comparison_pdf(original_pdf.name, edited_pdf.name)
160
- return result_path
161
 
162
  # Gradio interface
163
  interface = gr.Interface(
@@ -166,9 +159,12 @@ interface = gr.Interface(
166
  gr.File(label="Upload Original PDF", file_types=[".pdf"]),
167
  gr.File(label="Upload Edited PDF", file_types=[".pdf"])
168
  ],
169
- outputs=gr.File(label="Download Comparison Report"),
 
 
 
170
  title="PDF Comparison Tool with Separate Reports",
171
- description="Upload two PDFs: the original and the edited version. The tool highlights changes and provides separate summaries for visual and text changes."
172
  )
173
 
174
  if __name__ == "__main__":
 
51
 
52
  return cleaned
53
 
54
+ # Generate text-based differences
55
  def generate_text_differences(orig_text, edit_text, start_position):
56
  diff = difflib.ndiff(orig_text.splitlines(), edit_text.splitlines())
57
  changes = []
 
64
  position_number += 1
65
  return changes, position_number
66
 
67
+ # Highlight visual changes
68
  def highlight_visual_changes(orig_img, edit_img, mask, start_position):
69
  overlay = edit_img.copy()
70
  contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
71
+ visual_changes = []
72
  font = cv2.FONT_HERSHEY_SIMPLEX
73
  font_scale = 0.8
74
  thickness = 2
 
84
 
85
  return overlay, visual_changes, position_counter
86
 
87
+ # Generate separate PDFs for visual and text changes
88
+ def generate_separate_pdfs(original_pdf, edited_pdf):
 
 
 
 
 
89
  original_images = convert_pdf_to_images(original_pdf)
90
  edited_images = convert_pdf_to_images(edited_pdf)
91
  combined_images = []
 
111
  highlighted_img_resized = highlighted_img[:height]
112
  combined_images.append(np.hstack((orig_img_resized, highlighted_img_resized)))
113
 
114
+ # Generate Visual Changes PDF
115
+ visual_pdf_path = "outputs/visual_changes.pdf"
116
+ pdf_visual = FPDF()
 
117
  for img in combined_images:
118
+ temp_path = "temp_image_visual.png"
119
  cv2.imwrite(temp_path, img)
120
+ pdf_visual.add_page()
121
+ pdf_visual.image(temp_path, x=10, y=10, w=190)
122
  os.remove(temp_path)
123
+ pdf_visual.add_page()
124
+ pdf_visual.set_font("Arial", size=12)
125
+ pdf_visual.cell(0, 10, "Visual Changes", ln=True, align="C")
126
+ pdf_visual.ln(10)
 
 
127
  for _, change in visual_changes:
128
+ pdf_visual.cell(0, 10, change, ln=True)
129
+ pdf_visual.output(visual_pdf_path)
130
 
131
+ # Generate Text Changes PDF
132
+ text_pdf_path = "outputs/text_changes.pdf"
133
+ pdf_text = FPDF()
134
+ for img in combined_images:
135
+ temp_path = "temp_image_text.png"
136
+ cv2.imwrite(temp_path, img)
137
+ pdf_text.add_page()
138
+ pdf_text.image(temp_path, x=10, y=10, w=190)
139
+ os.remove(temp_path)
140
+ pdf_text.add_page()
141
+ pdf_text.set_font("Arial", size=12)
142
+ pdf_text.cell(0, 10, "Text Changes", ln=True, align="C")
143
+ pdf_text.ln(10)
144
  for _, change in text_changes:
145
+ pdf_text.cell(0, 10, change, ln=True)
146
+ pdf_text.output(text_pdf_path)
147
 
148
+ return visual_pdf_path, text_pdf_path
 
149
 
150
  # Gradio interface function
151
  def pdf_comparison(original_pdf, edited_pdf):
152
+ visual_path, text_path = generate_separate_pdfs(original_pdf.name, edited_pdf.name)
153
+ return visual_path, text_path
 
 
 
 
 
 
 
 
 
154
 
155
  # Gradio interface
156
  interface = gr.Interface(
 
159
  gr.File(label="Upload Original PDF", file_types=[".pdf"]),
160
  gr.File(label="Upload Edited PDF", file_types=[".pdf"])
161
  ],
162
+ outputs=[
163
+ gr.File(label="Download Visual Changes Report"),
164
+ gr.File(label="Download Text Changes Report")
165
+ ],
166
  title="PDF Comparison Tool with Separate Reports",
167
+ description="Upload two PDFs: the original and the edited version. The tool generates two separate reports: one for visual changes and another for text changes."
168
  )
169
 
170
  if __name__ == "__main__":