thejagstudio committed on
Commit
d7bd270
·
verified ·
1 Parent(s): a49827e

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ static/output/dest_highlighted.pdf filter=lfs diff=lfs merge=lfs -text
37
+ static/output/source_highlighted.pdf filter=lfs diff=lfs merge=lfs -text
38
+ static/uploads/2024_12_17_venubabu.bathula_F5MS214215_AM8535_F25.pdf filter=lfs diff=lfs merge=lfs -text
39
+ static/uploads/2025_4_15_venubabu.bathula_F5MS214215_AM8535_F25.pdf filter=lfs diff=lfs merge=lfs -text
40
+ static/uploads/2025_6_16_bzakaria_F8MS180510_AM0449_S26-1.pdf filter=lfs diff=lfs merge=lfs -text
41
+ static/uploads/2025_9_16_venubabu.bathula_F8MS180510_AM0449_S26.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: official CPython 3.9.
FROM python:3.9

# The app saves uploads and generated PDFs under its working directory at
# runtime. Hugging Face Docker Spaces run the container as UID 1000, so create
# a matching non-root user up front (NOTE(review): confirm the target platform).
RUN useradd -m -u 1000 user

WORKDIR /code

# Copy the dependency list first so the pip layer is cached across code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Application sources, owned by the runtime user so static/uploads and
# static/output are writable.
COPY --chown=user . .

# WORKDIR created /code as root; hand it over so new sub-directories can be made.
RUN chown user:user /code

USER user

CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from flask import Flask, render_template, request, jsonify, send_from_directory
3
+ from flask_cors import CORS
4
+ from werkzeug.utils import secure_filename
5
+ from pdf_logic import process_comparison
6
+
7
# Flask application object; CORS(app) applies flask_cors with its default settings.
app = Flask(__name__)
CORS(app)

# Configuration
# Both paths are relative, so they resolve against the process's working directory.
UPLOAD_FOLDER = 'static/uploads'  # where /compare stores the uploaded PDFs
OUTPUT_FOLDER = 'static/output'  # handed to process_comparison as its output directory
# Create the folders at import time; exist_ok=True makes this a no-op when they exist.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
15
+
16
+
17
@app.route('/compare', methods=['POST'])
def compare():
    """
    Handle POST /compare: store two uploaded PDFs and diff them.

    Expects multipart form fields ``source`` and ``destination``.

    Returns:
        JSON with ``status``, ``source_pdf``, ``dest_pdf``, ``changes`` and
        ``total_pages`` on success; ``{'error': ...}`` with status 400 when a
        file or a usable filename is missing, or 500 when the comparison raises.
    """
    if 'source' not in request.files or 'destination' not in request.files:
        return jsonify({'error': 'Missing files'}), 400

    source_file = request.files['source']
    dest_file = request.files['destination']

    # secure_filename() can reduce a name to '' (empty upload field, or a name
    # made only of stripped characters). Joining '' would yield the folder path
    # itself and make save() fail, so reject it explicitly.
    source_name = secure_filename(source_file.filename or '')
    dest_name = secure_filename(dest_file.filename or '')
    if not source_name or not dest_name:
        return jsonify({'error': 'Invalid or empty filename'}), 400

    s_path = os.path.join(UPLOAD_FOLDER, source_name)
    d_path = os.path.join(UPLOAD_FOLDER, dest_name)
    if d_path == s_path:
        # Same filename for both uploads: without this the destination would
        # overwrite the source and the document would be compared with itself.
        d_path = os.path.join(UPLOAD_FOLDER, f"dest_{dest_name}")

    source_file.save(s_path)
    dest_file.save(d_path)

    try:
        # Run the comparison logic
        src_out, dest_out, changes, total_pages = process_comparison(s_path, d_path, OUTPUT_FOLDER)
    except Exception as e:
        # Top-level request boundary: log with traceback, report the message to the client.
        app.logger.exception("PDF comparison failed")
        return jsonify({'error': str(e)}), 500

    return jsonify({
        'status': 'success',
        'source_pdf': f"/static/output/{src_out}",
        'dest_pdf': f"/static/output/{dest_out}",
        'changes': changes,
        'total_pages': total_pages
    })
45
+
46
# Script entry point: serve the Flask app with waitress on all interfaces, port 7860.
if __name__ == '__main__':
    # Imported inside the guard, so waitress is only needed when run as a script.
    from waitress import serve

    serve(app, host="0.0.0.0", port=7860)
pdf_logic.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PDF Comparison Logic with Robust Annotation Handling
3
+
4
+ Key insight: PDFMiner extracts text in a "normalized" coordinate system where it applies
5
+ the page's CTM (Current Transformation Matrix) internally. However, annotations in PDF
6
+ need to be placed in the page's raw coordinate system (MediaBox-based).
7
+
8
+ This rewrite:
9
+ 1. Uses pypdf to get the actual page MediaBox and rotation
10
+ 2. Transforms PDFMiner coordinates back to raw page coordinates for annotations
11
+ 3. Properly handles all rotation cases (0, 90, 180, 270)
12
+ """
13
+
14
+ import difflib
15
+ from pdfminer.high_level import extract_pages
16
+ from pdfminer.layout import LTTextContainer, LTChar, LTAnno
17
+ from pypdf import PdfReader, PdfWriter
18
+ from pypdf.generic import (
19
+ DictionaryObject, NumberObject, FloatObject,
20
+ NameObject, ArrayObject
21
+ )
22
+
23
+
24
class CharComponent:
    """One extracted character together with its styling and location."""

    def __init__(self, character, font, color, bbox, page_num,
                 pdfminer_page_width, pdfminer_page_height):
        # Text, font name and colour (as given by the caller).
        self.character = character
        self.font = font
        self.color = color
        # Bounding box as (x0, y0, x1, y1): bottom-left then top-right corner,
        # expressed in PDFMiner's coordinate system.
        self.bbox = bbox
        # Page index supplied by the caller.
        self.page_num = page_num
        # Page size as reported by PDFMiner (may already reflect rotation).
        self.pdfminer_width = pdfminer_page_width
        self.pdfminer_height = pdfminer_page_height

    @property
    def has_position(self):
        """Return True unless all four bbox coordinates are zero."""
        box = self.bbox
        all_zero = box[0] == 0 and box[1] == 0 and box[2] == 0 and box[3] == 0
        return not all_zero
44
+
45
+
46
def get_page_info(page):
    """
    Read the geometry of a pypdf page.

    Uses ``page.cropbox`` when it is truthy and falls back to ``page.mediabox``
    otherwise (presumably the crop box matches the area PDFMiner reports —
    verify against the PDFMiner version in use).

    Returns:
        (width, height, rotation, offset_x, offset_y): box size as floats, the
        ``/Rotate`` entry normalised into 0..359 (0 when absent), and the box's
        lower-left corner as floats.
    """
    visible_box = page.cropbox or page.mediabox
    lower_left = visible_box.lower_left

    # Normalise e.g. 450 or -90 into the 0..359 range.
    degrees = int(page.get('/Rotate', 0)) % 360

    return (
        float(visible_box.width),
        float(visible_box.height),
        degrees,
        float(lower_left[0]),
        float(lower_left[1]),
    )
65
+
66
+
67
def transform_pdfminer_to_annotation_coords(bbox, pdfminer_width, pdfminer_height,
                                            raw_page_width, raw_page_height, rotation,
                                            offset_x=0, offset_y=0):
    """
    Map a PDFMiner bounding box into the raw page coordinates used by annotations.

    Args:
        bbox: (x0, y0, x1, y1) in PDFMiner coordinates.
        pdfminer_width, pdfminer_height: accepted for interface compatibility;
            not used by the computation.
        raw_page_width, raw_page_height: size of the unrotated page box.
        rotation: page rotation in degrees; 90, 180 and 270 are un-rotated,
            any other value is treated like 0.
        offset_x, offset_y: lower-left corner of the page box, added last.

    Returns:
        (x0, y0, x1, y1) with x0 <= x1 and y0 <= y1.
    """
    left, bottom, right, top = bbox

    if rotation == 90:
        # raw x = raw width - visual y ; raw y = visual x
        ax, ay, bx, by = raw_page_width - top, left, raw_page_width - bottom, right
    elif rotation == 180:
        # both axes mirrored
        ax, ay = raw_page_width - right, raw_page_height - top
        bx, by = raw_page_width - left, raw_page_height - bottom
    elif rotation == 270:
        # raw x = visual y ; raw y = raw height - visual x
        ax, ay, bx, by = bottom, raw_page_height - right, top, raw_page_height - left
    else:
        # 0 degrees, or a value that is not a supported right angle
        ax, ay, bx, by = left, bottom, right, top

    # Re-order so the first corner is the minimum, then shift by the box origin.
    low_x, high_x = min(ax, bx), max(ax, bx)
    low_y, high_y = min(ay, by), max(ay, by)
    return (low_x + offset_x, low_y + offset_y, high_x + offset_x, high_y + offset_y)
113
+
114
+
115
def create_highlight_annotation(x0, y0, x1, y1, color):
    """
    Build a /Highlight annotation dictionary for a rectangle.

    Args:
        x0, y0: Bottom-left corner
        x1, y1: Top-right corner
        color: RGB color as [r, g, b] with values 0-1

    Returns:
        DictionaryObject representing the highlight annotation
    """
    # Grow the box by one unit on every side so the highlight stays visible.
    padding = 1.0
    xs = (x0 - padding, x1 + padding)
    ys = (y0 - padding, y1 + padding)

    # Normalise the corner order.
    left, right = min(xs), max(xs)
    bottom, top = min(ys), max(ys)

    rect = ArrayObject([FloatObject(v) for v in (left, bottom, right, top)])
    # QuadPoints order: top-left, top-right, bottom-left, bottom-right.
    quad_points = ArrayObject([
        FloatObject(v)
        for v in (left, top, right, top, left, bottom, right, bottom)
    ])

    entries = (
        ("/Type", NameObject("/Annot")),
        ("/Subtype", NameObject("/Highlight")),
        ("/F", NumberObject(4)),  # Print flag
        ("/C", ArrayObject([FloatObject(c) for c in color])),
        ("/CA", FloatObject(0.4)),  # 40% opacity
        ("/Rect", rect),
        ("/QuadPoints", quad_points),
    )

    annotation = DictionaryObject()
    for key, value in entries:
        annotation[NameObject(key)] = value
    return annotation
162
+
163
+
164
def _collect_text_item(item, page_num, page_width, page_height, char_list, text_parts):
    """Record one LTChar/LTAnno into char_list/text_parts; return False for anything else."""
    if isinstance(item, LTChar):
        font, color, bbox = item.fontname, str(item.graphicstate.ncolor), item.bbox
    elif isinstance(item, LTAnno):
        # LTAnno is synthetic text and has no geometry: use an all-zero bbox so
        # CharComponent.has_position is False.
        font, color, bbox = "Anno", "0", (0, 0, 0, 0)
    else:
        return False

    text = item.get_text()
    text_parts.append(text)
    # One component per *character* of text. get_text() may yield several
    # characters (e.g. an expanded ligature) or none; a single component per
    # item would shift every later index relative to the text string.
    for ch in text:
        char_list.append(CharComponent(
            ch, font, color, bbox, page_num, page_width, page_height
        ))
    return True


def extract_pdf_data(pdf_path, pdf_reader):
    """
    Extract text and character position data from PDF.

    The returned list and string are index-aligned: ``char_list[i]`` describes
    ``full_text[i]``. Callers index the list with offsets taken from the
    string, so this invariant must hold.

    Args:
        pdf_path: Path to the PDF file
        pdf_reader: PdfReader instance for the same file (to get raw page info)

    Returns:
        (char_list, full_text, page_info_map): List of CharComponents, full text,
        and a dict mapping page index to the tuple returned by get_page_info
    """
    char_list = []
    # Collected pieces are joined once at the end instead of repeated `+=`.
    text_parts = []

    # Pre-compute page info from pypdf for coordinate transformation
    page_info_cache = {
        i: get_page_info(page) for i, page in enumerate(pdf_reader.pages)
    }

    for page_layout in extract_pages(pdf_path):
        page_num = page_layout.pageid - 1
        pdfminer_height = page_layout.height
        pdfminer_width = page_layout.width

        for element in page_layout:
            if not isinstance(element, LTTextContainer):
                continue
            for text_line in element:
                # A direct LTChar/LTAnno child is recorded as-is ...
                if _collect_text_item(text_line, page_num, pdfminer_width,
                                      pdfminer_height, char_list, text_parts):
                    continue
                # ... otherwise text_line is a container (LTTextLine): walk its children.
                for char in text_line:
                    _collect_text_item(char, page_num, pdfminer_width,
                                       pdfminer_height, char_list, text_parts)

    return char_list, "".join(text_parts), page_info_cache
234
+
235
+
236
def merge_char_components(chars):
    """
    Coalesce consecutive character boxes into larger rectangles.

    Characters are processed in the order given. A character joins the running
    rectangle when it overlaps it vertically by more than 40% of the shorter
    height and its left edge lies less than twice that height beyond the
    rectangle's right edge; otherwise a new rectangle is started.

    Returns list of (x0, y0, x1, y1) tuples.
    """
    if not chars:
        return []

    rectangles = []
    left, bottom, right, top = chars[0].bbox

    for component in chars[1:]:
        n_left, n_bottom, n_right, n_top = component.bbox

        # Vertical overlap relative to the shorter of the two boxes.
        overlap = min(top, n_top) - max(bottom, n_bottom)
        shortest = min(top - bottom, n_top - n_bottom)
        if shortest <= 0:
            shortest = 1  # avoid dividing by zero / negative heights
        on_same_line = overlap / shortest > 0.4

        # Horizontal gap; negative when the boxes overlap.
        gap = n_left - right
        is_near = gap < shortest * 2.0

        if on_same_line and is_near:
            left, bottom = min(left, n_left), min(bottom, n_bottom)
            right, top = max(right, n_right), max(top, n_top)
        else:
            rectangles.append((left, bottom, right, top))
            left, bottom, right, top = n_left, n_bottom, n_right, n_top

    rectangles.append((left, bottom, right, top))
    return rectangles
288
+
289
+
290
def add_highlight_rect(page, bbox, color, page_info, pdfminer_w, pdfminer_h):
    """
    Attach a highlight annotation covering ``bbox`` to ``page``.

    ``bbox`` is given in PDFMiner coordinates; ``page_info`` is the
    (width, height, rotation, offset_x, offset_y) tuple for the page.
    """
    page_width, page_height, rotation, shift_x, shift_y = page_info

    # PDFMiner coordinates -> raw page coordinates.
    rect = transform_pdfminer_to_annotation_coords(
        bbox, pdfminer_w, pdfminer_h,
        page_width, page_height, rotation,
        shift_x, shift_y,
    )
    highlight = create_highlight_annotation(rect[0], rect[1], rect[2], rect[3], color)

    # Create the /Annots array on first use, otherwise extend the existing one.
    if "/Annots" not in page:
        page[NameObject("/Annots")] = ArrayObject([highlight])
        return
    page["/Annots"].append(highlight)
320
+
321
+
322
def _highlight_span(chars, start, end, writer, page_info_map, color, change_data):
    """Highlight chars[start:end] on writer's pages and record the first page touched."""
    # Group positioned characters by page; slicing clamps out-of-range indices.
    by_page = {}
    for component in chars[start:end]:
        if component.has_position:
            by_page.setdefault(component.page_num, []).append(component)

    page_count = len(writer.pages)
    for p_num, p_chars in by_page.items():
        if p_num >= page_count:
            continue
        if change_data['page'] is None:
            change_data['page'] = p_num + 1  # reported 1-based

        page = writer.pages[p_num]
        # Fallback geometry: 612 x 792, unrotated, no offset.
        page_info = page_info_map.get(p_num, (612, 792, 0, 0, 0))
        first = p_chars[0]

        for bbox in merge_char_components(p_chars):
            add_highlight_rect(page, bbox, color, page_info,
                               first.pdfminer_width, first.pdfminer_height)


def process_comparison(source_path, dest_path, output_dir):
    """
    Compare two PDFs and generate highlighted versions showing differences.

    The extracted texts are diffed character by character with
    difflib.SequenceMatcher; deleted/replaced spans are highlighted in a copy
    of the source, inserted/replaced spans in a copy of the destination.
    The copies are written to ``output_dir`` under fixed file names.

    Args:
        source_path: Path to source PDF
        dest_path: Path to destination PDF
        output_dir: Directory to save output files

    Returns:
        (src_filename, dest_filename, changes_list, total_pages) where each
        change is a dict with ``id``, ``type``, ``src_text``, ``dest_text``
        (both truncated to 100 characters) and ``page`` (1-based).
    """
    # Create readers first (needed for page info during extraction)
    src_reader = PdfReader(source_path)
    dest_reader = PdfReader(dest_path)

    # Extract text data with position info
    src_chars, src_text, src_page_info = extract_pdf_data(source_path, src_reader)
    dest_chars, dest_text, dest_page_info = extract_pdf_data(dest_path, dest_reader)

    # Compare text using SequenceMatcher
    matcher = difflib.SequenceMatcher(None, src_text, dest_text)

    # Create writers and clone pages
    src_writer = PdfWriter()
    dest_writer = PdfWriter()

    for page in src_reader.pages:
        src_writer.add_page(page)
    for page in dest_reader.pages:
        dest_writer.add_page(page)

    total_pages = max(len(src_reader.pages), len(dest_reader.pages))

    # Color scheme
    COLORS = {
        'delete': [1.0, 0.7, 0.7],        # Light red for deletions
        'insert': [0.7, 1.0, 0.7],        # Light green for insertions
        'replace_src': [1.0, 0.85, 0.5],  # Orange/yellow for replaced (source)
        'replace_dest': [0.5, 0.75, 1.0]  # Blue for replaced (destination)
    }

    changes_list = []
    change_id = 1

    # Process differences
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            continue

        change_data = {
            "id": change_id,
            "type": tag.upper(),
            "src_text": src_text[i1:i2].replace('\n', ' ').strip()[:100],
            "dest_text": dest_text[j1:j2].replace('\n', ' ').strip()[:100],
            "page": None
        }

        # Highlight source document (deletions and replacements)
        if tag in ('replace', 'delete'):
            color_src = COLORS['replace_src'] if tag == 'replace' else COLORS['delete']
            _highlight_span(src_chars, i1, i2, src_writer, src_page_info,
                            color_src, change_data)

        # Highlight destination document (insertions and replacements)
        if tag in ('replace', 'insert'):
            color_dest = COLORS['replace_dest'] if tag == 'replace' else COLORS['insert']
            _highlight_span(dest_chars, j1, j2, dest_writer, dest_page_info,
                            color_dest, change_data)

        # Keep the change only if a highlight landed on a valid page; the id
        # advances only then, so reported ids stay consecutive.
        if change_data['page'] is not None:
            changes_list.append(change_data)
            change_id += 1

    # Save output files
    src_out_name = "source_highlighted.pdf"
    dest_out_name = "dest_highlighted.pdf"

    with open(f"{output_dir}/{src_out_name}", "wb") as f:
        src_writer.write(f)

    with open(f"{output_dir}/{dest_out_name}", "wb") as f:
        dest_writer.write(f)

    return src_out_name, dest_out_name, changes_list, total_pages
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Flask
2
+ pdfminer.six
3
+ pypdf
4
+ Flask-Cors
5
+ waitress
6
+ uvicorn
7
+ requests
static/output/dest_highlighted.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a4dfc6c3d9613848d2e59ba808ba0647a56158d0e1c3525fda8bc316671348
3
+ size 3750966
static/output/source_highlighted.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4244590d35f6fc13d1ac6cb1f4f42e056768d57524830a62f434a29637bc366
3
+ size 3768755
static/uploads/2024_12_17_venubabu.bathula_F5MS214215_AM8535_F25.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4febe1f6f29281441ceca87133de6594b74bed65364e75a173156b9a91f9ab52
3
+ size 3484226
static/uploads/2025_4_15_venubabu.bathula_F5MS214215_AM8535_F25.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa0da0c9e9d0f0005b44e1177fc25726c92a3fa0fcbc67d955df2726e95af8a
3
+ size 3485792
static/uploads/2025_6_16_bzakaria_F8MS180510_AM0449_S26-1.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d089dfcbb6f1bcfeb12bee79afcacc8b42cebe1cc83bac697aff7f554627bc8
3
+ size 2950920
static/uploads/2025_9_16_venubabu.bathula_F8MS180510_AM0449_S26.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d686b18e56e09b3d2db876165338eee5ac5c13e40abada3b02a39cd3794f0b
3
+ size 2936649