Marthee committed on
Commit
4a8fab3
·
verified ·
1 Parent(s): db0afce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -38
app.py CHANGED
@@ -42,52 +42,95 @@ def home():
42
  else:
43
  # Redirect to your PDF viewer route when ready
44
  return redirect(url_for("view_pdf", **request.args))
45
- ################################################################################################################################################################
46
- ################################################################################################################################################################
47
- ##################### Main console ###########################################################################################################
48
- ################################################################################################################################################################
49
- ################################################################################################################################################################
50
-
51
  @app.route('/view-pdf', methods=['GET'])
52
- def download_pdf():
53
- # Parse and decode pdfLink safely
54
- # full_query_string = request.query_string.decode()
55
- # parsed_params = urllib.parse.parse_qs(full_query_string)
56
- # # print(parsed_params)
57
- # encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]
58
- # # Get PDF link and keyword from finddata()
59
- # # pdf_link = finddata()
60
  encoded_pdf_link = request.args.get("pdfLink")
61
- if encoded_pdf_link:
62
- pdf_link = urllib.parse.unquote(encoded_pdf_link)
 
63
  if not encoded_pdf_link:
64
  return "Missing pdfLink parameter.", 400
65
 
66
- # Decode the URL-encoded PDF link
67
- # pdf_link = urllib.parse.unquote(encoded_pdf_link)
68
- print("Extracted PDF Link:", pdf_link)
69
- # pdf_link = request.get_json()
70
- # if not pdf_link:
71
- # return "Missing pdfLink in request body.", 400
72
-
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
- try:
75
- # Use InitialMarkups to extract content
76
- pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
77
- except Exception as e:
78
- print("Error during PDF extraction:", e)
79
- return "PDF could not be processed.", 500
80
 
81
- if pdf_content is None or not pdf_content.startswith(b"%PDF"):
82
- return "PDF content not found or broken.", 404
83
 
84
- pdf_bytes = BytesIO(pdf_content)
85
- return send_file(
86
- pdf_bytes,
87
- mimetype='application/pdf',
88
- as_attachment=False,
89
- download_name=f"annotated_page_{pageNumTextFound}.pdf"
90
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
  @app.route('/api/process-data', methods=['POST'])
 
42
  else:
43
  # Redirect to your PDF viewer route when ready
44
  return redirect(url_for("view_pdf", **request.args))
45
+ PROCESSED_PDF_PATH = "static/processed.pdf" # cached processed PDF
46
+
 
 
 
 
47
  @app.route('/view-pdf', methods=['GET'])
48
+ def view_pdf():
49
+ """First entry point: process the PDF once, then show the viewer at a specific page/zoom."""
 
 
 
 
 
 
50
  encoded_pdf_link = request.args.get("pdfLink")
51
+ page = request.args.get("page", default="1")
52
+ zoom = request.args.get("zoom", default="100")
53
+
54
  if not encoded_pdf_link:
55
  return "Missing pdfLink parameter.", 400
56
 
57
+ pdf_link = urllib.parse.unquote(encoded_pdf_link)
58
+ print(f"Requested PDF: {pdf_link} | Page: {page} | Zoom: {zoom}")
59
+
60
+ # Only process PDF if not already processed
61
+ if not os.path.exists(PROCESSED_PDF_PATH):
62
+ try:
63
+ from InitialMarkups import extract_section_under_header
64
+ pdf_content = extract_section_under_header(pdf_link)[0]
65
+ if not pdf_content.startswith(b"%PDF"):
66
+ return "Invalid PDF content.", 500
67
+
68
+ os.makedirs("static", exist_ok=True)
69
+ with open(PROCESSED_PDF_PATH, "wb") as f:
70
+ f.write(pdf_content)
71
+ print("✅ PDF processed and saved to:", PROCESSED_PDF_PATH)
72
+ except Exception as e:
73
+ print("❌ Error processing PDF:", e)
74
+ return "PDF could not be processed.", 500
75
+ else:
76
+ print("⚙️ Using cached PDF (already processed).")
77
 
78
+ # Render viewer and start at requested page/zoom
79
+ return render_template("viewer.html", start_page=page, start_zoom=zoom)
 
 
 
 
80
 
 
 
81
 
82
+ @app.route('/get-pdf')
83
+ def get_pdf():
84
+ """Serve the cached processed PDF."""
85
+ if not os.path.exists(PROCESSED_PDF_PATH):
86
+ return "PDF not processed yet.", 404
87
+ return send_file(PROCESSED_PDF_PATH, mimetype="application/pdf")
88
+ ################################################################################################################################################################
89
+ ################################################################################################################################################################
90
+ ##################### Main console ###########################################################################################################
91
+ ################################################################################################################################################################
92
+ ################################################################################################################################################################
93
+
94
+ # @app.route('/view-pdf', methods=['GET'])
95
+ # def download_pdf():
96
+ # # Parse and decode pdfLink safely
97
+ # # full_query_string = request.query_string.decode()
98
+ # # parsed_params = urllib.parse.parse_qs(full_query_string)
99
+ # # # print(parsed_params)
100
+ # # encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]
101
+ # # # Get PDF link and keyword from finddata()
102
+ # # # pdf_link = finddata()
103
+ # encoded_pdf_link = request.args.get("pdfLink")
104
+ # if encoded_pdf_link:
105
+ # pdf_link = urllib.parse.unquote(encoded_pdf_link)
106
+ # if not encoded_pdf_link:
107
+ # return "Missing pdfLink parameter.", 400
108
+
109
+ # # Decode the URL-encoded PDF link
110
+ # # pdf_link = urllib.parse.unquote(encoded_pdf_link)
111
+ # print("Extracted PDF Link:", pdf_link)
112
+ # # pdf_link = request.get_json()
113
+ # # if not pdf_link:
114
+ # # return "Missing pdfLink in request body.", 400
115
+
116
+
117
+ # try:
118
+ # # Use InitialMarkups to extract content
119
+ # pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
120
+ # except Exception as e:
121
+ # print("Error during PDF extraction:", e)
122
+ # return "PDF could not be processed.", 500
123
+
124
+ # if pdf_content is None or not pdf_content.startswith(b"%PDF"):
125
+ # return "PDF content not found or broken.", 404
126
+
127
+ # pdf_bytes = BytesIO(pdf_content)
128
+ # return send_file(
129
+ # pdf_bytes,
130
+ # mimetype='application/pdf',
131
+ # as_attachment=False,
132
+ # download_name=f"annotated_page_{pageNumTextFound}.pdf"
133
+ # )
134
 
135
 
136
  @app.route('/api/process-data', methods=['POST'])