InitialMarkups

Runtime error

App Files Community

Marthee committed on Oct 9, 2025

Commit

4a8fab3

verified ·

1 Parent(s): db0afce

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -38

app.py CHANGED Viewed

@@ -42,52 +42,95 @@ def home():
     else:
         # Redirect to your PDF viewer route when ready
         return redirect(url_for("view_pdf", **request.args))
-################################################################################################################################################################
-################################################################################################################################################################
-##################### Main console ###########################################################################################################
-################################################################################################################################################################
-################################################################################################################################################################
 @app.route('/view-pdf', methods=['GET'])
-def download_pdf():
-    # Parse and decode pdfLink safely
-    # full_query_string = request.query_string.decode()
-    # parsed_params = urllib.parse.parse_qs(full_query_string)
-    # # print(parsed_params)
-    # encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]
-    # # Get PDF link and keyword from finddata()
-    # # pdf_link = finddata()
     encoded_pdf_link = request.args.get("pdfLink")
-    if encoded_pdf_link:
-        pdf_link = urllib.parse.unquote(encoded_pdf_link)
     if not encoded_pdf_link:
         return "Missing pdfLink parameter.", 400
-    # Decode the URL-encoded PDF link
-    # pdf_link = urllib.parse.unquote(encoded_pdf_link)
-    print("Extracted PDF Link:", pdf_link)
-    # pdf_link = request.get_json()
-    # if not pdf_link:
-    #     return "Missing pdfLink in request body.", 400
-    try:
-        # Use InitialMarkups to extract content
-        pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
-    except Exception as e:
-        print("Error during PDF extraction:", e)
-        return "PDF could not be processed.", 500
-    if pdf_content is None or not pdf_content.startswith(b"%PDF"):
-        return "PDF content not found or broken.", 404
-    pdf_bytes = BytesIO(pdf_content)
-    return send_file(
-        pdf_bytes,
-        mimetype='application/pdf',
-        as_attachment=False,
-        download_name=f"annotated_page_{pageNumTextFound}.pdf"
-    )
 @app.route('/api/process-data', methods=['POST'])

     else:
         # Redirect to your PDF viewer route when ready
         return redirect(url_for("view_pdf", **request.args))
+PROCESSED_PDF_PATH = "static/processed.pdf"  # cached processed PDF
 @app.route('/view-pdf', methods=['GET'])
+def view_pdf():
+    """First entry point — process the PDF once, then show viewer at specific page/zoom."""
     encoded_pdf_link = request.args.get("pdfLink")
+    page = request.args.get("page", default="1")
+    zoom = request.args.get("zoom", default="100")
     if not encoded_pdf_link:
         return "Missing pdfLink parameter.", 400
+    pdf_link = urllib.parse.unquote(encoded_pdf_link)
+    print(f"Requested PDF: {pdf_link} | Page: {page} | Zoom: {zoom}")
+    # Only process PDF if not already processed
+    if not os.path.exists(PROCESSED_PDF_PATH):
+        try:
+            from InitialMarkups import extract_section_under_header
+            pdf_content = extract_section_under_header(pdf_link)[0]
+            if not pdf_content.startswith(b"%PDF"):
+                return "Invalid PDF content.", 500
+            os.makedirs("static", exist_ok=True)
+            with open(PROCESSED_PDF_PATH, "wb") as f:
+                f.write(pdf_content)
+            print("✅ PDF processed and saved to:", PROCESSED_PDF_PATH)
+        except Exception as e:
+            print("❌ Error processing PDF:", e)
+            return "PDF could not be processed.", 500
+    else:
+        print("⚙️ Using cached PDF (already processed).")
+    # Render viewer and start at requested page/zoom
+    return render_template("viewer.html", start_page=page, start_zoom=zoom)
+@app.route('/get-pdf')
+def get_pdf():
+    """Serve the cached processed PDF."""
+    if not os.path.exists(PROCESSED_PDF_PATH):
+        return "PDF not processed yet.", 404
+    return send_file(PROCESSED_PDF_PATH, mimetype="application/pdf")
+################################################################################################################################################################
+################################################################################################################################################################
+##################### Main console ###########################################################################################################
+################################################################################################################################################################
+################################################################################################################################################################
+# @app.route('/view-pdf', methods=['GET'])
+# def download_pdf():
+#     # Parse and decode pdfLink safely
+#     # full_query_string = request.query_string.decode()
+#     # parsed_params = urllib.parse.parse_qs(full_query_string)
+#     # # print(parsed_params)
+#     # encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]
+#     # # Get PDF link and keyword from finddata()
+#     # # pdf_link = finddata()
+#     encoded_pdf_link = request.args.get("pdfLink")
+#     if encoded_pdf_link:
+#         pdf_link = urllib.parse.unquote(encoded_pdf_link)
+#     if not encoded_pdf_link:
+#         return "Missing pdfLink parameter.", 400
+#     # Decode the URL-encoded PDF link
+#     # pdf_link = urllib.parse.unquote(encoded_pdf_link)
+#     print("Extracted PDF Link:", pdf_link)
+#     # pdf_link = request.get_json()
+#     # if not pdf_link:
+#     #     return "Missing pdfLink in request body.", 400
+#     try:
+#         # Use InitialMarkups to extract content
+#         pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
+#     except Exception as e:
+#         print("Error during PDF extraction:", e)
+#         return "PDF could not be processed.", 500
+#     if pdf_content is None or not pdf_content.startswith(b"%PDF"):
+#         return "PDF content not found or broken.", 404
+#     pdf_bytes = BytesIO(pdf_content)
+#     return send_file(
+#         pdf_bytes,
+#         mimetype='application/pdf',
+#         as_attachment=False,
+#         download_name=f"annotated_page_{pageNumTextFound}.pdf"
+#     )
 @app.route('/api/process-data', methods=['POST'])