InitialMarkups

Runtime error

App Files Files Community

Marthee committed on Jul 2, 2025

Commit

ef850dc

verified ·

1 Parent(s): 7e99d85

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -100

app.py CHANGED Viewed

@@ -10,51 +10,155 @@ import datetime
 import time
 from threading import Thread
 import urllib
 app = Flask(__name__)  # Flask application object; the @app.route decorators below register handlers on it
-pdf_content = None  # module-level placeholder (this line is on the removed side of the diff)
 pageNumTextFound = 0  # interpolated into the download_name of served PDFs; no visible line reassigns it
-BASE_URL = "https://findconsole-initialmarkups.hf.space"  # Hugging Face Spaces base URL
-global jsonoutput  # NOTE(review): a 'global' statement at module level has no effect; the handlers declare it again before assigning
-@app.route("/", methods=["GET", "POST"])  # root URL, accepts GET and POST
-def thismain():  # Root handler (removed side of the diff): logs a marker string and renders the gui.html template
-    print('ayhaga')  # debug trace only
-    return render_template("gui.html")
 @app.route("/keepaliveapii", methods=["GET", "POST"])
 def keepaliveapi():  # Keep-alive endpoint: logs the ping and replies with the literal text 'alivee'
     try:
-      print('alivee')
-      return 'alivee'
     except Exception as error:  # any failure is logged and reported as a JSON error body with HTTP 500
-        print('error in keepalive:', error)
         return jsonify(status="error", message=str(error)), 500
 @app.route('/view-pdf', methods=['GET'])
 def download_pdf():
-    # Manually parse the query parameters
-    full_query_string = request.query_string.decode()  # Get raw query string
-    parsed_params = urllib.parse.parse_qs(full_query_string)  # Parse it
-    # Extract pdfLink and keyword manually
-    pdf_link = parsed_params.get('pdfLink', [None])[0]
-    if not pdf_link :
-        return "Missing required parameters.", 400
-    # Decode the extracted values
-    pdf_link = urllib.parse.unquote(pdf_link)
     print("Extracted PDF Link:", pdf_link)
-    # print("Extracted Keywords:", keyword)
-    createDF=False
-    pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]
-    if pdf_content is None:
-        return "PDF content not found.", 404
     pdf_bytes = BytesIO(pdf_content)
     return send_file(
@@ -64,6 +168,12 @@ def download_pdf():
         download_name=f"annotated_page_{pageNumTextFound}.pdf"
     )
 @app.route('/view-highlight', methods=['GET','POST'])
 def download_pdfHighlight():
@@ -105,81 +215,55 @@ def download_pdfHighlight():
         as_attachment=False,
         download_name=f"annotated_page_{pageNumTextFound}.pdf"
     )
-@app.route('/api/process-data', methods=['POST'])
-def receive_pdf_data():  # Runs the markup extraction on the link from finddata(), uploads both outputs via tsadropboxretrieval, and returns the two upload results as JSON
-    global pdf_content, pageNumTextFound  # declared global, but neither name is assigned inside this function
-    # Get PDF link and keyword from finddata()
-    pdfLink = finddata()  # NOTE(review): finddata() returns a (pdfLink, keyword) tuple, so pdfLink here is that whole tuple, not the URL string — confirm intent
-    if not pdfLink :  # NOTE(review): a non-empty tuple is always truthy, so this 400 branch is not reachable with the visible finddata()
-        return jsonify({"error": "'pdfLink' must be provided."}), 400
-    try:
-        print(pdfLink)
-        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)  # unpacks three results; pdfbytes is not used afterwards in this function
-        dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
-        # Get metadata using the shared link
-        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)  # metadata.name is reused below as the upload file name
-        dbPath='/TSA JOBS/ADR Test/FIND/'
-        pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=metadata.name) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
-        print('LINKS0',pdflink)
-        dbPath='/TSA JOBS/ADR Test/FIND/'  # same value as the assignment a few lines above
-        tablepdfLink=tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput,path=dbPath,pdfname=metadata.name.rsplit(".pdf", 1)[0] +' Markup Summary'+'.pdf')  # name = original name without its last ".pdf", plus " Markup Summary.pdf"
-        print(f"PDF successfully uploaded to Dropbox at")  # f-string has no placeholders, so no destination is actually printed
-        print('LINKS1',tablepdfLink)
-        return jsonify({
-            "message": "PDF processed successfully.",
-            "PDF_MarkedUp": pdflink,
-            'Table_PDF_Markup_Summary': tablepdfLink
-        })
-    except Exception as e:  # any failure in extraction or upload is returned as a JSON error with HTTP 500
-        return jsonify({"error": str(e)}), 500
-@app.route('/findapi', methods=['GET','POST'])
-def findapi():  # Reads 'filePath' from the JSON request body, runs the extraction, and returns its third result as JSON
     try:
         print('In process [Try]')
         data = request.get_json()  # NOTE(review): if this yields None (e.g. a request without a JSON body), data.get below raises AttributeError and the except returns a 500
         # Extracting values
         pdfLink = data.get('filePath')  # None when the key is absent; passed on without validation
-        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)  # only tablepdfoutput is used below
         global jsonoutput
         jsonoutput=tablepdfoutput  # also stored in a module-level global; no reader of it is visible in this diff
         return jsonify(tablepdfoutput)
     except Exception as e:  # every failure is reported as a JSON error with HTTP 500
         return jsonify({"error": str(e)}), 500
-@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
-def findapiFilteredHeadings():  # Like findapi, but also reads 'listofheadings' from the JSON body and passes it to extract_section_under_headerRawan
     try:
         print('In process [Try]')
         data = request.get_json()  # NOTE(review): if this yields None, data.get below raises AttributeError and the except returns a 500
         # Extracting values
         pdfLink = data.get('filePath')
-        print(pdfLink)
-        listofheadings = data.get('listofheadings') #in json format
-        print(listofheadings)
-        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_headerRawan(pdfLink,listofheadings)  # only tablepdfoutput is used below
         global jsonoutput
         jsonoutput=tablepdfoutput  # also stored in a module-level global; no reader of it is visible in this diff
         return jsonify(tablepdfoutput)
     except Exception as e:  # every failure is reported as a JSON error with HTTP 500
         return jsonify({"error": str(e)}), 500
 def finddata():  # Returns a hard-coded (pdfLink, keyword) pair: one Dropbox URL string and a list of two heading strings
     pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
     keyword = ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY']
     return pdfLink, keyword  # a 2-tuple; callers that want only the link must unpack or index it
 #_________________________________________________________________________________________________________________________
 #_________________________________________________________________________________________________________________________
@@ -190,32 +274,6 @@ def finddata():
 #_________________________________________________________________________________________________________________________
-# def runn():
-#     from gevent.pywsgi import WSGIServer
-#     http_server = WSGIServer(('0.0.0.0', 7860), app)
-#     http_server.serve_forever()
-# def keep_alive():
-#   t=Thread(target=runn)
-#   t.start()
-# dtn = datetime.datetime.now(datetime.timezone.utc)
-# print(dtn)
-# next_start = datetime.datetime(dtn.year, dtn.month, dtn.day, 21, 0, 0).astimezone(datetime.timezone.utc) #us - 2 = utc time  (21 utc is 19:00 our time and 9 is 7 our time , it needs to run 9 utc time ____ )
-# print(next_start)
-# keep_alive()
-# while 1:
-#   dtnNow = datetime.datetime.now(datetime.timezone.utc)
-#   print(dtnNow)
-#   if dtnNow >= next_start:
-#     next_start += datetime.timedelta(hours=12)  # 1 day
-#     print('YES- 12 hours passed!!',next_start)
-#   time.sleep(1800)
-# if __name__ == "__main__":
-#     runn()
-#     app.run
 if __name__ == '__main__':  # only when executed as a script, not when imported
     app.run(host='0.0.0.0', port=7860)  # start the Flask server on all interfaces, port 7860

 import time
 from threading import Thread
 import urllib
+from urllib.parse import quote
 app = Flask(__name__)  # Flask application object; the @app.route decorators below register handlers on it
 pageNumTextFound = 0  # interpolated into the download_name of served PDFs; no visible line reassigns it
+BASE_URL = "https://findconsole-initialmarkups.hf.space"  # not referenced by any line visible in this diff
+# Simulate a backend readiness flag (replace with actual check if possible)
+backend_ready = False  # NOTE(review): read by home(); no visible line ever sets it to True
+# @app.route("/")
+# def thismain():
+#     print('Home page loaded')
+#     return render_template("gui.html")
 @app.route("/keepaliveapii", methods=["GET", "POST"])
 def keepaliveapi():  # Keep-alive endpoint: logs the ping and replies with the literal text 'alivee'
     try:
+        print('Keepalive pinged')
+        return 'alivee'
     except Exception as error:  # any failure is logged and reported as a JSON error body with HTTP 500
+        print('Error in keepalive:', error)
         return jsonify(status="error", message=str(error)), 500
+@app.route("/")
+def home():  # Root handler: renders the wake_and_redirect.html page while backend_ready is false, otherwise redirects and forwards the query args
+    global backend_ready  # only read here; this function never assigns it
+    # If backend not ready, show loading page
+    if not backend_ready:
+        return render_template("wake_and_redirect.html")
+    else:
+        # Redirect to your PDF viewer route when ready
+        return redirect(url_for("view_pdf", **request.args))  # NOTE(review): no view named "view_pdf" is visible in this diff (the '/view-pdf' route's function is download_pdf) — confirm this endpoint name resolves, and that redirect/url_for are imported
+################################################################################################################################################################
+################################################################################################################################################################
+##################### Main console ###########################################################################################################
+################################################################################################################################################################
+################################################################################################################################################################
 @app.route('/view-pdf', methods=['GET'])
 def download_pdf():  # Takes a 'pdfLink' query parameter, runs the markup extraction on it, and serves the resulting PDF bytes inline
+    # Parse and decode pdfLink safely
+    full_query_string = request.query_string.decode()  # raw query string, parsed by hand below
+    parsed_params = urllib.parse.parse_qs(full_query_string)  # maps each parameter name to a list of values
+    encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]  # first value, or None when the parameter is absent
+    if not encoded_pdf_link:
+        return "Missing pdfLink parameter.", 400
+    # Decode the URL-encoded PDF link
+    pdf_link = urllib.parse.unquote(encoded_pdf_link)  # NOTE(review): parse_qs already percent-decodes values, so this is a second decoding pass — confirm callers double-encode the link
+    print("Extracted PDF Link:", pdf_link)
+    try:
+        # Use InitialMarkups to extract content
+        pdf_content = InitialMarkups.extract_section_under_header(pdf_link)[0]  # element 0 of the result is treated as the PDF bytes
+    except Exception as e:
+        print("Error during PDF extraction:", e)
+        return "PDF could not be processed.", 500
+    if pdf_content is None or not pdf_content.startswith(b"%PDF"):  # reject a missing result or bytes that lack the PDF header
+        return "PDF content not found or broken.", 404
+    pdf_bytes = BytesIO(pdf_content)  # wrap the bytes in a file-like object for send_file
+    return send_file(
+        pdf_bytes,
+        mimetype='application/pdf',
+        as_attachment=False,
+        download_name=f"annotated_page_{pageNumTextFound}.pdf"
+    )
+@app.route('/api/process-data', methods=['POST'])
+def receive_pdf_data():  # Runs the markup extraction on the link from finddata(), uploads both outputs via tsadropboxretrieval, and returns the two upload results as JSON
+    global pdf_content, pageNumTextFound  # declared global, but neither name is assigned inside this function
+    # Get PDF link and keyword from finddata()
+    pdfLink = finddata()  # NOTE(review): finddata() returns a (pdfLink, keyword) tuple, so pdfLink here is that whole tuple, not the URL string — confirm intent
+    if not pdfLink :  # NOTE(review): a non-empty tuple is always truthy, so this 400 branch is not reachable with the visible finddata()
+        return jsonify({"error": "'pdfLink' must be provided."}), 400
+    try:
+        print(pdfLink)
+        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)  # unpacks three results; pdfbytes is not used afterwards in this function
+        dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
+        # Get metadata using the shared link
+        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)  # metadata.name is reused below as the upload file name
+        dbPath='/TSA JOBS/ADR Test/FIND/'
+        pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=metadata.name) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
+        print('LINKS0',pdflink)
+        dbPath='/TSA JOBS/ADR Test/FIND/'  # same value as the assignment a few lines above
+        tablepdfLink=tsadropboxretrieval.uploadanyFile(doc=tablepdfoutput,path=dbPath,pdfname=metadata.name.rsplit(".pdf", 1)[0] +' Markup Summary'+'.pdf')  # name = original name without its last ".pdf", plus " Markup Summary.pdf"
+        print(f"PDF successfully uploaded to Dropbox at")  # f-string has no placeholders, so no destination is actually printed
+        print('LINKS1',tablepdfLink)
+        return jsonify({
+            "message": "PDF processed successfully.",
+            "PDF_MarkedUp": pdflink,
+            'Table_PDF_Markup_Summary': tablepdfLink
+        })
+    except Exception as e:  # any failure in extraction or upload is returned as a JSON error with HTTP 500
+        return jsonify({"error": str(e)}), 500
+################################################################################################################################################################
+################################################################################################################################################################
+##################### Not to  billed not markuped up ###########################################################################################################
+################################################################################################################################################################
+################################################################################################################################################################
+@app.route('/findapitobebilled', methods=['GET','POST'])
+def findapitobebilled():  # Reads 'filePath' from the JSON body, runs extract_section_under_header_tobebilledOnly, and returns its third result as JSON
+    try:
+        print('In process [Try]')
+        data = request.get_json()  # NOTE(review): if this yields None (e.g. a request without a JSON body), data.get below raises AttributeError and the except returns a 500
+        # Extracting values
+        pdfLink = data.get('filePath')  # None when the key is absent; passed on without validation
+        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)  # only tablepdfoutput is used below
+        global jsonoutput
+        jsonoutput=tablepdfoutput  # also stored in a module-level global; no reader of it is visible in this diff
+        return jsonify(tablepdfoutput)
+    except Exception as e:  # every failure is reported as a JSON error with HTTP 500
+        return jsonify({"error": str(e)}), 500
+@app.route('/view-pdf-tobebilled', methods=['GET'])
+def download_pdf_tobebilled():  # Takes a 'pdfLink' query parameter, runs extract_section_under_header_tobebilledOnly on it, and serves the resulting PDF bytes
+    # Parse and decode pdfLink safely
+    full_query_string = request.query_string.decode()  # raw query string, parsed by hand below
+    parsed_params = urllib.parse.parse_qs(full_query_string)  # maps each parameter name to a list of values
+    encoded_pdf_link = parsed_params.get('pdfLink', [None])[0]  # first value, or None when the parameter is absent
+    if not encoded_pdf_link:
+        return "Missing pdfLink parameter.", 400
+    # Decode the URL-encoded PDF link
+    pdf_link = urllib.parse.unquote(encoded_pdf_link)  # NOTE(review): parse_qs already percent-decodes values, so this is a second decoding pass — confirm callers double-encode the link
     print("Extracted PDF Link:", pdf_link)
+    try:
+        # Use InitialMarkups to extract content
+        pdf_content = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)[0]  # element 0 of the result is treated as the PDF bytes
+    except Exception as e:
+        print("Error during PDF extraction:", e)
+        return "PDF could not be processed.", 500
+    if pdf_content is None or not pdf_content.startswith(b"%PDF"):  # reject a missing result or bytes that lack the PDF header
+        return "PDF content not found or broken.", 404
     pdf_bytes = BytesIO(pdf_content)  # wrap the bytes in a file-like object for send_file
     return send_file(  # NOTE(review): some arguments of this call appear to be elided by the diff view; only download_name is visible here
         download_name=f"annotated_page_{pageNumTextFound}.pdf"
     )
+################################################################################################################################################################
+################################################################################################################################################################
+##################### For final markups - view one highlight at a time - not used yet  ###########################################################################################################
+################################################################################################################################################################
+################################################################################################################################################################
 @app.route('/view-highlight', methods=['GET','POST'])
 def download_pdfHighlight():
         as_attachment=False,
         download_name=f"annotated_page_{pageNumTextFound}.pdf"
     )
+@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
+def findapiFilteredHeadings():  # Reads 'filePath' and 'listofheadings' from the JSON body, passes both to extract_section_under_headerRawan, and returns its third result as JSON
     try:
         print('In process [Try]')
         data = request.get_json()  # NOTE(review): if this yields None, data.get below raises AttributeError and the except returns a 500
         # Extracting values
         pdfLink = data.get('filePath')
+        print(pdfLink)
+        listofheadings = data.get('listofheadings') #in json format
+        print(listofheadings)
+        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_headerRawan(pdfLink,listofheadings)  # only tablepdfoutput is used below
         global jsonoutput
         jsonoutput=tablepdfoutput  # also stored in a module-level global; no reader of it is visible in this diff
         return jsonify(tablepdfoutput)
     except Exception as e:  # every failure is reported as a JSON error with HTTP 500
         return jsonify({"error": str(e)}), 500
+################################################################################################################################################################
+################################################################################################################################################################
+##################### For Rawan - MC Connection ###########################################################################################################
+################################################################################################################################################################
+################################################################################################################################################################
+@app.route('/findapi', methods=['GET','POST'])
+def findapi():  # Reads 'filePath' from the JSON request body, runs the extraction, and returns its third result as JSON
     try:
         print('In process [Try]')
         data = request.get_json()  # NOTE(review): if this yields None (e.g. a request without a JSON body), data.get below raises AttributeError and the except returns a 500
         # Extracting values
         pdfLink = data.get('filePath')  # None when the key is absent; passed on without validation
+        pdfbytes, pdf_document,tablepdfoutput= InitialMarkups.extract_section_under_header(pdfLink)  # only tablepdfoutput is used below
         global jsonoutput
         jsonoutput=tablepdfoutput  # also stored in a module-level global; no reader of it is visible in this diff
         return jsonify(tablepdfoutput)
     except Exception as e:  # every failure is reported as a JSON error with HTTP 500
         return jsonify({"error": str(e)}), 500
+############################################# Testing  #################################################
 def finddata():  # Returns a hard-coded (pdfLink, keyword) pair: one Dropbox URL string and a list of two heading strings
     pdfLink = 'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1'
     keyword = ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY']
     return pdfLink, keyword  # a 2-tuple; callers that want only the link must unpack or index it
+########################################### Running #####################################################
 #_________________________________________________________________________________________________________________________
 #_________________________________________________________________________________________________________________________
 #_________________________________________________________________________________________________________________________
 if __name__ == '__main__':  # only when executed as a script, not when imported
     app.run(host='0.0.0.0', port=7860)  # start the Flask server on all interfaces, port 7860