Spaces:

findConsole
/

PromptTesting

Running

App Files Files Community

Marthee committed on Dec 30, 2025

Commit

44130c7

verified ·

1 Parent(s): 00ecae5

Upload app.py

Browse files

Files changed (1) hide show

app.py +450 -0

app.py ADDED Viewed

	@@ -0,0 +1,450 @@

+from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response
+import tsadropboxretrieval
+# import findInitialMarkups
+import InitialMarkups
+import requests
+import fitz
+from io import BytesIO
+import datetime
+import time
+from threading import Thread
+from urllib.parse import quote, unquote, parse_qs
+# import pdftotext
+import json
+# -------------------- App & Globals --------------------
+# Flask application object; every route in this file is registered on it.
+app = Flask(__name__)
+# Interpolated into the download_name of the PDFs served by
+# /view-pdf-tobebilled and /view-highlight. Nothing in this file reassigns
+# it, so those names always end in "_0".
+pageNumTextFound = 0
+# Not referenced anywhere else in this file.
+BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage" ##changed this only
+# Not referenced anywhere else in this file.
+backend_ready = False
+# Module-level copy of the most recent markup table: written by /findapi and
+# /findapiFilteredHeadings, read by /view-highlight.
+jsonoutput = []  # ensure defined before use
+# -------------------- Simple Health/Test --------------------
+@app.route("/health", methods=["GET"])
+def health():
+    return jsonify(status="ok", time=datetime.datetime.now().isoformat())
+# -------------------- Root: keep it simple & reliable --------------------
+@app.route("/", methods=["GET"])
+def root():
+    # Avoid missing-template errors. Keep it simple so external access works.
+    return jsonify(message="FIND APIs root. Use /health or /testpage."), 200
+# -------------------- Headers Filtering Find 1 Space --------------------
+@app.route('/api/process-data', methods=['POST'])
+def process_headers():
+    try:
+        data = request.get_json(force=True) or {}
+        filePath = data.get('filePath')
+        if not filePath:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        headers = findInitialMarkups.headersfrompdf(filePath)
+        return jsonify(headers)
+    except Exception as e:
+        print(f"Error in /api/process-data: {e}")
+        return jsonify({"error": str(e)}), 500
+# -------------------- PDF to Text 1 Space --------------------
+@app.route('/processalltext1', methods=['POST'])
+def processalltextTotext():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfpath = data.get('filePath')
+        if not pdfpath:
+            return jsonify({"error": "Missing 'filePath' in request data"}), 400
+        pdftext,filename = pdftotext.texts_from_pdfAllText(pdfpath)
+        return jsonify({"message": "Data received", "input_data": pdftext,"Filename:":filename})
+    except Exception as e:
+        print(f"Error in /processalltext1: {e}")
+        return jsonify({"error": str(e)}), 500
+# -------------------- Keepalive --------------------
+@app.route("/keepaliveapii", methods=["GET", "POST"])
+def keepaliveapi():
+    try:
+        print('Keepalive pinged')
+        return 'alivee'
+    except Exception as error:
+        print('Error in keepalive:', error)
+        return jsonify(status="error", message=str(error)), 500
+# -------------------- View PDF (Marked up) --------------------
+def getpdfcontent(pdf_path):
+    # Handle Dropbox URLs
+    if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
+        pdf_path = pdf_path.replace('dl=0', 'dl=1')
+    # Get the PDF bytes
+    response = requests.get(pdf_path)
+    pdf_bytes = response.content
+    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
+        raise ValueError("No valid PDF content found.")
+    # Return a BytesIO stream
+    return BytesIO(pdf_bytes)
+@app.route('/view-pdf', methods=['GET'])
+def view_pdf():
+    encoded_pdf_link = request.args.get('pdfLink')
+    if not encoded_pdf_link:
+        return "Missing pdfLink parameter.", 400
+    pdf_link = unquote(encoded_pdf_link)
+    print("Extracted PDF Link:", pdf_link)
+    try:
+        pdf_content = getpdfcontent(pdf_link)
+    except Exception as e:
+        print("Error during PDF extraction:", e)
+        return "PDF could not be processed.", 500
+    if pdf_content is None:
+        return "PDF content not found or broken.", 404
+    # ✅ Do NOT wrap again in BytesIO
+    return send_file(
+        pdf_content,
+        mimetype='application/pdf',
+        as_attachment=False,
+        download_name="annotated_page.pdf"
+    )
+# -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) --------------------
+@app.route('/api/process-pdf', methods=['POST'])
+def process_pdf_and_upload():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "'filePath' must be provided."}), 400
+        print("Processing PDF:", pdfLink)
+        pdfbytes, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
+        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
+        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)
+        dbPath = '/TSA JOBS/ADR Test/FIND/'
+        pdflink = tsadropboxretrieval.uploadanyFile(doc=pdf_document, path=dbPath, pdfname=metadata.name)
+        tablepdfLink = tsadropboxretrieval.uploadanyFile(
+            doc=tablepdfoutput,
+            path=dbPath,
+            pdfname=metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
+        )
+        print('Uploaded:', pdflink, tablepdfLink)
+        return jsonify({
+            "message": "PDF processed successfully.",
+            "PDF_MarkedUp": pdflink,
+            "Table_PDF_Markup_Summary": tablepdfLink
+        })
+    except Exception as e:
+        print(f"Error in /api/process-pdf: {e}")
+        return jsonify({"error": str(e)}), 500
+# -------------------- Not billed / Markup subsets --------------------
+@app.route('/findapitobebilled1', methods=['GET','POST'])
+def findapitobebilled1():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput, alltext_tobebilled, alltextNoNotbilled , filename = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
+        return jsonify(alltext_tobebilled)
+    except Exception as e:
+        print(f"Error in /findapitobebilled1: {e}")
+        return jsonify({"error": str(e)}), 500
+# ----------------------------------------------------------------------
+@app.route('/findapitobebilled_htmlformat', methods=['GET','POST'])
+def findapitobebilled_htmlformat():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput, alltext_tobebilled, alltextNoNotbilled , filename = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
+        # Parse JSON string → list of dicts
+        data = json.loads(tablepdfoutput)
+        # Collect all body parts
+        html_body = ""
+        for section in data:
+            if "head above 2" in section:
+                html_body += f"<h1>{section['head above 2']}</h1><br>"
+            if "head above 1" in section:
+                html_body += f"<h2>{section['head above 1']}</h2><br>"
+            if "Subject" in section:
+                html_body += f"<h3>{section['Subject']}</h3><br>"
+                if "BodyText" in section:
+                    html_body += f"<p>{' '.join(section['BodyText'])}</p><br>"
+                    # html_body += f"<div>{' '.join(section['bodytext'])}</div><br>"
+        # Wrap everything into one HTML document
+        html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>{filename}</title>
+            <meta charset="utf-8">
+        </head>
+        <body>
+            {html_body}
+        </body>
+        </html>
+        """
+        # return Response(html_content, mimetype="text/html", headers={"Filename": filename})
+        return jsonify({"input_data": html_content,"Filename:":filename})
+    except Exception as e:
+        print(f"Error in /findapitobebilled_htmlformat: {e}")
+        return jsonify({"error": str(e)}), 500
+@app.route('/view-pdf-tobebilled', methods=['GET'])
+def view_pdf_tobebilled():
+    encoded_pdf_link = request.args.get('pdfLink')
+    if not encoded_pdf_link:
+        return "Missing pdfLink parameter.", 400
+    pdf_link = unquote(encoded_pdf_link)
+    print("Extracted PDF Link:", pdf_link)
+    try:
+        pdf_content = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)[0]
+    except Exception as e:
+        print("Error during PDF extraction:", e)
+        return "PDF could not be processed.", 500
+    if pdf_content is None or not pdf_content.startswith(b"%PDF"):
+        return "PDF content not found or broken.", 404
+    return send_file(
+        BytesIO(pdf_content),
+        mimetype='application/pdf',
+        as_attachment=False,
+        download_name=f"annotated_page_{pageNumTextFound}.pdf"
+    )
+# -------------------- Final markups: view one highlight --------------------
+@app.route('/view-highlight', methods=['GET','POST'])
+def download_pdfHighlight():
+    pdf_link = request.args.get('pdfLink')
+    keyword = request.args.get('keyword')
+    if not pdf_link or not keyword:
+        return "Missing required parameters.", 400
+    pdf_link = unquote(pdf_link)
+    print("Extracted PDF Link:", pdf_link)
+    print("Extracted Keyword:", keyword)
+    global jsonoutput
+    matching_item = next((item for item in jsonoutput if item.get("Subject") == keyword), None)
+    if matching_item:
+        page_number = int(matching_item.get("Page")) - 1
+        stringtowrite = matching_item.get("head above 1")
+        print(f"Page number for '{keyword}': {page_number}")
+    else:
+        page_number = 0
+        stringtowrite = None
+        print("No match found in jsonoutput; defaulting to page 0.")
+    pdf_content = InitialMarkups.extract_section_under_headerRawan(pdf_link, keyword, page_number, stringtowrite)[0]
+    if pdf_content is None:
+        return "PDF content not found.", 404
+    return send_file(
+        BytesIO(pdf_content),
+        mimetype='application/pdf',
+        as_attachment=False,
+        download_name=f"annotated_page_{pageNumTextFound}.pdf"
+    )
+@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
+def findapiFilteredHeadings():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        listofheadings = data.get('listofheadings')  # json array
+        if not pdfLink or listofheadings is None:
+            return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput, alltext = InitialMarkups.extract_section_under_headerRawan(pdfLink, listofheadings)
+        global jsonoutput
+        jsonoutput = tablepdfoutput
+        return jsonify(alltext)
+    except Exception as e:
+        print(f"Error in /findapiFilteredHeadings: {e}")
+        return jsonify({"error": str(e)}), 500
+@app.route('/findapitobebilledonlyNew', methods=['GET','POST'])
+def findapitobebilledonly():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput, alltext , filename= InitialMarkups.extract_section_under_header_tobebilled2(pdfLink)
+        # return jsonify(tablepdfoutput)
+                # Parse JSON string → list of dicts
+        data = json.loads(tablepdfoutput)
+        # Collect all body parts
+        html_body = ""
+        for section in data:
+            if "head above 2" in section:
+                html_body += f"<h1>{section['head above 2']}</h1><br>"
+            if "head above 1" in section:
+                html_body += f"<h2>{section['head above 1']}</h2><br>"
+            if "Subject" in section:
+                html_body += f"<h3>{section['Subject']}</h3><br>"
+                if "BodyText" in section:
+                    html_body += f"<p>{' '.join(section['BodyText'])}</p><br>"
+                    # html_body += f"<div>{' '.join(section['bodytext'])}</div><br>"
+        # Wrap everything into one HTML document
+        html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>{filename}</title>
+            <meta charset="utf-8">
+        </head>
+        <body>
+            {html_body}
+        </body>
+        </html>
+        """
+        # return Response(html_content, mimetype="text/html", headers={"Filename": filename})
+        return jsonify({"input_data": html_content,"Filename:":filename})
+        # return Response(html_content, mimetype="text/html", headers={"Filename": filename})
+    except Exception as e:
+        print(f"Error in /findapitobebilledonly: {e}")
+        return jsonify({"error": str(e)}), 500
+@app.route('/findapitobebilledonlyNewMultiplePDFS', methods=['GET','POST'])
+def findapitobebilledonlymarthe():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput, alltext , filename= InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdfLink)
+        # return jsonify(tablepdfoutput)
+                # Parse JSON string → list of dicts
+        if isinstance(tablepdfoutput, str):
+            data = json.loads(tablepdfoutput)
+        else:
+            data = tablepdfoutput
+        # Collect all body parts
+        html_body = ""
+        for section in data:
+            if "head above 2" in section:
+                html_body += f"<h1>{section['head above 2']}</h1><br>"
+            if "head above 1" in section:
+                html_body += f"<h2>{section['head above 1']}</h2><br>"
+            if "Subject" in section:
+                html_body += f"<h3>{section['Subject']}</h3><br>"
+                if "BodyText" in section:
+                    html_body += f"<p>{' '.join(section['BodyText'])}</p><br>"
+                    # html_body += f"<div>{' '.join(section['bodytext'])}</div><br>"
+        # Wrap everything into one HTML document
+        html_content = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>{filename}</title>
+            <meta charset="utf-8">
+        </head>
+        <body>
+            {html_body}
+        </body>
+        </html>
+        """
+        # return Response(html_content, mimetype="text/html", headers={"Filename": filename})
+        return jsonify({"input_data": html_content,"Filename:":filename})
+        # return Response(html_content, mimetype="text/html", headers={"Filename": filename})
+    except Exception as e:
+        print(f"Error in /findapitobebilledonly: {e}")
+        return jsonify({"error": str(e)}), 500
+@app.route('/findapiAllDocNoNotbilled', methods=['GET','POST'])
+def findapiAllDocNoNotbilled():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput, alltext_tobebilled, alltextNoNotbilled ,filename= InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
+        return jsonify(alltextNoNotbilled)
+    except Exception as e:
+        print(f"Error in /findapiAllDocNoNotbilled: {e}")
+        return jsonify({"error": str(e)}), 500
+# -------------------- Rawan - MC Connection --------------------
+@app.route('/findapi', methods=['GET','POST'])
+def findapi():
+    try:
+        data = request.get_json(force=True) or {}
+        pdfLink = data.get('filePath')
+        if not pdfLink:
+            return jsonify({"error": "Missing 'filePath'"}), 400
+        pdfbytes, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
+        global jsonoutput
+        jsonoutput = tablepdfoutput
+        return jsonify(tablepdfoutput)
+    except Exception as e:
+        print(f"Error in /findapi: {e}")
+        return jsonify({"error": str(e)}), 500
+#--------------------testpage-----------------------------
+import socket
+from datetime import datetime
+@app.route('/testpage')
+def test_page():
+    # Get some system info
+    hostname = socket.gethostname()
+    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    return f"""
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>Server Test Page</title>
+        <style>
+            body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
+            .success {{ color: #2ecc71; font-size: 24px; }}
+            .info {{ color: #34495e; margin-top: 10px; }}
+            .container {{ max-width: 600px; margin: 0 auto; text-align: left; }}
+        </style>
+    </head>
+    <body>
+        <div class="success">🚀 Flask Server is Running!</div>
+        <div class="container">
+            <p class="info"><strong>Hostname:</strong> {hostname}</p>
+            <p class="info"><strong>Server Time:</strong> {current_time}</p>
+            <p class="info"><strong>Endpoint:</strong> /testpage</p>
+            <p class="info"><strong>Status:</strong> <span style="color: #2ecc71;">Operational ✅</span></p>
+        </div>
+    </body>
+    </html>
+    """
+# -------------------- Run --------------------
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=5000, debug=True)