InitialMarkups

Runtime error

File size: 17,108 Bytes

from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response
import tsadropboxretrieval
# import findInitialMarkups
import InitialMarkups
import requests
import fitz
from io import BytesIO   
import datetime
import time
from threading import Thread
from urllib.parse import quote, unquote, parse_qs
# import pdftotext
import json
# -------------------- App & Globals --------------------
# Flask application object; every route in this file registers itself on it.
app = Flask(__name__)
# Only read in this file, to build the download name of the PDFs served by
# /view-pdf-tobebilled and /view-highlight. Nothing in the visible source
# reassigns it, so those names currently always end in "_0.pdf".
pageNumTextFound = 0
# NOTE(review): not referenced anywhere else in the visible source - confirm it is still needed.
BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage" ##changed this only
# NOTE(review): never read or updated in the visible source.
backend_ready = False
# Markup table most recently produced by /findapi or /findapiFilteredHeadings;
# /view-highlight searches it for the requested "Subject". It is a plain module
# global, so the value is shared by every request handled by this process.
jsonoutput = []  # ensure defined before use

# -------------------- Simple Health/Test --------------------
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe.

    Returns:
        JSON object with ``status`` set to ``"ok"`` and ``time`` set to the
        server's current local time in ISO-8601 format.
    """
    # Import under a private alias: further down, this module executes
    # ``from datetime import datetime``, which rebinds the global name
    # ``datetime`` to the class. ``datetime.datetime.now()`` therefore raised
    # AttributeError on every request. The alias is immune to that shadowing.
    import datetime as _datetime_module

    return jsonify(status="ok", time=_datetime_module.datetime.now().isoformat())

# -------------------- Root: keep it simple & reliable --------------------
@app.route("/", methods=["GET"])
def root():
    """Landing endpoint: answer with a short JSON hint and an explicit 200."""
    # A plain JSON body means this route does not depend on any template file.
    hint = "FIND APIs root. Use /health or /testpage."
    return jsonify(message=hint), 200

# -------------------- Headers Filtering Find 1 Space --------------------
@app.route('/api/process-data', methods=['POST'])
def process_headers():
    """Return the headers extracted from the PDF named in the request.

    Request body (JSON object): ``{"filePath": ...}``.

    Returns:
        200 with the JSON-serialised result of
        ``findInitialMarkups.headersfrompdf(filePath)``;
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    try:
        # silent=True: an unreadable body yields None instead of raising
        # BadRequest, which the broad ``except`` below used to report as a 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        filePath = payload.get('filePath')
        if not filePath:
            return jsonify({"error": "Missing 'filePath'"}), 400

        # The module-level import of findInitialMarkups is commented out, so
        # this name was undefined and every call ended in NameError. Importing
        # here keeps module start-up unchanged; if the module is unavailable the
        # ImportError is reported through the handler below.
        # NOTE(review): the file imports ``InitialMarkups`` instead - confirm
        # whether ``headersfrompdf`` now lives there.
        import findInitialMarkups

        headers = findInitialMarkups.headersfrompdf(filePath)
        return jsonify(headers)
    except Exception as e:
        print(f"Error in /api/process-data: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- PDF to Text 1 Space --------------------
@app.route('/processalltext1', methods=['POST'])
def processalltextTotext():
    """Return the text extracted from the PDF named in the request.

    Request body (JSON object): ``{"filePath": ...}``.

    Returns:
        200 with ``message``, ``input_data`` (the extracted text) and
        ``Filename:`` (the key really does end in a colon; it is kept because
        clients may already read it under that name);
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    try:
        # silent=True: an unreadable body yields None instead of raising
        # BadRequest, which the broad ``except`` below used to report as a 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfpath = payload.get('filePath')
        if not pdfpath:
            return jsonify({"error": "Missing 'filePath' in request data"}), 400

        # The module-level import of pdftotext is commented out, so this name
        # was undefined and every call ended in NameError. Importing here keeps
        # module start-up unchanged; a missing module surfaces as a 500 below.
        # NOTE(review): confirm the project-local ``pdftotext`` module (the one
        # providing ``texts_from_pdfAllText``) is deployed with this app.
        import pdftotext

        pdftext, filename = pdftotext.texts_from_pdfAllText(pdfpath)
        return jsonify({"message": "Data received", "input_data": pdftext,"Filename:":filename})
    except Exception as e:
        print(f"Error in /processalltext1: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- Keepalive --------------------
@app.route("/keepaliveapii", methods=["GET", "POST"])
def keepaliveapi():
    """Keep-alive target: log the ping and answer with the fixed body ``alivee``."""
    try:
        print('Keepalive pinged')
    except Exception as error:
        # Logging itself failed; report it as a JSON error with status 500.
        print('Error in keepalive:', error)
        return jsonify(status="error", message=str(error)), 500
    else:
        return 'alivee'

# -------------------- View PDF (Marked up) --------------------
def getpdfcontent(pdf_path, timeout=30):
    """Download a PDF and return it as an in-memory binary stream.

    Args:
        pdf_path: URL of the PDF. When the value contains ``http`` or
            ``dropbox``, every ``dl=0`` in it is rewritten to ``dl=1``
            (presumably Dropbox's direct-download switch - confirm).
        timeout: Seconds handed to ``requests.get``. Without a timeout a
            stalled server would block the calling worker indefinitely.

    Returns:
        A ``BytesIO`` holding the downloaded bytes.

    Raises:
        ValueError: ``pdf_path`` is empty, or the downloaded bytes do not
            start with the ``%PDF`` signature.
        requests.exceptions.RequestException: the download itself failed.
    """
    # Fail early with a clear message instead of letting requests reject None/"".
    if not pdf_path:
        raise ValueError("No PDF link provided.")

    # Handle Dropbox URLs
    if 'http' in pdf_path or 'dropbox' in pdf_path:
        pdf_path = pdf_path.replace('dl=0', 'dl=1')

    # NOTE(review): the URL comes straight from the caller's request, so this
    # server will fetch any address it is given (SSRF risk). Consider an
    # allow-list of hosts if the service is reachable by untrusted clients.
    response = requests.get(pdf_path, timeout=timeout)
    pdf_bytes = response.content

    # An error page or an empty body is not a PDF; reject it here.
    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
        raise ValueError("No valid PDF content found.")

    return BytesIO(pdf_bytes)


@app.route('/view-pdf', methods=['GET'])
def view_pdf():
    """Stream the PDF referenced by the ``pdfLink`` query parameter inline.

    Returns:
        The PDF (``application/pdf``, not as an attachment) on success;
        400 when ``pdfLink`` is absent; 500 when the download fails;
        404 when no content object was produced.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    target_url = unquote(raw_link)
    print("Extracted PDF Link:", target_url)

    try:
        stream = getpdfcontent(target_url)
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if stream is None:
        return "PDF content not found or broken.", 404

    # getpdfcontent already hands back a BytesIO, so it goes to send_file as is.
    send_options = {
        "mimetype": 'application/pdf',
        "as_attachment": False,
        "download_name": "annotated_page.pdf",
    }
    return send_file(stream, **send_options)

# -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) --------------------
@app.route('/api/process-pdf', methods=['POST'])
def process_pdf_and_upload():
    """Mark up the PDF behind a shared link and upload both results to Dropbox.

    Request body (JSON object): ``{"filePath": <shared link of the PDF>}``.

    Returns:
        200 with ``message``, ``PDF_MarkedUp`` and ``Table_PDF_Markup_Summary``
        (the two values returned by ``tsadropboxretrieval.uploadanyFile``);
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when any step raises.
    """
    try:
        # silent=True: an unreadable body yields None instead of raising
        # BadRequest, which the broad ``except`` below used to report as a 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "'filePath' must be provided."}), 400

        print("Processing PDF:", pdfLink)
        # The first element (raw PDF bytes) is not needed by this endpoint.
        _, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)

        # The shared-link metadata supplies the original file name for the uploads.
        dbxTeam = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
        metadata = dbxTeam.sharing_get_shared_link_metadata(pdfLink)

        dbPath = '/TSA JOBS/ADR Test/FIND/'
        pdflink = tsadropboxretrieval.uploadanyFile(doc=pdf_document, path=dbPath, pdfname=metadata.name)
        # The summary is stored next to the marked-up PDF under "<name> Markup Summary.pdf".
        summary_name = metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
        tablepdfLink = tsadropboxretrieval.uploadanyFile(
            doc=tablepdfoutput,
            path=dbPath,
            pdfname=summary_name,
        )
        print('Uploaded:', pdflink, tablepdfLink)

        return jsonify({
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": pdflink,
            "Table_PDF_Markup_Summary": tablepdfLink
        })
    except Exception as e:
        print(f"Error in /api/process-pdf: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- Not billed / Markup subsets --------------------
@app.route('/findapitobebilled1', methods=['GET','POST'])
def findapitobebilled1():
    """Return the "to be billed" text extracted from the PDF named in the request.

    Request body (JSON object): ``{"filePath": ...}``.

    Returns:
        200 with the fourth element of the tuple returned by
        ``InitialMarkups.extract_section_under_header_tobebilledOnly``;
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    try:
        # silent=True: an unreadable or absent body (e.g. a plain GET) yields
        # None instead of raising BadRequest, which the broad ``except`` below
        # used to report as a 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        # Full unpacking is kept so a change in the tuple's length still fails
        # loudly; only the fourth element is used here.
        _, _, _, alltext_tobebilled, _, _ = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        return jsonify(alltext_tobebilled)
    except Exception as e:
        print(f"Error in /findapitobebilled1: {e}")
        return jsonify({"error": str(e)}), 500
    

# ----------------------------------------------------------------------
@app.route('/findapitobebilled_htmlformat', methods=['GET','POST'])
def findapitobebilled_htmlformat():
    """Render the "to be billed" markup table of a PDF as one HTML document.

    Request body (JSON object): ``{"filePath": ...}``.

    Each table entry may carry ``head above 2`` (rendered as ``<h1>``),
    ``head above 1`` (``<h2>``) and ``Subject`` (``<h3>``). ``BodyText`` is
    only rendered for entries that have a ``Subject`` and is joined with
    spaces, so it is expected to be a list of strings - TODO confirm.

    Returns:
        200 with ``input_data`` (the HTML) and ``Filename:`` (key kept with its
        trailing colon because clients may already read it under that name);
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    # Function-scope import so this handler carries its own dependency.
    from html import escape

    try:
        # silent=True: an unreadable or absent body yields None instead of
        # raising BadRequest, which the broad ``except`` below reported as 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        _, _, tablepdfoutput, _, _, filename = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)

        # Accept both a JSON document and an already-parsed table, as the
        # multiple-PDF endpoint in this file does.
        if isinstance(tablepdfoutput, (str, bytes, bytearray)):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        # Text taken from the PDF is escaped before it goes into markup, so
        # characters such as "<" or "&" cannot break or inject HTML.
        fragments = []
        for section in sections:
            if "head above 2" in section:
                fragments.append(f"<h1>{escape(str(section['head above 2']))}</h1><br>")

            if "head above 1" in section:
                fragments.append(f"<h2>{escape(str(section['head above 1']))}</h2><br>")

            if "Subject" in section:
                fragments.append(f"<h3>{escape(str(section['Subject']))}</h3><br>")
                if "BodyText" in section:
                    fragments.append(f"<p>{escape(' '.join(section['BodyText']))}</p><br>")
        html_body = "".join(fragments)
        safe_title = escape(str(filename))

        # Wrap everything into one HTML document
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{safe_title}</title>
            <meta charset="utf-8">
        </head>
        <body>
            {html_body}
        </body>
        </html>
        """
        return jsonify({"input_data": html_content,"Filename:":filename})
    except Exception as e:
        print(f"Error in /findapitobebilled_htmlformat: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/view-pdf-tobebilled', methods=['GET'])
def view_pdf_tobebilled():
    """Serve inline the PDF produced by the "to be billed" extraction.

    Returns:
        The PDF (``application/pdf``, not as an attachment) on success;
        400 when ``pdfLink`` is absent; 500 when extraction raises;
        404 when the result is ``None`` or lacks the ``%PDF`` signature.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    target_url = unquote(raw_link)
    print("Extracted PDF Link:", target_url)

    try:
        # Only the first element of the extraction result is served here.
        extraction = InitialMarkups.extract_section_under_header_tobebilledOnly(target_url)
        pdf_bytes = extraction[0]
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    looks_like_pdf = pdf_bytes is not None and pdf_bytes.startswith(b"%PDF")
    if not looks_like_pdf:
        return "PDF content not found or broken.", 404

    attachment_name = f"annotated_page_{pageNumTextFound}.pdf"
    return send_file(
        BytesIO(pdf_bytes),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=attachment_name,
    )

# -------------------- Final markups: view one highlight --------------------
@app.route('/view-highlight', methods=['GET','POST'])
def download_pdfHighlight():
    """Serve inline a PDF highlighting the section whose subject is ``keyword``.

    Query parameters: ``pdfLink`` and ``keyword``. The page to highlight is
    looked up in the module-level ``jsonoutput`` table (filled by /findapi or
    /findapiFilteredHeadings); when no entry matches, page 0 is used.

    Returns:
        The PDF (``application/pdf``, not as an attachment) on success;
        400 when a parameter is missing; 500 when extraction raises;
        404 when extraction yields ``None``.
    """
    pdf_link = request.args.get('pdfLink')
    keyword = request.args.get('keyword')
    if not pdf_link or not keyword:
        return "Missing required parameters.", 400

    pdf_link = unquote(pdf_link)
    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keyword:", keyword)

    # The stored table may be a JSON document rather than a parsed list (other
    # handlers in this file json.loads() the same kind of value). Iterating a
    # string would yield characters and crash on ``.get``.
    table = jsonoutput
    if isinstance(table, (str, bytes, bytearray)):
        try:
            table = json.loads(table)
        except ValueError:
            table = []
    if not isinstance(table, (list, tuple)):
        table = []

    matching_item = next(
        (item for item in table if isinstance(item, dict) and item.get("Subject") == keyword),
        None,
    )

    page_number = 0
    stringtowrite = None
    if matching_item:
        stringtowrite = matching_item.get("head above 1")
        try:
            # "Page" is stored 1-based; the extractor is given a 0-based index.
            page_number = int(matching_item.get("Page")) - 1
        except (TypeError, ValueError):
            # Missing or non-numeric page: fall back to the first page.
            print(f"Invalid page for '{keyword}'; defaulting to page 0.")
        else:
            print(f"Page number for '{keyword}': {page_number}")
    else:
        print("No match found in jsonoutput; defaulting to page 0.")

    # NOTE(review): this call passes four positional arguments, while
    # /findapiFilteredHeadings calls the same function with two - confirm the
    # signature of extract_section_under_headerRawan.
    try:
        pdf_content = InitialMarkups.extract_section_under_headerRawan(pdf_link, keyword, page_number, stringtowrite)[0]
    except Exception as e:
        # Same handling as /view-pdf-tobebilled instead of an unhandled error.
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500
    if pdf_content is None:
        return "PDF content not found.", 404

    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )

@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
def findapiFilteredHeadings():
    """Extract the sections under the requested headings of a PDF.

    Request body (JSON object): ``{"filePath": ..., "listofheadings": [...]}``.
    The markup table returned by the extractor is also stored in the
    module-level ``jsonoutput`` so /view-highlight can look pages up later.

    Returns:
        200 with the extracted text (fourth element of the extractor's result);
        400 when ``filePath`` or ``listofheadings`` is missing or the body is
        not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    global jsonoutput
    try:
        # silent=True: an unreadable or absent body yields None instead of
        # raising BadRequest, which the broad ``except`` below reported as 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        listofheadings = payload.get('listofheadings')  # json array
        # An empty list is accepted; only an absent value is rejected.
        if not pdfLink or listofheadings is None:
            return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400

        _, _, tablepdfoutput, alltext = InitialMarkups.extract_section_under_headerRawan(pdfLink, listofheadings)
        jsonoutput = tablepdfoutput
        return jsonify(alltext)
    except Exception as e:
        print(f"Error in /findapiFilteredHeadings: {e}")
        return jsonify({"error": str(e)}), 500

@app.route('/findapitobebilledonlyNew', methods=['GET','POST'])
def findapitobebilledonly():
    """Render the markup table from ``extract_section_under_header_tobebilled2`` as HTML.

    Request body (JSON object): ``{"filePath": ...}``.

    Each table entry may carry ``head above 2`` (rendered as ``<h1>``),
    ``head above 1`` (``<h2>``) and ``Subject`` (``<h3>``). ``BodyText`` is
    only rendered for entries that have a ``Subject`` and is joined with
    spaces, so it is expected to be a list of strings - TODO confirm.

    Returns:
        200 with ``input_data`` (the HTML) and ``Filename:`` (key kept with its
        trailing colon because clients may already read it under that name);
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    # Function-scope import so this handler carries its own dependency.
    from html import escape

    try:
        # silent=True: an unreadable or absent body yields None instead of
        # raising BadRequest, which the broad ``except`` below reported as 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        _, _, tablepdfoutput, _, filename = InitialMarkups.extract_section_under_header_tobebilled2(pdfLink)

        # Accept both a JSON document and an already-parsed table, as the
        # multiple-PDF endpoint in this file does.
        if isinstance(tablepdfoutput, (str, bytes, bytearray)):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        # Text taken from the PDF is escaped before it goes into markup, so
        # characters such as "<" or "&" cannot break or inject HTML.
        fragments = []
        for section in sections:
            if "head above 2" in section:
                fragments.append(f"<h1>{escape(str(section['head above 2']))}</h1><br>")

            if "head above 1" in section:
                fragments.append(f"<h2>{escape(str(section['head above 1']))}</h2><br>")

            if "Subject" in section:
                fragments.append(f"<h3>{escape(str(section['Subject']))}</h3><br>")
                if "BodyText" in section:
                    fragments.append(f"<p>{escape(' '.join(section['BodyText']))}</p><br>")
        html_body = "".join(fragments)
        safe_title = escape(str(filename))

        # Wrap everything into one HTML document
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{safe_title}</title>
            <meta charset="utf-8">
        </head>
        <body>
            {html_body}
        </body>
        </html>
        """
        return jsonify({"input_data": html_content,"Filename:":filename})
    except Exception as e:
        # The log line now names the route this handler is registered under.
        print(f"Error in /findapitobebilledonlyNew: {e}")
        return jsonify({"error": str(e)}), 500



@app.route('/findapitobebilledonlyNewMultiplePDFS', methods=['GET','POST'])
def findapitobebilledonlymarthe():
    """Render the markup table from ``extract_section_under_header_tobebilledMultiplePDFS`` as HTML.

    Request body (JSON object): ``{"filePath": ...}``; the value is passed to
    the extractor unchanged.

    Each table entry may carry ``head above 2`` (rendered as ``<h1>``),
    ``head above 1`` (``<h2>``) and ``Subject`` (``<h3>``). ``BodyText`` is
    only rendered for entries that have a ``Subject`` and is joined with
    spaces, so it is expected to be a list of strings - TODO confirm.

    Returns:
        200 with ``input_data`` (the HTML) and ``Filename:`` (key kept with its
        trailing colon because clients may already read it under that name);
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    # Function-scope import so this handler carries its own dependency.
    from html import escape

    try:
        # silent=True: an unreadable or absent body yields None instead of
        # raising BadRequest, which the broad ``except`` below reported as 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        _, _, tablepdfoutput, _, filename = InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdfLink)

        # The extractor may hand back a JSON document or an already-parsed table.
        if isinstance(tablepdfoutput, (str, bytes, bytearray)):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput

        # Text taken from the PDF is escaped before it goes into markup, so
        # characters such as "<" or "&" cannot break or inject HTML.
        fragments = []
        for section in sections:
            if "head above 2" in section:
                fragments.append(f"<h1>{escape(str(section['head above 2']))}</h1><br>")

            if "head above 1" in section:
                fragments.append(f"<h2>{escape(str(section['head above 1']))}</h2><br>")

            if "Subject" in section:
                fragments.append(f"<h3>{escape(str(section['Subject']))}</h3><br>")
                if "BodyText" in section:
                    fragments.append(f"<p>{escape(' '.join(section['BodyText']))}</p><br>")
        html_body = "".join(fragments)
        safe_title = escape(str(filename))

        # Wrap everything into one HTML document
        html_content = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <title>{safe_title}</title>
            <meta charset="utf-8">
        </head>
        <body>
            {html_body}
        </body>
        </html>
        """
        return jsonify({"input_data": html_content,"Filename:":filename})
    except Exception as e:
        # The log line previously named a different endpoint (copy-paste slip).
        print(f"Error in /findapitobebilledonlyNewMultiplePDFS: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/findapiAllDocNoNotbilled', methods=['GET','POST'])
def findapiAllDocNoNotbilled():
    """Return the ``alltextNoNotbilled`` result for the PDF named in the request.

    Request body (JSON object): ``{"filePath": ...}``.

    Returns:
        200 with the fifth element of the tuple returned by
        ``InitialMarkups.extract_section_under_header_tobebilledOnly``;
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    try:
        # silent=True: an unreadable or absent body yields None instead of
        # raising BadRequest, which the broad ``except`` below reported as 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        # Full unpacking is kept so a change in the tuple's length still fails
        # loudly; only the fifth element is used here.
        _, _, _, _, alltextNoNotbilled, _ = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        return jsonify(alltextNoNotbilled)
    except Exception as e:
        print(f"Error in /findapiAllDocNoNotbilled: {e}")
        return jsonify({"error": str(e)}), 500

# -------------------- Rawan - MC Connection --------------------
@app.route('/findapi', methods=['GET','POST'])
def findapi():
    """Return the markup table extracted from the PDF named in the request.

    Request body (JSON object): ``{"filePath": ...}``. The table is also stored
    in the module-level ``jsonoutput`` so /view-highlight can look pages up
    later.

    Returns:
        200 with the third element of the tuple returned by
        ``InitialMarkups.extract_section_under_header``;
        400 when ``filePath`` is missing or the body is not a JSON object;
        500 with ``{"error": ...}`` when anything else raises.
    """
    global jsonoutput
    try:
        # silent=True: an unreadable or absent body yields None instead of
        # raising BadRequest, which the broad ``except`` below reported as 500.
        payload = request.get_json(force=True, silent=True)
        if not isinstance(payload, dict):
            payload = {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400

        _, _, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
        jsonoutput = tablepdfoutput
        return jsonify(tablepdfoutput)
    except Exception as e:
        print(f"Error in /findapi: {e}")
        return jsonify({"error": str(e)}), 500

#--------------------testpage-----------------------------
import socket
from datetime import datetime

@app.route('/testpage')
def test_page():
    """Return a static HTML status page showing the host name and server time."""
    # ``datetime`` here is the class brought in by the
    # ``from datetime import datetime`` statement just above this route.
    rendered_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    machine = socket.gethostname()

    # Doubled braces keep the CSS rules literal inside the f-string.
    page = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Server Test Page</title>
        <style>
            body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
            .success {{ color: #2ecc71; font-size: 24px; }}
            .info {{ color: #34495e; margin-top: 10px; }}
            .container {{ max-width: 600px; margin: 0 auto; text-align: left; }}
        </style>
    </head>
    <body>
        <div class="success">🚀 Flask Server is Running!</div>
        <div class="container">
            <p class="info"><strong>Hostname:</strong> {machine}</p>
            <p class="info"><strong>Server Time:</strong> {rendered_at}</p>
            <p class="info"><strong>Endpoint:</strong> /testpage</p>
            <p class="info"><strong>Status:</strong> <span style="color: #2ecc71;">Operational ✅</span></p>
        </div>
    </body>
    </html>
    """
    return page

# -------------------- Run --------------------   
if __name__ == "__main__":
    import os

    # Listening on 0.0.0.0 with debug=True exposes Werkzeug's interactive
    # debugger (which can execute arbitrary code) to every host that can reach
    # the port. Debug mode is therefore opt-in: set FLASK_DEBUG to 1, true,
    # yes or on for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)