InitialMarkups / app.py
Marthee's picture
Update app.py
d99ce5e verified
from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response
import tsadropboxretrieval
# import findInitialMarkups
import InitialMarkups
import requests
import fitz
from io import BytesIO
import datetime
import time
from threading import Thread
from urllib.parse import quote, unquote, parse_qs
# import pdftotext
import json
# -------------------- App & Globals --------------------
# Flask application object; every @app.route in this file registers on it.
app = Flask(__name__)
# Interpolated into the download_name of the PDFs served by
# /view-pdf-tobebilled and /view-highlight. Nothing in this file reassigns
# it, so as far as this file shows the value stays 0.
pageNumTextFound = 0
BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage" # not referenced elsewhere in this file; presumably the deployed /testpage URL - TODO confirm
# Not read or written anywhere else in this file.
backend_ready = False
# Shared cache of the latest table output: written by /findapi and
# /findapiFilteredHeadings, read by /view-highlight. It is plain module
# state, so it is per-process and overwritten by each of those requests.
jsonoutput = [] # ensure defined before use
# -------------------- Simple Health/Test --------------------
@app.route("/health", methods=["GET"])
def health():
    """Report liveness as JSON: ``{"status": "ok", "time": <ISO timestamp>}``.

    The timestamp is the server's local time at the moment of the request.
    """
    # Import locally under a private name: further down this file a
    # module-level `from datetime import datetime` rebinds the global name
    # `datetime` to the class, so the former `datetime.datetime.now()` raised
    # AttributeError on every request.
    from datetime import datetime as _datetime

    return jsonify(status="ok", time=_datetime.now().isoformat())
# -------------------- Root: keep it simple & reliable --------------------
@app.route("/", methods=["GET"])
def root():
    """Describe the service at the site root with a small JSON message.

    A JSON body is returned instead of a rendered template so this endpoint
    cannot fail on a missing template file.
    """
    body = jsonify(message="FIND APIs root. Use /health or /testpage.")
    return body, 200
# -------------------- Headers Filtering Find 1 Space --------------------
@app.route('/api/process-data', methods=['POST'])
def process_headers():
    """Return the headers extracted from the PDF named by ``filePath``.

    Expects a JSON body (parsed regardless of Content-Type) with a
    ``filePath`` entry.

    Returns:
        The JSON-encoded result of ``findInitialMarkups.headersfrompdf``;
        ``{"error": ...}`` with status 400 when ``filePath`` is missing, or
        with status 500 when anything else fails (including the helper
        module not being importable).
    """
    try:
        data = request.get_json(force=True) or {}
        filePath = data.get('filePath')
        if not filePath:
            return jsonify({"error": "Missing 'filePath'"}), 400
        # The top-of-file import of this module is commented out, so the bare
        # name used to be undefined and every request died with NameError.
        # Importing here makes the endpoint work wherever the module is
        # available and yields a clear "No module named ..." error otherwise.
        import findInitialMarkups

        headers = findInitialMarkups.headersfrompdf(filePath)
        return jsonify(headers)
    except Exception as e:
        print(f"Error in /api/process-data: {e}")
        return jsonify({"error": str(e)}), 500
# -------------------- PDF to Text 1 Space --------------------
@app.route('/processalltext1', methods=['POST'])
def processalltextTotext():
    """Return the full text extracted from the PDF named by ``filePath``.

    Expects a JSON body (parsed regardless of Content-Type) with a
    ``filePath`` entry.

    Returns:
        JSON with ``message``, ``input_data`` (the extracted text) and
        ``Filename:``; ``{"error": ...}`` with status 400 when ``filePath``
        is missing, or with status 500 when anything else fails (including
        the helper module not being importable).
    """
    try:
        data = request.get_json(force=True) or {}
        pdfpath = data.get('filePath')
        if not pdfpath:
            return jsonify({"error": "Missing 'filePath' in request data"}), 400
        # The top-of-file import of this module is commented out, so the bare
        # name used to be undefined and every request died with NameError.
        # NOTE(review): presumably the project-local module that provides
        # texts_from_pdfAllText, not the PyPI package of the same name - confirm.
        import pdftotext

        pdftext, filename = pdftotext.texts_from_pdfAllText(pdfpath)
        # The trailing colon in the "Filename:" key is kept on purpose:
        # existing clients read that exact key.
        return jsonify({"message": "Data received", "input_data": pdftext,"Filename:":filename})
    except Exception as e:
        print(f"Error in /processalltext1: {e}")
        return jsonify({"error": str(e)}), 500
# -------------------- Keepalive --------------------
@app.route("/keepaliveapii", methods=["GET", "POST"])
def keepaliveapi():
    """Answer a keepalive ping with the plain-text body ``alivee``.

    Each ping is logged to stdout. If logging raises, the error is printed
    and returned as a JSON payload with status 500.
    """
    try:
        print('Keepalive pinged')
    except Exception as error:
        print('Error in keepalive:', error)
        return jsonify(status="error", message=str(error)), 500
    else:
        return 'alivee'
# -------------------- View PDF (Marked up) --------------------
def getpdfcontent(pdf_path):
    """Download a PDF and return its bytes wrapped in a ``BytesIO`` stream.

    Args:
        pdf_path: URL of the PDF. When it contains ``http`` or ``dropbox``,
            every ``dl=0`` in it is rewritten to ``dl=1`` (presumably to ask
            Dropbox for the file itself rather than its preview page).

    Returns:
        A ``BytesIO`` holding the downloaded bytes.

    Raises:
        ValueError: if ``pdf_path`` is empty/None, or the downloaded body is
            empty or does not start with the ``%PDF`` magic bytes.
        requests.RequestException: on network failure or timeout.
    """
    # Fail fast instead of handing an empty/None URL to requests.
    if not pdf_path:
        raise ValueError("No PDF link provided.")
    # Handle Dropbox URLs
    if 'http' in pdf_path or 'dropbox' in pdf_path:
        pdf_path = pdf_path.replace('dl=0', 'dl=1')
    # NOTE(review): pdf_path comes from the request and is fetched
    # server-side, which allows SSRF; consider restricting allowed hosts.
    # A (connect, read) timeout keeps a stalled remote from blocking this
    # worker forever; requests applies no timeout by default.
    response = requests.get(pdf_path, timeout=(10, 120))
    pdf_bytes = response.content
    # Error pages and HTML previews are rejected by checking the PDF magic.
    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
        raise ValueError("No valid PDF content found.")
    return BytesIO(pdf_bytes)
@app.route('/view-pdf', methods=['GET'])
def view_pdf():
    """Fetch the PDF named by the ``pdfLink`` query parameter and serve it inline.

    Returns:
        The PDF as ``application/pdf`` (download name ``annotated_page.pdf``);
        a plain-text 400 when ``pdfLink`` is missing, 500 when the download
        or validation raises, 404 when no content came back.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(raw_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        pdf_stream = getpdfcontent(pdf_link)
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    if pdf_stream is None:
        return "PDF content not found or broken.", 404

    # getpdfcontent already hands back a file-like stream, so it is passed
    # to send_file as-is rather than wrapped in another BytesIO.
    send_options = {
        "mimetype": 'application/pdf',
        "as_attachment": False,
        "download_name": "annotated_page.pdf",
    }
    return send_file(pdf_stream, **send_options)
# -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) --------------------
@app.route('/api/process-pdf', methods=['POST'])
def process_pdf_and_upload():
    """Mark up the PDF named by ``filePath`` and upload both outputs to Dropbox.

    Runs ``InitialMarkups.extract_section_under_header`` on the link, then
    uploads the marked-up document and the table output to
    ``/TSA JOBS/ADR Test/FIND/`` using the shared link's file name (the
    second one with ``.pdf`` replaced by `` Markup Summary.pdf``).

    Returns:
        JSON with ``message``, ``PDF_MarkedUp`` and
        ``Table_PDF_Markup_Summary``; ``{"error": ...}`` with status 400 when
        ``filePath`` is missing or 500 on any other failure.
    """
    try:
        payload = request.get_json(force=True) or {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "'filePath' must be provided."}), 400

        print("Processing PDF:", pdfLink)
        _pdfbytes, marked_doc, table_doc = InitialMarkups.extract_section_under_header(pdfLink)

        team_client = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
        link_meta = team_client.sharing_get_shared_link_metadata(pdfLink)
        target_dir = '/TSA JOBS/ADR Test/FIND/'

        marked_url = tsadropboxretrieval.uploadanyFile(
            doc=marked_doc, path=target_dir, pdfname=link_meta.name)
        # The summary name is derived only after the first upload, matching
        # the original order of operations.
        summary_name = link_meta.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
        summary_url = tsadropboxretrieval.uploadanyFile(
            doc=table_doc, path=target_dir, pdfname=summary_name)
        print('Uploaded:', marked_url, summary_url)

        result = {
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": marked_url,
            "Table_PDF_Markup_Summary": summary_url,
        }
        return jsonify(result)
    except Exception as e:
        print(f"Error in /api/process-pdf: {e}")
        return jsonify({"error": str(e)}), 500
# -------------------- Not billed / Markup subsets --------------------
@app.route('/findapitobebilled1', methods=['GET','POST'])
def findapitobebilled1():
    """Return the "to be billed" text extracted from the PDF named by ``filePath``.

    Calls ``InitialMarkups.extract_section_under_header_tobebilledOnly`` and
    responds with the fourth of its six results as JSON. Responds with
    ``{"error": ...}`` and status 400 when ``filePath`` is missing, or 500 on
    any other failure.
    """
    try:
        payload = request.get_json(force=True) or {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        # Only the fourth result is used; the unpacking still requires
        # exactly six values, as before.
        (_pdfbytes, _document, _table, alltext_tobebilled,
         _no_notbilled, _filename) = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        return jsonify(alltext_tobebilled)
    except Exception as e:
        print(f"Error in /findapitobebilled1: {e}")
        return jsonify({"error": str(e)}), 500
# ----------------------------------------------------------------------
@app.route('/findapitobebilled_htmlformat', methods=['GET','POST'])
def findapitobebilled_htmlformat():
    """Render the "to be billed" sections of a PDF as one HTML document.

    Expects a JSON body with ``filePath``. The table output of
    ``InitialMarkups.extract_section_under_header_tobebilledOnly`` is read as
    a list of section dicts; for each, the keys ``head above 2``,
    ``head above 1``, ``Subject`` and ``BodyText`` (when present) become
    ``<h1>``, ``<h2>``, ``<h3>`` and ``<p>`` elements, in that order.

    Returns:
        JSON with ``input_data`` (the HTML) and ``Filename:``;
        ``{"error": ...}`` with status 400 when ``filePath`` is missing or
        500 on any other failure.
    """
    # Local import so this handler does not depend on a file-level import.
    from html import escape

    try:
        data = request.get_json(force=True) or {}
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        (_pdfbytes, _pdf_document, tablepdfoutput, _alltext_tobebilled,
         _alltextNoNotbilled, filename) = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        # Accept either a JSON string or an already-parsed list of dicts.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput
        heading_tags = (
            ("head above 2", "h1"),
            ("head above 1", "h2"),
            ("Subject", "h3"),
        )
        # Text extracted from a PDF is untrusted: escape it so characters such
        # as "<" or "&" cannot break or inject markup into the document.
        parts = []
        for section in sections:
            for key, tag in heading_tags:
                if key in section:
                    parts.append(f"<{tag}>{escape(str(section[key]))}</{tag}><br>")
            if "BodyText" in section:
                parts.append(f"<p>{escape(' '.join(section['BodyText']))}</p><br>")
        html_body = "".join(parts)
        safe_title = escape(str(filename))
        # Wrap everything into one HTML document
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta charset="utf-8">
</head>
<body>
{html_body}
</body>
</html>
"""
        return jsonify({"input_data": html_content,"Filename:":filename})
    except Exception as e:
        print(f"Error in /findapitobebilled_htmlformat: {e}")
        return jsonify({"error": str(e)}), 500
@app.route('/view-pdf-tobebilled', methods=['GET'])
def view_pdf_tobebilled():
    """Serve inline the PDF bytes produced for the link in ``pdfLink``.

    The bytes are the first result of
    ``InitialMarkups.extract_section_under_header_tobebilledOnly``.

    Returns:
        The PDF as ``application/pdf``; a plain-text 400 when ``pdfLink`` is
        missing, 500 when extraction raises, 404 when the result is None or
        does not start with ``%PDF``.
    """
    raw_link = request.args.get('pdfLink')
    if not raw_link:
        return "Missing pdfLink parameter.", 400

    pdf_link = unquote(raw_link)
    print("Extracted PDF Link:", pdf_link)

    try:
        extraction = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)
        pdf_content = extraction[0]
    except Exception as e:
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500

    looks_like_pdf = pdf_content is not None and pdf_content.startswith(b"%PDF")
    if not looks_like_pdf:
        return "PDF content not found or broken.", 404

    attachment_name = f"annotated_page_{pageNumTextFound}.pdf"
    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=attachment_name,
    )
# -------------------- Final markups: view one highlight --------------------
@app.route('/view-highlight', methods=['GET','POST'])
def download_pdfHighlight():
    """Serve a PDF highlighting the section whose Subject equals ``keyword``.

    Reads ``pdfLink`` and ``keyword`` from the query string, looks the
    keyword up in the module-level ``jsonoutput`` cache to find its page
    (stored 1-based, passed on 0-based) and its ``head above 1`` value, and
    sends the first result of
    ``InitialMarkups.extract_section_under_headerRawan``.

    Returns:
        The PDF as ``application/pdf``; a plain-text 400 when a parameter is
        missing, 500 when extraction raises, 404 when no content came back.
    """
    pdf_link = request.args.get('pdfLink')
    keyword = request.args.get('keyword')
    if not pdf_link or not keyword:
        return "Missing required parameters.", 400
    pdf_link = unquote(pdf_link)
    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keyword:", keyword)

    # The cache is assigned straight from the extractor's table output. Other
    # handlers in this file treat such output as possibly a JSON string, so
    # decode it here rather than iterating over its characters.
    records = jsonoutput
    if isinstance(records, str):
        try:
            records = json.loads(records)
        except ValueError:
            records = []
    matching_item = next(
        (item for item in records or []
         if isinstance(item, dict) and item.get("Subject") == keyword),
        None,
    )

    page_number = 0
    stringtowrite = None
    if matching_item:
        stringtowrite = matching_item.get("head above 1")
        try:
            page_number = int(matching_item.get("Page")) - 1
        except (TypeError, ValueError):
            # A missing or non-numeric "Page" falls back to the first page
            # instead of aborting the request.
            page_number = 0
        print(f"Page number for '{keyword}': {page_number}")
    else:
        print("No match found in jsonoutput; defaulting to page 0.")

    # NOTE(review): this call passes four positional arguments while
    # /findapiFilteredHeadings calls the same function with two - confirm
    # the signature supports both.
    try:
        pdf_content = InitialMarkups.extract_section_under_headerRawan(
            pdf_link, keyword, page_number, stringtowrite)[0]
    except Exception as e:
        # Same handling as /view-pdf-tobebilled: report instead of crashing.
        print("Error during PDF extraction:", e)
        return "PDF could not be processed.", 500
    if pdf_content is None:
        return "PDF content not found.", 404
    return send_file(
        BytesIO(pdf_content),
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )
@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
def findapiFilteredHeadings():
    """Extract the sections under the requested headings of a PDF.

    Expects a JSON body with ``filePath`` and ``listofheadings``. The table
    output of ``InitialMarkups.extract_section_under_headerRawan`` is stored
    in the module-level ``jsonoutput`` cache, and the extracted text is
    returned as JSON. Responds with ``{"error": ...}`` and status 400 when
    either input is missing, or 500 on any other failure.
    """
    global jsonoutput
    try:
        payload = request.get_json(force=True) or {}
        pdfLink = payload.get('filePath')
        listofheadings = payload.get('listofheadings')  # json array
        # An empty heading list is accepted; only an absent one is rejected.
        if listofheadings is None or not pdfLink:
            return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400
        _pdfbytes, _document, table_output, alltext = \
            InitialMarkups.extract_section_under_headerRawan(pdfLink, listofheadings)
        jsonoutput = table_output
        return jsonify(alltext)
    except Exception as e:
        print(f"Error in /findapiFilteredHeadings: {e}")
        return jsonify({"error": str(e)}), 500
@app.route('/findapitobebilledonlyNew', methods=['GET','POST'])
def findapitobebilledonly():
    """Render the sections found by ``extract_section_under_header_tobebilled2`` as HTML.

    Expects a JSON body with ``filePath``. The extractor's table output is
    read as a list of section dicts; for each, the keys ``head above 2``,
    ``head above 1``, ``Subject`` and ``BodyText`` (when present) become
    ``<h1>``, ``<h2>``, ``<h3>`` and ``<p>`` elements, in that order.

    Returns:
        JSON with ``input_data`` (the HTML) and ``Filename:``;
        ``{"error": ...}`` with status 400 when ``filePath`` is missing or
        500 on any other failure.
    """
    # Local import so this handler does not depend on a file-level import.
    from html import escape

    try:
        data = request.get_json(force=True) or {}
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        (_pdfbytes, _pdf_document, tablepdfoutput,
         _alltext, filename) = InitialMarkups.extract_section_under_header_tobebilled2(pdfLink)
        # Accept either a JSON string or an already-parsed list of dicts.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput
        heading_tags = (
            ("head above 2", "h1"),
            ("head above 1", "h2"),
            ("Subject", "h3"),
        )
        # Text extracted from a PDF is untrusted: escape it so characters such
        # as "<" or "&" cannot break or inject markup into the document.
        parts = []
        for section in sections:
            for key, tag in heading_tags:
                if key in section:
                    parts.append(f"<{tag}>{escape(str(section[key]))}</{tag}><br>")
            if "BodyText" in section:
                parts.append(f"<p>{escape(' '.join(section['BodyText']))}</p><br>")
        html_body = "".join(parts)
        safe_title = escape(str(filename))
        # Wrap everything into one HTML document
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta charset="utf-8">
</head>
<body>
{html_body}
</body>
</html>
"""
        return jsonify({"input_data": html_content,"Filename:":filename})
    except Exception as e:
        # Log under the route this handler is actually registered on.
        print(f"Error in /findapitobebilledonlyNew: {e}")
        return jsonify({"error": str(e)}), 500
@app.route('/findapitobebilledonlyNewMultiplePDFS', methods=['GET','POST'])
def findapitobebilledonlymarthe():
    """Render the sections found by ``extract_section_under_header_tobebilledMultiplePDFS`` as HTML.

    Expects a JSON body with ``filePath``. The extractor's table output
    (a JSON string or an already-parsed list) is read as section dicts; for
    each, the keys ``head above 2``, ``head above 1``, ``Subject`` and
    ``BodyText`` (when present) become ``<h1>``, ``<h2>``, ``<h3>`` and
    ``<p>`` elements, in that order.

    Returns:
        JSON with ``input_data`` (the HTML) and ``Filename:``;
        ``{"error": ...}`` with status 400 when ``filePath`` is missing or
        500 on any other failure.
    """
    # Local import so this handler does not depend on a file-level import.
    from html import escape

    try:
        data = request.get_json(force=True) or {}
        pdfLink = data.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        (_pdfbytes, _pdf_document, tablepdfoutput,
         _alltext, filename) = InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdfLink)
        # Parse JSON string -> list of dicts; pass parsed data through as-is.
        if isinstance(tablepdfoutput, str):
            sections = json.loads(tablepdfoutput)
        else:
            sections = tablepdfoutput
        heading_tags = (
            ("head above 2", "h1"),
            ("head above 1", "h2"),
            ("Subject", "h3"),
        )
        # Text extracted from a PDF is untrusted: escape it so characters such
        # as "<" or "&" cannot break or inject markup into the document.
        parts = []
        for section in sections:
            for key, tag in heading_tags:
                if key in section:
                    parts.append(f"<{tag}>{escape(str(section[key]))}</{tag}><br>")
            if "BodyText" in section:
                parts.append(f"<p>{escape(' '.join(section['BodyText']))}</p><br>")
        html_body = "".join(parts)
        safe_title = escape(str(filename))
        # Wrap everything into one HTML document
        html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{safe_title}</title>
<meta charset="utf-8">
</head>
<body>
{html_body}
</body>
</html>
"""
        return jsonify({"input_data": html_content,"Filename:":filename})
    except Exception as e:
        # Log under the route this handler is actually registered on.
        print(f"Error in /findapitobebilledonlyNewMultiplePDFS: {e}")
        return jsonify({"error": str(e)}), 500
@app.route('/findapiAllDocNoNotbilled', methods=['GET','POST'])
def findapiAllDocNoNotbilled():
    """Return the ``alltextNoNotbilled`` result for the PDF named by ``filePath``.

    Calls ``InitialMarkups.extract_section_under_header_tobebilledOnly`` and
    responds with the fifth of its six results as JSON. Responds with
    ``{"error": ...}`` and status 400 when ``filePath`` is missing, or 500 on
    any other failure.
    """
    try:
        payload = request.get_json(force=True) or {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        # Only the fifth result is used; the unpacking still requires
        # exactly six values, as before.
        (_pdfbytes, _document, _table, _tobebilled,
         alltextNoNotbilled, _filename) = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
        return jsonify(alltextNoNotbilled)
    except Exception as e:
        print(f"Error in /findapiAllDocNoNotbilled: {e}")
        return jsonify({"error": str(e)}), 500
# -------------------- Rawan - MC Connection --------------------
@app.route('/findapi', methods=['GET','POST'])
def findapi():
    """Extract the markup table for the PDF named by ``filePath``.

    The table output of ``InitialMarkups.extract_section_under_header`` is
    stored in the module-level ``jsonoutput`` cache (read later by
    /view-highlight) and returned as JSON. Responds with ``{"error": ...}``
    and status 400 when ``filePath`` is missing, or 500 on any other failure.
    """
    global jsonoutput
    try:
        payload = request.get_json(force=True) or {}
        pdfLink = payload.get('filePath')
        if not pdfLink:
            return jsonify({"error": "Missing 'filePath'"}), 400
        _pdfbytes, _document, table_output = InitialMarkups.extract_section_under_header(pdfLink)
        jsonoutput = table_output
        return jsonify(table_output)
    except Exception as e:
        print(f"Error in /findapi: {e}")
        return jsonify({"error": str(e)}), 500
#--------------------testpage-----------------------------
import socket
from datetime import datetime
@app.route('/testpage')
def test_page():
    """Return a static HTML status page showing the hostname and server time.

    The time is the server's local time formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    # Values interpolated into the page below.
    current_time = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
    hostname = socket.gethostname()
    page = f"""
<!DOCTYPE html>
<html>
<head>
<title>Server Test Page</title>
<style>
body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
.success {{ color: #2ecc71; font-size: 24px; }}
.info {{ color: #34495e; margin-top: 10px; }}
.container {{ max-width: 600px; margin: 0 auto; text-align: left; }}
</style>
</head>
<body>
<div class="success">🚀 Flask Server is Running!</div>
<div class="container">
<p class="info"><strong>Hostname:</strong> {hostname}</p>
<p class="info"><strong>Server Time:</strong> {current_time}</p>
<p class="info"><strong>Endpoint:</strong> /testpage</p>
<p class="info"><strong>Status:</strong> <span style="color: #2ecc71;">Operational ✅</span></p>
</div>
</body>
</html>
"""
    return page
# -------------------- Run --------------------
if __name__ == "__main__":
    import os

    # Running with debug=True while bound to 0.0.0.0 exposes the Werkzeug
    # interactive debugger (arbitrary code execution) to anyone who can reach
    # the port. Debug mode is therefore opt-in: set FLASK_DEBUG=1 to enable it
    # for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)