InitialMarkups

Runtime error

App Files Files Community

InitialMarkups / app.py

Marthee

Update app.py

d99ce5e verified 2 months ago

raw

history blame contribute delete

17.1 kB

	from flask import Flask, request, jsonify, render_template, send_file, redirect, url_for, Response
	import tsadropboxretrieval
	# import findInitialMarkups
	import InitialMarkups
	import requests
	import fitz
	from io import BytesIO
	import datetime
	import time
	from threading import Thread
	from urllib.parse import quote, unquote, parse_qs
	# import pdftotext
	import json
	# -------------------- App & Globals --------------------
	# Flask application object; every route in this file is registered on it.
	app = Flask(__name__)
	# Number embedded in the download file names built by /view-pdf-tobebilled
	# and /view-highlight; nothing in this file reassigns it after this 0.
	pageNumTextFound = 0
	# NOTE(review): not referenced anywhere else in this file -- confirm it is still needed.
	BASE_URL = "https://adr.trevorsadd.co.uk/api/testpage" ##changed this only
	# NOTE(review): never read or updated in this file -- confirm it is still needed.
	backend_ready = False
	# Most recent markup table: written by /findapi and /findapiFilteredHeadings,
	# read by /view-highlight. Starts empty so that lookup works before any run.
	jsonoutput = [] # ensure defined before use

	# -------------------- Simple Health/Test --------------------
	@app.route("/health", methods=["GET"])
	def health():
	    """Report liveness as JSON.

	    Returns:
	        A JSON response with ``status`` set to "ok" and ``time`` set to the
	        server's current local time in ISO 8601 format.
	    """
	    # Import under a private alias: further down this module,
	    # ``from datetime import datetime`` rebinds the global name ``datetime``
	    # to the class, so the former ``datetime.datetime.now()`` raised
	    # AttributeError at request time.
	    import datetime as _datetime

	    return jsonify(status="ok", time=_datetime.datetime.now().isoformat())

	# -------------------- Root: keep it simple & reliable --------------------
	@app.route("/", methods=["GET"])
	def root():
	    """Return a short JSON pointer to the useful endpoints with HTTP 200."""
	    # Plain JSON instead of a rendered template, so the root URL cannot
	    # fail because a template file is missing.
	    body = jsonify(message="FIND APIs root. Use /health or /testpage.")
	    return body, 200

	# -------------------- Headers Filtering Find 1 Space --------------------
	@app.route('/api/process-data', methods=['POST'])
	def process_headers():
	    """Return the headers extracted from the PDF named by ``filePath``.

	    Reads a JSON body, answers 400 when ``filePath`` is missing or empty,
	    and answers 500 with the exception text when anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        file_path = payload.get('filePath')
	        if file_path:
	            # NOTE(review): ``import findInitialMarkups`` is commented out at
	            # the top of this file, so unless the name is bound elsewhere
	            # this lookup raises NameError and the handler answers 500 --
	            # confirm which module should provide ``headersfrompdf``.
	            return jsonify(findInitialMarkups.headersfrompdf(file_path))
	        return jsonify({"error": "Missing 'filePath'"}), 400
	    except Exception as exc:
	        print(f"Error in /api/process-data: {exc}")
	        return jsonify({"error": str(exc)}), 500

	# -------------------- PDF to Text 1 Space --------------------
	@app.route('/processalltext1', methods=['POST'])
	def processalltextTotext():
	    """Return the text extracted from the PDF named by ``filePath``.

	    Reads a JSON body, answers 400 when ``filePath`` is missing or empty,
	    and answers 500 with the exception text when anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfpath = payload.get('filePath')
	        if pdfpath:
	            # NOTE(review): ``import pdftotext`` is commented out at the top
	            # of this file, so unless the name is bound elsewhere this
	            # lookup raises NameError and the handler answers 500 -- confirm
	            # which module should provide ``texts_from_pdfAllText``.
	            pdftext, filename = pdftotext.texts_from_pdfAllText(pdfpath)
	            reply = {"message": "Data received", "input_data": pdftext, "Filename:": filename}
	            return jsonify(reply)
	        return jsonify({"error": "Missing 'filePath' in request data"}), 400
	    except Exception as exc:
	        print(f"Error in /processalltext1: {exc}")
	        return jsonify({"error": str(exc)}), 500

	# -------------------- Keepalive --------------------
	@app.route("/keepaliveapii", methods=["GET", "POST"])
	def keepaliveapi():
	    """Answer a keepalive ping with the plain text ``alivee``.

	    If logging the ping raises, the error is printed and returned as a
	    JSON body with HTTP 500 instead.
	    """
	    try:
	        print('Keepalive pinged')
	    except Exception as error:
	        print('Error in keepalive:', error)
	        return jsonify(status="error", message=str(error)), 500
	    else:
	        return 'alivee'

	# -------------------- View PDF (Marked up) --------------------
	def getpdfcontent(pdf_path, timeout=30):
	    """Download a PDF and return it as an in-memory binary stream.

	    Args:
	        pdf_path: URL of the PDF. When the link contains ``http`` or
	            ``dropbox``, any ``dl=0`` in it is rewritten to ``dl=1`` before
	            the request is made.
	        timeout: Seconds passed to ``requests.get`` as its timeout, so a
	            stalled remote host cannot block the request handler forever.

	    Returns:
	        A ``BytesIO`` wrapping the downloaded bytes.

	    Raises:
	        ValueError: If ``pdf_path`` is empty, or the response body is empty
	            or does not start with the ``%PDF`` signature.
	    """
	    # Fail fast on a missing link instead of letting the HTTP client choke on it.
	    if not pdf_path:
	        raise ValueError("No PDF link provided.")

	    # Handle Dropbox URLs
	    if 'http' in pdf_path or 'dropbox' in pdf_path:
	        pdf_path = pdf_path.replace('dl=0', 'dl=1')

	    # Get the PDF bytes
	    response = requests.get(pdf_path, timeout=timeout)
	    pdf_bytes = response.content

	    if not pdf_bytes or not pdf_bytes.startswith(b"%PDF"):
	        raise ValueError("No valid PDF content found.")

	    # Return a BytesIO stream
	    return BytesIO(pdf_bytes)


	@app.route('/view-pdf', methods=['GET'])
	def view_pdf():
	    """Serve the PDF referenced by the ``pdfLink`` query parameter inline.

	    Answers 400 when the parameter is absent, 500 when fetching the PDF
	    raises, and 404 when the fetch yields ``None``.
	    """
	    raw_link = request.args.get('pdfLink')
	    if not raw_link:
	        return "Missing pdfLink parameter.", 400

	    pdf_link = unquote(raw_link)
	    print("Extracted PDF Link:", pdf_link)

	    try:
	        stream = getpdfcontent(pdf_link)
	    except Exception as exc:
	        print("Error during PDF extraction:", exc)
	        return "PDF could not be processed.", 500

	    if stream is None:
	        return "PDF content not found or broken.", 404

	    # getpdfcontent already returns a file-like object, so it is handed to
	    # send_file as-is rather than wrapped in another BytesIO.
	    send_options = {
	        "mimetype": 'application/pdf',
	        "as_attachment": False,
	        "download_name": "annotated_page.pdf",
	    }
	    return send_file(stream, **send_options)

	# -------------------- Process PDF -> Upload to Dropbox (renamed to avoid duplicate route) --------------------
	@app.route('/api/process-pdf', methods=['POST'])
	def process_pdf_and_upload():
	    """Mark up the PDF at ``filePath`` and upload both outputs to Dropbox.

	    Answers with the values returned by the two uploads (the marked-up PDF
	    and its "Markup Summary" companion), 400 when ``filePath`` is missing,
	    or 500 with the exception text when anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "'filePath' must be provided."}), 400

	        print("Processing PDF:", pdfLink)
	        _, pdf_document, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)

	        # The shared-link metadata supplies the file name used for both uploads.
	        team_client = tsadropboxretrieval.ADR_Access_DropboxTeam('user')
	        metadata = team_client.sharing_get_shared_link_metadata(pdfLink)

	        target_folder = '/TSA JOBS/ADR Test/FIND/'
	        pdflink = tsadropboxretrieval.uploadanyFile(
	            doc=pdf_document, path=target_folder, pdfname=metadata.name
	        )

	        summary_name = metadata.name.rsplit(".pdf", 1)[0] + ' Markup Summary.pdf'
	        tablepdfLink = tsadropboxretrieval.uploadanyFile(
	            doc=tablepdfoutput, path=target_folder, pdfname=summary_name
	        )
	        print('Uploaded:', pdflink, tablepdfLink)

	        result = {
	            "message": "PDF processed successfully.",
	            "PDF_MarkedUp": pdflink,
	            "Table_PDF_Markup_Summary": tablepdfLink,
	        }
	        return jsonify(result)
	    except Exception as exc:
	        print(f"Error in /api/process-pdf: {exc}")
	        return jsonify({"error": str(exc)}), 500

	# -------------------- Not billed / Markup subsets --------------------
	@app.route('/findapitobebilled1', methods=['GET','POST'])
	def findapitobebilled1():
	    """Return the fourth value produced by the to-be-billed extractor as JSON.

	    Answers 400 when ``filePath`` is missing from the JSON body and 500
	    with the exception text when anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "Missing 'filePath'"}), 400

	        # The extractor yields six values; only ``alltext_tobebilled`` is used here.
	        _, _, _, alltext_tobebilled, _, _ = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
	        return jsonify(alltext_tobebilled)
	    except Exception as exc:
	        print(f"Error in /findapitobebilled1: {exc}")
	        return jsonify({"error": str(exc)}), 500


	# ----------------------------------------------------------------------
	@app.route('/findapitobebilled_htmlformat', methods=['GET','POST'])
	def findapitobebilled_htmlformat():
	    """Render the to-be-billed markup table of a PDF as one HTML document.

	    Reads ``filePath`` from the JSON body, runs the to-be-billed extractor
	    and turns each section of its table output into headings/paragraphs.

	    Returns:
	        JSON with ``input_data`` (the HTML document) and ``Filename:``;
	        400 when ``filePath`` is missing; 500 with the exception text when
	        anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "Missing 'filePath'"}), 400
	        _, _, tablepdfoutput, _, _, filename = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)

	        # Accept the table either as JSON text or as an already-parsed list,
	        # matching the /findapitobebilledonlyNewMultiplePDFS handler; the
	        # unconditional json.loads raised TypeError on a parsed list.
	        if isinstance(tablepdfoutput, (str, bytes, bytearray)):
	            sections = json.loads(tablepdfoutput)
	        else:
	            sections = tablepdfoutput

	        # NOTE(review): section values are interpolated into the markup
	        # unescaped -- confirm the extractor never yields raw '<' or '&'.
	        parts = []
	        for section in sections:
	            if "head above 2" in section:
	                parts.append(f"<h1>{section['head above 2']}</h1><br>")
	            if "head above 1" in section:
	                parts.append(f"<h2>{section['head above 1']}</h2><br>")
	            if "Subject" in section:
	                parts.append(f"<h3>{section['Subject']}</h3><br>")
	            if "BodyText" in section:
	                parts.append(f"<p>{' '.join(section['BodyText'])}</p><br>")
	        html_body = "".join(parts)

	        # Wrap everything into one HTML document
	        html_content = f"""
	<!DOCTYPE html>
	<html>
	<head>
	<title>{filename}</title>
	<meta charset="utf-8">
	</head>
	<body>
	{html_body}
	</body>
	</html>
	"""
	        return jsonify({"input_data": html_content,"Filename:":filename})
	    except Exception as e:
	        print(f"Error in /findapitobebilled_htmlformat: {e}")
	        return jsonify({"error": str(e)}), 500


	@app.route('/view-pdf-tobebilled', methods=['GET'])
	def view_pdf_tobebilled():
	    """Serve the first value produced by the to-be-billed extractor as an inline PDF.

	    Answers 400 when ``pdfLink`` is absent, 500 when extraction raises,
	    and 404 when the result is ``None`` or lacks the ``%PDF`` signature.
	    """
	    raw_link = request.args.get('pdfLink')
	    if not raw_link:
	        return "Missing pdfLink parameter.", 400

	    pdf_link = unquote(raw_link)
	    print("Extracted PDF Link:", pdf_link)

	    try:
	        extraction = InitialMarkups.extract_section_under_header_tobebilledOnly(pdf_link)
	        pdf_content = extraction[0]
	    except Exception as exc:
	        print("Error during PDF extraction:", exc)
	        return "PDF could not be processed.", 500

	    looks_like_pdf = pdf_content is not None and pdf_content.startswith(b"%PDF")
	    if not looks_like_pdf:
	        return "PDF content not found or broken.", 404

	    attachment_name = f"annotated_page_{pageNumTextFound}.pdf"
	    return send_file(
	        BytesIO(pdf_content),
	        mimetype='application/pdf',
	        as_attachment=False,
	        download_name=attachment_name,
	    )

	# -------------------- Final markups: view one highlight --------------------
	@app.route('/view-highlight', methods=['GET','POST'])
	def download_pdfHighlight():
	    """Serve an inline PDF highlighting the section whose Subject equals ``keyword``.

	    The page and parent heading are looked up in the module-level
	    ``jsonoutput`` table; when no entry matches, page 0 and no heading are
	    used.

	    Returns:
	        The PDF produced by the extractor; 400 when ``pdfLink`` or
	        ``keyword`` is missing; 500 when the lookup or extraction raises;
	        404 when the extractor's first value is ``None``.
	    """
	    pdf_link = request.args.get('pdfLink')
	    keyword = request.args.get('keyword')
	    if not pdf_link or not keyword:
	        return "Missing required parameters.", 400

	    pdf_link = unquote(pdf_link)
	    print("Extracted PDF Link:", pdf_link)
	    print("Extracted Keyword:", keyword)

	    # Same error handling as /view-pdf-tobebilled: a malformed table entry
	    # (e.g. a missing "Page") or an extractor failure now yields a
	    # controlled 500 instead of an unhandled exception.
	    try:
	        matching_item = next((item for item in jsonoutput if item.get("Subject") == keyword), None)

	        if matching_item:
	            page_number = int(matching_item.get("Page")) - 1
	            stringtowrite = matching_item.get("head above 1")
	            print(f"Page number for '{keyword}': {page_number}")
	        else:
	            page_number = 0
	            stringtowrite = None
	            print("No match found in jsonoutput; defaulting to page 0.")

	        # NOTE(review): /findapiFilteredHeadings calls this same extractor
	        # with two arguments -- confirm its signature supports both forms.
	        pdf_content = InitialMarkups.extract_section_under_headerRawan(pdf_link, keyword, page_number, stringtowrite)[0]
	    except Exception as e:
	        print("Error during PDF extraction:", e)
	        return "PDF could not be processed.", 500

	    if pdf_content is None:
	        return "PDF content not found.", 404

	    return send_file(
	        BytesIO(pdf_content),
	        mimetype='application/pdf',
	        as_attachment=False,
	        download_name=f"annotated_page_{pageNumTextFound}.pdf"
	    )

	@app.route('/findapiFilteredHeadings', methods=['GET','POST'])
	def findapiFilteredHeadings():
	    """Extract the sections under the requested headings and return their text.

	    Reads ``filePath`` and ``listofheadings`` from the JSON body, stores
	    the extractor's table output in the module-level ``jsonoutput`` and
	    answers with the extracted text. Answers 400 when either input is
	    missing and 500 with the exception text when anything raises.
	    """
	    global jsonoutput
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        listofheadings = payload.get('listofheadings')  # json array
	        if not pdfLink or listofheadings is None:
	            return jsonify({"error": "Missing 'filePath' or 'listofheadings'"}), 400

	        _, _, tablepdfoutput, alltext = InitialMarkups.extract_section_under_headerRawan(pdfLink, listofheadings)
	        # Kept for the later /view-highlight lookup.
	        jsonoutput = tablepdfoutput
	        return jsonify(alltext)
	    except Exception as exc:
	        print(f"Error in /findapiFilteredHeadings: {exc}")
	        return jsonify({"error": str(exc)}), 500

	@app.route('/findapitobebilledonlyNew', methods=['GET','POST'])
	def findapitobebilledonly():
	    """Render the to-be-billed table of a PDF as one HTML document.

	    Reads ``filePath`` from the JSON body, runs
	    ``extract_section_under_header_tobebilled2`` and turns each section of
	    its table output into headings/paragraphs.

	    Returns:
	        JSON with ``input_data`` (the HTML document) and ``Filename:``;
	        400 when ``filePath`` is missing; 500 with the exception text when
	        anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "Missing 'filePath'"}), 400
	        _, _, tablepdfoutput, _, filename = InitialMarkups.extract_section_under_header_tobebilled2(pdfLink)

	        # Accept the table either as JSON text or as an already-parsed list,
	        # matching the /findapitobebilledonlyNewMultiplePDFS handler; the
	        # unconditional json.loads raised TypeError on a parsed list.
	        if isinstance(tablepdfoutput, (str, bytes, bytearray)):
	            sections = json.loads(tablepdfoutput)
	        else:
	            sections = tablepdfoutput

	        # NOTE(review): section values are interpolated into the markup
	        # unescaped -- confirm the extractor never yields raw '<' or '&'.
	        parts = []
	        for section in sections:
	            if "head above 2" in section:
	                parts.append(f"<h1>{section['head above 2']}</h1><br>")
	            if "head above 1" in section:
	                parts.append(f"<h2>{section['head above 1']}</h2><br>")
	            if "Subject" in section:
	                parts.append(f"<h3>{section['Subject']}</h3><br>")
	            if "BodyText" in section:
	                parts.append(f"<p>{' '.join(section['BodyText'])}</p><br>")
	        html_body = "".join(parts)

	        # Wrap everything into one HTML document
	        html_content = f"""
	<!DOCTYPE html>
	<html>
	<head>
	<title>{filename}</title>
	<meta charset="utf-8">
	</head>
	<body>
	{html_body}
	</body>
	</html>
	"""
	        return jsonify({"input_data": html_content,"Filename:":filename})
	    except Exception as e:
	        print(f"Error in /findapitobebilledonly: {e}")
	        return jsonify({"error": str(e)}), 500



	@app.route('/findapitobebilledonlyNewMultiplePDFS', methods=['GET','POST'])
	def findapitobebilledonlymarthe():
	    """Render the to-be-billed table from the multiple-PDF extractor as HTML.

	    Reads ``filePath`` from the JSON body, runs
	    ``extract_section_under_header_tobebilledMultiplePDFS`` and turns each
	    section of its table output into headings/paragraphs.

	    Returns:
	        JSON with ``input_data`` (the HTML document) and ``Filename:``;
	        400 when ``filePath`` is missing; 500 with the exception text when
	        anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "Missing 'filePath'"}), 400
	        _, _, tablepdfoutput, _, filename = InitialMarkups.extract_section_under_header_tobebilledMultiplePDFS(pdfLink)

	        # The table may arrive as JSON text or already parsed. bytes and
	        # bytearray are valid json.loads inputs too, so they are parsed
	        # rather than iterated byte by byte.
	        if isinstance(tablepdfoutput, (str, bytes, bytearray)):
	            sections = json.loads(tablepdfoutput)
	        else:
	            sections = tablepdfoutput

	        # NOTE(review): section values are interpolated into the markup
	        # unescaped -- confirm the extractor never yields raw '<' or '&'.
	        parts = []
	        for section in sections:
	            if "head above 2" in section:
	                parts.append(f"<h1>{section['head above 2']}</h1><br>")
	            if "head above 1" in section:
	                parts.append(f"<h2>{section['head above 1']}</h2><br>")
	            if "Subject" in section:
	                parts.append(f"<h3>{section['Subject']}</h3><br>")
	            if "BodyText" in section:
	                parts.append(f"<p>{' '.join(section['BodyText'])}</p><br>")
	        html_body = "".join(parts)

	        # Wrap everything into one HTML document
	        html_content = f"""
	<!DOCTYPE html>
	<html>
	<head>
	<title>{filename}</title>
	<meta charset="utf-8">
	</head>
	<body>
	{html_body}
	</body>
	</html>
	"""
	        return jsonify({"input_data": html_content,"Filename:":filename})
	    except Exception as e:
	        # Log under this handler's own route; the message previously named
	        # /findapitobebilledonly, which made the two routes indistinguishable
	        # in the logs.
	        print(f"Error in /findapitobebilledonlyNewMultiplePDFS: {e}")
	        return jsonify({"error": str(e)}), 500


	@app.route('/findapiAllDocNoNotbilled', methods=['GET','POST'])
	def findapiAllDocNoNotbilled():
	    """Return the fifth value produced by the to-be-billed extractor as JSON.

	    Answers 400 when ``filePath`` is missing from the JSON body and 500
	    with the exception text when anything raises.
	    """
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "Missing 'filePath'"}), 400

	        # The extractor yields six values; only ``alltextNoNotbilled`` is used here.
	        _, _, _, _, alltextNoNotbilled, _ = InitialMarkups.extract_section_under_header_tobebilledOnly(pdfLink)
	        return jsonify(alltextNoNotbilled)
	    except Exception as exc:
	        print(f"Error in /findapiAllDocNoNotbilled: {exc}")
	        return jsonify({"error": str(exc)}), 500

	# -------------------- Rawan - MC Connection --------------------
	@app.route('/findapi', methods=['GET','POST'])
	def findapi():
	    """Run the markup extractor on ``filePath`` and return its table output.

	    The table is also stored in the module-level ``jsonoutput``. Answers
	    400 when ``filePath`` is missing and 500 with the exception text when
	    anything raises.
	    """
	    global jsonoutput
	    try:
	        payload = request.get_json(force=True) or {}
	        pdfLink = payload.get('filePath')
	        if not pdfLink:
	            return jsonify({"error": "Missing 'filePath'"}), 400

	        _, _, tablepdfoutput = InitialMarkups.extract_section_under_header(pdfLink)
	        # Kept for the later /view-highlight lookup.
	        jsonoutput = tablepdfoutput
	        return jsonify(tablepdfoutput)
	    except Exception as exc:
	        print(f"Error in /findapi: {exc}")
	        return jsonify({"error": str(exc)}), 500

	#--------------------testpage-----------------------------
	import socket
	from datetime import datetime

	@app.route('/testpage')
	def test_page():
	    """Return a static HTML status page showing the host name and server time."""
	    # The two values interpolated into the page below.
	    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	    hostname = socket.gethostname()

	    # Doubled braces keep the CSS rules literal inside the f-string.
	    page = f"""
	<!DOCTYPE html>
	<html>
	<head>
	<title>Server Test Page</title>
	<style>
	body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
	.success {{ color: #2ecc71; font-size: 24px; }}
	.info {{ color: #34495e; margin-top: 10px; }}
	.container {{ max-width: 600px; margin: 0 auto; text-align: left; }}
	</style>
	</head>
	<body>
	<div class="success">🚀 Flask Server is Running!</div>
	<div class="container">
	<p class="info"><strong>Hostname:</strong> {hostname}</p>
	<p class="info"><strong>Server Time:</strong> {current_time}</p>
	<p class="info"><strong>Endpoint:</strong> /testpage</p>
	<p class="info"><strong>Status:</strong> <span style="color: #2ecc71;">Operational ✅</span></p>
	</div>
	</body>
	</html>
	"""
	    return page

	# -------------------- Run --------------------
	if __name__ == "__main__":
	    import os

	    # The Werkzeug debugger lets anyone who can reach the server execute
	    # arbitrary code, and this server listens on every interface. Debug
	    # mode is therefore opt-in (FLASK_DEBUG=1) instead of always on.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
	    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)