Marthee commited on
Commit
7b6c54a
·
verified ·
1 Parent(s): 255e585

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +186 -26
app.py CHANGED
@@ -1,19 +1,18 @@
1
  from __future__ import print_function
2
  from flask import Flask, render_template,request,flash , json, url_for,g , redirect , jsonify , send_file ,make_response
3
- import os
4
  import json
5
  import fitz
6
  from PIL import Image
7
  import cv2
8
  import numpy as np
9
  import pilecaps_adr
10
- import base64
11
  from db import dropbox_connect
12
- import db
13
  import cv2
14
  import pandas as pd
15
  import time
16
  from io import BytesIO, StringIO
 
17
  import tempfile
18
  from flask import Flask, Response
19
  from werkzeug.wsgi import wrap_file
@@ -21,7 +20,6 @@ import tameem3_2
21
  import pypdfium2 as pdfium
22
  import pixelconversion
23
  import tameem2_1
24
- import base64
25
  import io
26
  from urllib.parse import unquote
27
  import API
@@ -30,14 +28,16 @@ import tsadropboxretrieval
30
  import doc_search
31
  import google_sheet_Legend
32
  import dxf__omar3_2
 
33
  import google_sheet_to_xml
34
  from threading import Thread
35
  import mainDBAlaa
36
  import datetime
37
  import doors_fasterrcnn
38
  import deploying_3_3
 
39
  import Counting_Columns_2_1
40
-
41
  import ezdxf
42
  app = Flask(__name__)
43
 
@@ -53,13 +53,11 @@ global pdflink
53
  def getInfotoMeasure():
54
  API.AppendtablestoSheets()
55
  return render_template("proposed-GUI.html")
56
-
57
  @app.route("/WordSearch",methods=["GET", "POST"])
58
  def getInfo2toMeasure():
59
  API.AppendtablestoSheets()
60
  return render_template("wordSearch.html")
61
 
62
-
63
  @app.route('/getprojectnames/',methods=['GET'])
64
  def getprjnamesfromTestAPI():
65
  prjnames,prjids=API.getPrjNames()
@@ -85,9 +83,9 @@ def get_javascript_data(jsdata):
85
  #get project from the first dropdown
86
  # jsdata=eval(jsdata)
87
  print('tsa')
88
- documnetsToMeasure,RelevantDocuments=tsadropboxretrieval.retrieveProjects(jsdata)
89
  if RelevantDocuments:
90
- return jsonify ([documnetsToMeasure, RelevantDocuments])
91
  else:
92
  return ['No projects found']
93
  #---------------------------------------------------------------
@@ -175,7 +173,7 @@ def getfromdropboxImg(jsdata):
175
  def stringToRGB():
176
  vv = eval(request.form.get('answers'))
177
  print(vv)
178
- if ( vv[5][2].startswith('3.2') or vv[5][2].startswith('3.3') ) :
179
  print('3.2 section')
180
  pdfpath,pdflink=tsadropboxretrieval.getPathtoPDF_File(nameofPDF=vv[0])
181
  dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
@@ -189,6 +187,8 @@ def stringToRGB():
189
  arr=measureproject(vv,dataDoc,0,file)
190
  return jsonify(arr)
191
 
 
 
192
  if vv[5][2].startswith('1.0'):
193
  opencv_img,dataDoc = plan2img( str(vv[0]) )
194
  if vv[1]==220:
@@ -273,6 +273,29 @@ def measureproject(result,dataDoc=0,img=0,dxffile=0):
273
  #
274
  pdflink= tsadropboxretrieval.uploadanyFile(doc=doc,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/'
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  _, buffer = cv2.imencode('.png', outputimg)
277
  arr=[ base64.b64encode(buffer).decode('utf-8'),SimilarAreaDictionary.to_dict(), spreadsheet_url,spreadsheetId,[],list1.to_dict(),pdflink,hatched_areas,namepathArr]#,hatched_areas,namepathArr]# , spreadsheetId, spreadsheet_url , list1.to_dict()]
278
  ################################# -2.8- #################################
@@ -283,7 +306,7 @@ def measureproject(result,dataDoc=0,img=0,dxffile=0):
283
  legendLinks=[]
284
  listofmarkups=[]
285
 
286
- annotatedimg,pdf_document,spreadsheet_url, list1 , df_doors=doors_fasterrcnn.main_run(img,dataDoc,'separated_classes.pth','separated_classes_all.pth',result[0],pdfpath, result[4]) #single_double.pth
287
 
288
  dbPath='/TSA JOBS/ADR Test'+pdfpath+'Measured Plan/'
289
  pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
@@ -297,12 +320,12 @@ def measureproject(result,dataDoc=0,img=0,dxffile=0):
297
  dpxlinks=[]
298
  legendLinks=[]
299
  listofmarkups=[]
300
- print('wasalt::',dataDoc)
301
  annotatedimg,pdf_document,spreadsheet_url, list1 , df_doors=Counting_Columns_2_1.mainfun(dataDoc,pdfpath,result[0])
302
 
303
- dbPath='/TSA JOBS/ADR Test'+pdfpath+'Measured Plan/'
304
- pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
305
- _, buffer = cv2.imencode('.png', annotatedimg)
306
  arr=[base64.b64encode(buffer).decode('utf-8') ,pdflink,spreadsheet_url,list1.to_dict(), df_doors.to_dict()]# , spreadsheetId, spreadsheet_url , list1.to_dict()]
307
 
308
  ################################# -2.2- #################################
@@ -566,13 +589,121 @@ def dltmarkupslegend():
566
  #get pdf dropbox url after measurement is done
567
  @app.route('/getdropboxurl/<jsdata>',methods=["GET", "POST"])
568
  def calldropboxurl(jsdata):
569
- print(jsdata)
570
  pdfurl=tsadropboxretrieval.getPathtoPDF_File(nameofPDF=jsdata)[1]
571
- print(pdfurl)
572
- return jsonify(pdfurl)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
 
574
  #_________________________________________________________________________________________________________________________
575
  #_________________________________________________________________________________________________________________________
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
576
  #Google sheet links
577
  @app.route('/getdrivelinks/<jsdata>',methods=["GET", "POST"])
578
  def getlinkscreated(jsdata):
@@ -703,7 +834,7 @@ def checktables():
703
  @app.route('/refreshDropbox',methods=["GET", "POST"])
704
  def checkdropbox():
705
  print('checkingggdf')
706
- dfFromDropbox=tsadropboxretrieval.DropboxItemstoDF( "/TSA JOBS")[0]
707
  dfParquet=tsadropboxretrieval.GetParquetDF()
708
 
709
  dfParquet1 = dfParquet[['name', 'path_display', 'client_modified', 'server_modified']]
@@ -718,13 +849,45 @@ def checkdropbox():
718
  stringReturned= 'Updated Sucessfully.'
719
  else:
720
  stringReturned= 'Nothing to update.'
721
- return stringReturned
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
 
 
723
 
 
 
724
 
725
- @app.route('/postdropboxprojects',methods=["GET", "POST"])
726
- def postDropboxprojectsinConsole():
727
- stringReturned=checkdropbox()
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  print(stringReturned)
729
  return stringReturned
730
 
@@ -750,9 +913,6 @@ def cvtSummarytoXML(jsdata):
750
  # print(result[1])
751
  xmllink=google_sheet_to_xml.create_xml(documentname=result[1],dbPath=path)
752
  return jsonify(xmllink)
753
-
754
-
755
-
756
  #_________________________________________________________________________________________________________________________
757
  #_________________________________________________________________________________________________________________________
758
 
 
1
  from __future__ import print_function
2
  from flask import Flask, render_template,request,flash , json, url_for,g , redirect , jsonify , send_file ,make_response
 
3
  import json
4
  import fitz
5
  from PIL import Image
6
  import cv2
7
  import numpy as np
8
  import pilecaps_adr
9
+ import base64
10
  from db import dropbox_connect
 
11
  import cv2
12
  import pandas as pd
13
  import time
14
  from io import BytesIO, StringIO
15
+ import urllib
16
  import tempfile
17
  from flask import Flask, Response
18
  from werkzeug.wsgi import wrap_file
 
20
  import pypdfium2 as pdfium
21
  import pixelconversion
22
  import tameem2_1
 
23
  import io
24
  from urllib.parse import unquote
25
  import API
 
28
  import doc_search
29
  import google_sheet_Legend
30
  import dxf__omar3_2
31
+ import requests
32
  import google_sheet_to_xml
33
  from threading import Thread
34
  import mainDBAlaa
35
  import datetime
36
  import doors_fasterrcnn
37
  import deploying_3_3
38
+ import Code_2_7
39
  import Counting_Columns_2_1
40
+ import Find_Hyperlinking_text
41
  import ezdxf
42
  app = Flask(__name__)
43
 
 
53
  def getInfotoMeasure():
54
  API.AppendtablestoSheets()
55
  return render_template("proposed-GUI.html")
 
56
@app.route("/WordSearch", methods=["GET", "POST"])
def getInfo2toMeasure():
    """Serve the word-search page.

    Calls ``API.AppendtablestoSheets()`` first, then renders the
    ``wordSearch.html`` template and returns it.
    """
    API.AppendtablestoSheets()
    page = render_template("wordSearch.html")
    return page
60
 
 
61
  @app.route('/getprojectnames/',methods=['GET'])
62
  def getprjnamesfromTestAPI():
63
  prjnames,prjids=API.getPrjNames()
 
83
  #get project from the first dropdown
84
  # jsdata=eval(jsdata)
85
  print('tsa')
86
+ documnetsToMeasure,RelevantDocuments,extracted_path=tsadropboxretrieval.retrieveProjects(jsdata)
87
  if RelevantDocuments:
88
+ return jsonify ([documnetsToMeasure, RelevantDocuments,extracted_path])
89
  else:
90
  return ['No projects found']
91
  #---------------------------------------------------------------
 
173
  def stringToRGB():
174
  vv = eval(request.form.get('answers'))
175
  print(vv)
176
+ if ( vv[5][2].startswith('3.2') or vv[5][2].startswith('3.3') or vv[5][2].startswith('2.7')) :
177
  print('3.2 section')
178
  pdfpath,pdflink=tsadropboxretrieval.getPathtoPDF_File(nameofPDF=vv[0])
179
  dbxTeam= tsadropboxretrieval.ADR_Access_DropboxTeam('user')
 
187
  arr=measureproject(vv,dataDoc,0,file)
188
  return jsonify(arr)
189
 
190
+
191
+
192
  if vv[5][2].startswith('1.0'):
193
  opencv_img,dataDoc = plan2img( str(vv[0]) )
194
  if vv[1]==220:
 
273
  #
274
  pdflink= tsadropboxretrieval.uploadanyFile(doc=doc,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/'
275
 
276
+ _, buffer = cv2.imencode('.png', outputimg)
277
+ arr=[ base64.b64encode(buffer).decode('utf-8'),SimilarAreaDictionary.to_dict(), spreadsheet_url,spreadsheetId,[],list1.to_dict(),pdflink,hatched_areas,namepathArr]#,hatched_areas,namepathArr]# , spreadsheetId, spreadsheet_url , list1.to_dict()]
278
+ ################################# 2.7- #################################
279
+ elif result[5][2].startswith('2.7') : #section value - 2.7 floor finishes
280
+ print('code of 2.7')
281
+ dxfpath=dxffile.read()
282
+ with tempfile.NamedTemporaryFile(suffix='.dxf', delete=False) as temp_file:
283
+ temp_file.write(dxfpath)
284
+ temp_filename = temp_file.name
285
+ print(temp_filename)
286
+
287
+ SearchArray=result[6]
288
+ print(result)
289
+ print("SearchArray = ",SearchArray)
290
+
291
+ doc,outputimg, SimilarAreaDictionary ,spreadsheetId, spreadsheet_url , namepathArr , list1,hatched_areas=Code_2_7.mainFunctionDrawImgPdf(dataDoc,temp_filename,result[4],SearchArray, pdfpath,result[0])
292
+ # global colorsused
293
+ colorsused=list(SimilarAreaDictionary['Color'])
294
+ dbPath='/TSA JOBS/ADR Test'+pdfpath+'Measured Plan/'
295
+ print(dbPath,result[0])
296
+ #
297
+ pdflink= tsadropboxretrieval.uploadanyFile(doc=doc,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/'
298
+
299
  _, buffer = cv2.imencode('.png', outputimg)
300
  arr=[ base64.b64encode(buffer).decode('utf-8'),SimilarAreaDictionary.to_dict(), spreadsheet_url,spreadsheetId,[],list1.to_dict(),pdflink,hatched_areas,namepathArr]#,hatched_areas,namepathArr]# , spreadsheetId, spreadsheet_url , list1.to_dict()]
301
  ################################# -2.8- #################################
 
306
  legendLinks=[]
307
  listofmarkups=[]
308
 
309
+ annotatedimg,pdf_document,spreadsheet_url, list1 , df_doors=doors_fasterrcnn.main_run(img,dataDoc,'separated_classes_all.pth',result[0],pdfpath, result[4]) #single_double.pth
310
 
311
  dbPath='/TSA JOBS/ADR Test'+pdfpath+'Measured Plan/'
312
  pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
 
320
  dpxlinks=[]
321
  legendLinks=[]
322
  listofmarkups=[]
323
+
324
  annotatedimg,pdf_document,spreadsheet_url, list1 , df_doors=Counting_Columns_2_1.mainfun(dataDoc,pdfpath,result[0])
325
 
326
+ # dbPath='/TSA JOBS/ADR Test'+pdfpath+'Measured Plan/'
327
+ # pdflink= tsadropboxretrieval.uploadanyFile(doc=pdf_document,path=dbPath,pdfname=result[0]) #doc=doc,pdfname=path,pdfpath=pdfpath+'Measured Plan/
328
+ # _, buffer = cv2.imencode('.png', annotatedimg)
329
  arr=[base64.b64encode(buffer).decode('utf-8') ,pdflink,spreadsheet_url,list1.to_dict(), df_doors.to_dict()]# , spreadsheetId, spreadsheet_url , list1.to_dict()]
330
 
331
  ################################# -2.2- #################################
 
589
  #get pdf dropbox url after measurement is done
590
@app.route('/getdropboxurl/<jsdata>', methods=["GET", "POST"])
def calldropboxurl(jsdata):
    """Stream the PDF behind a document's Dropbox link back to the client.

    Looks up the shareable link for ``jsdata`` (the PDF name) through
    ``tsadropboxretrieval.getPathtoPDF_File``, downloads it, re-saves it with
    PyMuPDF and returns the bytes as an ``application/pdf`` response.

    Returns:
        A ``Response`` carrying the PDF on success; a JSON error with status
        404 when no usable link exists, or 502 when the download fails.
    """
    print('jsdata', jsdata)
    pdfurl = tsadropboxretrieval.getPathtoPDF_File(nameofPDF=jsdata)[1]
    print('urll', pdfurl)

    # Guard clause: without it the view fell through and returned None,
    # which Flask rejects as an invalid response.
    if not pdfurl or not ('http' in pdfurl or 'dropbox' in pdfurl):
        return jsonify({"error": "No Dropbox link found for the requested PDF."}), 404

    # dl=1 asks Dropbox for the raw file instead of the preview page.
    if 'dl=0' in pdfurl:
        pdfurl = pdfurl.replace('dl=0', 'dl=1')
    print('urll1', pdfurl)

    # Download the PDF content from the shareable link. A timeout keeps a
    # stalled connection from blocking the worker forever, and the status
    # check stops an HTML error page from being handed to the PDF parser.
    try:
        response = requests.get(pdfurl, timeout=60)
        response.raise_for_status()
    except requests.RequestException as exc:
        return jsonify({"error": f"Could not download PDF: {exc}"}), 502

    # The previous `BytesIO(...) is None` test could never be true; check the
    # payload itself instead.
    if not response.content:
        return jsonify({"error": "No valid PDF content found."}), 502

    # Open the PDF using PyMuPDF and serialise it back into memory.
    pdf_document = fitz.open(stream=BytesIO(response.content), filetype="pdf")
    pdf_bytes = BytesIO()
    pdf_document.save(pdf_bytes)
    return Response(pdf_bytes.getvalue(), content_type='application/pdf')
610
 
611
  #_________________________________________________________________________________________________________________________
612
  #_________________________________________________________________________________________________________________________
613
+
614
+ pdf_content = None
615
+ pageNumTextFound = 0
616
+ BASE_URL = "https://marthee-nbslink.hf.space" # Hugging Face Spaces base URL
617
+
618
@app.route("/", methods=["GET", "POST"])
def thismain():
    """Render and return the ``gui.html`` template for the root URL."""
    print('ayhaga')
    landing_page = render_template("gui.html")
    return landing_page
622
+
623
@app.route('/view-pdf', methods=['GET'])
def download_pdf():
    """Return an annotated copy of a PDF with the requested keywords marked.

    Query parameters:
        pdfLink: link to the source PDF.
        keyword: a single keyword, or a JSON-encoded list of keywords.

    Returns:
        The annotated PDF via ``send_file`` (inline, not as an attachment);
        ``400`` when a parameter is missing or the keyword list is empty;
        ``404`` when the annotator yields no content.
    """
    # Parse the raw query string manually rather than through request.args.
    full_query_string = request.query_string.decode()
    parsed_params = urllib.parse.parse_qs(full_query_string)

    pdf_link = parsed_params.get('pdfLink', [None])[0]
    keyword = parsed_params.get('keyword', [None])[0]

    if not pdf_link or not keyword:
        return "Missing required parameters.", 400

    # NOTE(review): parse_qs has already percent-decoded these values, so this
    # is a second decoding pass. It is kept for clients that double-encode;
    # confirm against the front end and drop it if they do not.
    pdf_link = urllib.parse.unquote(pdf_link)
    keyword = urllib.parse.unquote(keyword)

    # The keyword may arrive as a JSON list. Anything else, including valid
    # JSON that is not a list (e.g. `310`, `"abc"`, `null`), is treated as a
    # single keyword so the annotator always receives a list.
    try:
        decoded = json.loads(keyword)
    except json.JSONDecodeError:
        decoded = None
    if isinstance(decoded, list):
        keyword = decoded
    elif isinstance(decoded, str):
        keyword = [decoded]
    else:
        keyword = [keyword]

    if not keyword:
        # An empty JSON list carries no keywords at all.
        return "Missing required parameters.", 400

    print("Extracted PDF Link:", pdf_link)
    print("Extracted Keywords:", keyword)

    # Only the first element of the annotator's result (the PDF bytes) is used.
    pdf_content = Find_Hyperlinking_text.annotate_text_from_pdf([pdf_link], keyword)[0]
    if pdf_content is None:
        return "PDF content not found.", 404

    pdf_bytes = BytesIO(pdf_content)
    return send_file(
        pdf_bytes,
        mimetype='application/pdf',
        as_attachment=False,
        download_name=f"annotated_page_{pageNumTextFound}.pdf"
    )
661
+
662
@app.route('/api/process-data', methods=['POST'])
def receive_pdf_data():
    """Annotate the PDF supplied by ``finddata()`` and upload the results.

    Runs ``Find_Hyperlinking_text.annotate_text_from_pdf`` on the link and
    keywords, then uploads the marked-up document and its markup-summary
    table to ``/TSA JOBS/ADR Test/FIND/`` through
    ``tsadropboxretrieval.uploadanyFile``.

    Returns:
        JSON with both uploaded links on success, a 400 JSON error when the
        link or keywords are missing, or a 500 JSON error carrying the
        exception text when any processing step raises.
    """
    # Get PDF link and keyword from finddata()
    pdfLink, keyword = finddata()

    if not (pdfLink and keyword):
        return jsonify({"error": "Both 'pdfLink' and 'keyword' must be provided."}), 400

    upload_dir = '/TSA JOBS/ADR Test/FIND/'
    try:
        print(pdfLink, keyword)

        # Four values come back; only the two documents are uploaded.
        _pdf_bytes, marked_doc, _frame, summary_doc = (
            Find_Hyperlinking_text.annotate_text_from_pdf([pdfLink], keyword)
        )
        team_client = tsadropboxretrieval.ADR_Access_DropboxTeam('user')

        # Get metadata using the shared link; its name becomes the upload name.
        link_meta = team_client.sharing_get_shared_link_metadata(pdfLink)

        marked_link = tsadropboxretrieval.uploadanyFile(
            doc=marked_doc, path=upload_dir, pdfname=link_meta.name
        )
        print('LINKS0', marked_link)

        summary_name = link_meta.name.rsplit(".pdf", 1)[0] + ' Markup Summary' + '.pdf'
        summary_link = tsadropboxretrieval.uploadanyFile(
            doc=summary_doc, path=upload_dir, pdfname=summary_name
        )
        print("PDF successfully uploaded to Dropbox at")
        print('LINKS1', summary_link)

        payload = {
            "message": "PDF processed successfully.",
            "PDF_MarkedUp": marked_link,
            'Table_PDF_Markup_Summary': summary_link
        }
        return jsonify(payload)

    except Exception as e:
        return jsonify({"error": str(e)}), 500
697
+
698
def finddata():
    """Return the hard-coded ``(pdfLink, keyword)`` pair.

    ``pdfLink`` is a Dropbox share URL string and ``keyword`` is a list of
    two search phrases.
    """
    return (
        'https://www.dropbox.com/scl/fi/hnp4mqigb51a5kp89kgfa/00801-ARC-20-ZZ-S-A-0002.pdf?rlkey=45abeoebzqw4qwnslnei6dkd6&st=m4yrcjm2&dl=1',
        ['115 INTEGRATED MRI ROOM LININGS', '310 ACCURACY'],
    )
702
+
703
+
704
+ #_________________________________________________________________________________________________________________________
705
+ #_________________________________________________________________________________________________________________________
706
+
707
  #Google sheet links
708
  @app.route('/getdrivelinks/<jsdata>',methods=["GET", "POST"])
709
  def getlinkscreated(jsdata):
 
834
  @app.route('/refreshDropbox',methods=["GET", "POST"])
835
  def checkdropbox():
836
  print('checkingggdf')
837
+ dfFromDropbox=tsadropboxretrieval.DropboxItemstoDF("/TSA JOBS")[0]
838
  dfParquet=tsadropboxretrieval.GetParquetDF()
839
 
840
  dfParquet1 = dfParquet[['name', 'path_display', 'client_modified', 'server_modified']]
 
849
  stringReturned= 'Updated Sucessfully.'
850
  else:
851
  stringReturned= 'Nothing to update.'
852
+ return 'stringReturned'
853
+
854
def refreshDropboxRetrievals(extractedPath):
    """Replace the cached Parquet rows for one Dropbox folder with a fresh listing.

    Fetches the current listing for ``extractedPath`` through
    ``tsadropboxretrieval.DropboxItemstoDF``, removes every cached row whose
    ``path_display`` lies in that folder, appends the fresh rows and uploads
    the result with ``tsadropboxretrieval.dropbox_upload_file``.

    Args:
        extractedPath: Folder path (string) whose cached entries are refreshed.

    Returns:
        ``'Updated Successfully.'`` when the fresh listing has rows, otherwise
        ``'Nothing to update.'``. The upload happens in both cases.
    """
    columns = ['name', 'path_display', 'client_modified', 'server_modified']

    dfFromDropbox = tsadropboxretrieval.DropboxItemstoDF(extractedPath)[0]
    dfParquet = tsadropboxretrieval.GetParquetDF()

    # Keep only relevant columns
    dfParquet = dfParquet[columns]
    dfFromDropbox = dfFromDropbox[columns]

    # Drop cached rows for the folder itself and everything beneath it.
    # Matching on "<folder>/" rather than the bare prefix keeps sibling
    # folders that merely share a name prefix (e.g. "/A/B2" when refreshing
    # "/A/B"); those rows are not part of the fresh listing and would
    # otherwise be lost. na=False treats missing paths as "not in folder"
    # instead of producing NaN, which cannot be used as a boolean mask.
    folder = extractedPath.rstrip('/')
    cached_paths = dfParquet['path_display']
    in_folder = (cached_paths == folder) | cached_paths.str.startswith(folder + '/', na=False)
    dfParquetUpdated = dfParquet[~in_folder]

    # Append new data from Dropbox
    dfParquetUpdated = pd.concat([dfParquetUpdated, dfFromDropbox], ignore_index=True)

    # Save the updated Parquet file
    tsadropboxretrieval.dropbox_upload_file(dfParquetUpdated)

    if len(dfFromDropbox) > 0:
        print("Updated entries:", dfFromDropbox)
        return 'Updated Successfully.'
    return 'Nothing to update.'
880
+
881
+
882
+
883
+
884
+
885
@app.route('/postdropboxprojects/<path:encoded_path>')
def handle_path(encoded_path):
    """Refresh the cached Dropbox listing for the folder named in the URL.

    ``encoded_path`` is expected to be a URL-encoded JSON string holding the
    folder path. It is decoded and handed to ``refreshDropboxRetrievals``.

    Returns:
        The status string from ``refreshDropboxRetrievals``, or a plain-text
        400 response when the path segment is not a JSON-encoded string.
    """
    decoded_path = urllib.parse.unquote(encoded_path)  # Decode URL encoding
    try:
        extracted_path = json.loads(decoded_path)  # Convert back to Python object
    except json.JSONDecodeError:
        # Malformed client input is a 400, not an unhandled server error.
        return 'Invalid path: expected a JSON-encoded string.', 400

    # The refresh does string prefix matching on this value, so any other
    # JSON type (list, number, null, object) would fail further down.
    if not isinstance(extracted_path, str):
        return 'Invalid path: expected a JSON-encoded string.', 400

    print('path to refresh', extracted_path)
    stringReturned = refreshDropboxRetrievals(extracted_path)
    print(stringReturned)
    return stringReturned
893
 
 
913
  # print(result[1])
914
  xmllink=google_sheet_to_xml.create_xml(documentname=result[1],dbPath=path)
915
  return jsonify(xmllink)
 
 
 
916
  #_________________________________________________________________________________________________________________________
917
  #_________________________________________________________________________________________________________________________
918