| | |
"""TSADropboxRetrieval.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1d-UI3Y-z7Dj-vqu69CxluOUnN4rvsUuE
"""
| |
|
| | |
| |
|
| | |
| | import os |
| | import base64 |
| | import requests |
| | import json |
| | import pathlib |
| | import pandas as pd |
| | import dropbox |
| | from dropbox.exceptions import AuthError |
| | import fitz |
| | import io |
| | import re |
| | import pyarrow |
| | from io import BytesIO |
| | from functools import lru_cache |
| |
|
| |
|
| |
|
@lru_cache(maxsize=1)
def load_parquet_df():
    """Return the Dropbox file-index DataFrame, downloading it at most once.

    The first call delegates to ``GetParquetDF``; ``lru_cache`` then hands
    back that very same DataFrame object on every later call, so callers
    share one instance.
    """
    cached_index = GetParquetDF()
    return cached_index
| |
|
| |
|
# Module-level accumulator. NOTE: DropboxItemstoDF builds its own local list
# with the same name, so this one is not touched by that function.
files_list = []

# SECURITY NOTE(review): these credentials are committed in plain text and
# should be rotated and removed from source control. Each value can now be
# supplied through the environment; the literals below are kept only as a
# backward-compatible fallback so existing deployments keep working.
app_key = os.environ.get('DROPBOX_APP_KEY', '9bljerefjumct38')
app_secret = os.environ.get('DROPBOX_APP_SECRET', 'nl6k66clw1j1k12')
access_code = os.environ.get(
    'DROPBOX_ACCESS_TOKEN',
    'sl.Bou05Rb15xPy851-I1UV8oOabHPY21AEPl5nrYl-Q0ninFSy0kTuRWPSve_JPbd3Z03E7eBY4r9R454rdzDM0AxLkyqrQEDzyAGUwP7kZ7s2CR6EwvdLD2a7Xh8nFEs38voLTH2IHzrQ2QEx7rji4OJ8aSQStKtJkI7_dh8tYHj5',
)
refresh_token = os.environ.get(
    'DROPBOX_REFRESH_TOKEN',
    'qK2VqvbxWMMAAAAAAAAAAXFQvrHM4xUwWUcZ6l5vGOygn1iAA6zlDjmAQNBbZprL',
)
# "key:secret" encoded as base64 bytes (the form used for HTTP Basic auth).
basic_auth = base64.b64encode(f'{app_key}:{app_secret}'.encode())
| |
|
def ADR_Access_DropboxTeam(flag):
    """Build a Dropbox team client rooted at the team's root namespace.

    Args:
        flag: ``'user'`` to act as the configured team member, or
            ``'admin'`` to act as that member with admin privileges.

    Returns:
        A Dropbox client obtained via ``as_user`` / ``as_admin`` whose path
        root has been switched to the account's root namespace.

    Raises:
        ValueError: if ``flag`` is neither ``'user'`` nor ``'admin'``.
            (Previously this fell through and failed with an
            ``UnboundLocalError`` on the next line.)
    """
    if flag not in ('user', 'admin'):
        raise ValueError(
            "flag must be 'user' or 'admin', got {!r}".format(flag)
        )

    member_id = 'dbmid:AACjJg2GKc3tI42iOnD01dd6s0XDyyx6Thw'
    team = dropbox.DropboxTeam(
        app_key=app_key,
        app_secret=app_secret,
        oauth2_refresh_token=refresh_token,
    )
    dbxTeam = team.as_user(member_id) if flag == 'user' else team.as_admin(member_id)

    # Re-root the client so paths are resolved from the root namespace
    # reported for the current account.
    root_namespace_id = dbxTeam.users_get_current_account().root_info.root_namespace_id
    dbxTeam = dbxTeam.with_path_root(dropbox.common.PathRoot.root(root_namespace_id))
    return dbxTeam
| |
|
def getSharedLink(path):
    """Return a shared-link URL for the Dropbox item at ``path``.

    Tries ``sharing_create_shared_link_with_settings`` first and, if the
    Dropbox API rejects that call, falls back to the legacy
    ``sharing_create_shared_link`` endpoint.

    Args:
        path: Dropbox path of the file or folder to share.

    Returns:
        The ``url`` attribute of the shared-link metadata.
    """
    dbxTeam = ADR_Access_DropboxTeam('user')
    try:
        shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings(path)
    except dropbox.exceptions.ApiError:
        # Only API-level rejections trigger the fallback; a bare ``except``
        # would also swallow KeyboardInterrupt/SystemExit.
        shared_link_metadata = dbxTeam.sharing_create_shared_link(path)
    return shared_link_metadata.url
| |
|
def handle_entries(entries, files_list):
    """Append file metadata from ``entries`` to ``files_list`` and tabulate it.

    Only entries that are ``dropbox.files.FileMetadata`` instances are kept;
    anything else in ``entries`` is ignored.

    Args:
        entries: iterable of Dropbox listing entries.
        files_list: list of metadata dicts, extended in place.

    Returns:
        A DataFrame built from everything currently in ``files_list``
        (earlier contents included), with the keys ``name``,
        ``path_display``, ``client_modified`` and ``server_modified``.
    """
    files_list.extend(
        {
            'name': entry.name,
            'path_display': entry.path_display,
            'client_modified': entry.client_modified,
            'server_modified': entry.server_modified,
        }
        for entry in entries
        if isinstance(entry, dropbox.files.FileMetadata)
    )
    return pd.DataFrame.from_records(files_list)
| |
|
def dropbox_connect():
    """Create a client for the Dropbox account configured below.

    Returns:
        A ``dropbox.Dropbox`` client, or ``None`` when an ``AuthError`` is
        raised while building it. (Previously the error path crashed with
        ``UnboundLocalError`` because ``dbxMe`` was never assigned.)
    """
    print('connecy')
    dbxMe = None
    try:
        # SECURITY NOTE(review): credentials are embedded in source; they
        # should be rotated and loaded from configuration instead.
        dbxMe = dropbox.Dropbox(
            app_key='67w6ibpa9d2b60x',
            app_secret='d3ecz8g1604fu04',
            oauth2_refresh_token='R_LACBBNhysAAAAAAAAAAXt9mMy9OYIV_v4pF45lG6Z8DHNV66rq1q7acWjj_H5g',
        )
    except AuthError as e:
        print('Error connecting to Dropbox with access token: ' + str(e))
    return dbxMe
| |
|
| |
|
def dropbox_upload_file(df, flag=0):
    """Serialise ``df`` to Parquet and store it as the shared index file.

    The file is written with Dropbox's overwrite mode instead of the former
    delete-then-upload sequence. That sequence raised when the file did not
    exist yet (so nothing was uploaded) and left no file at all if the
    upload failed after the delete.

    Args:
        df: DataFrame to upload.
        flag: unused; kept for backward compatibility with existing callers.

    Returns:
        The admin team client used for the upload, or ``None`` if the
        client could not be created. Errors are printed, not raised.
    """
    dbxTeam = None
    try:
        dbxTeam = ADR_Access_DropboxTeam('admin')
        path = '/TSA JOBS/ADR Test/DropboxDirectory/df.parquet.gzip'
        # With no target path, to_parquet() returns the serialised bytes.
        payload = df.to_parquet()
        dbxTeam.files_upload(
            payload, path, mode=dropbox.files.WriteMode.overwrite
        )
    except Exception as e:
        print('Error uploading file to Dropbox: ' + str(e))
    return dbxTeam
| |
|
| |
|
def check_if_file_exists(dbxTeam, path):
    """Report whether metadata can be fetched for ``path``.

    Args:
        dbxTeam: client exposing ``files_get_metadata``.
        path: Dropbox path to probe.

    Returns:
        ``True`` when ``files_get_metadata(path)`` succeeds, ``False`` when
        it raises any ``Exception`` (the error itself is discarded).
    """
    try:
        dbxTeam.files_get_metadata(path)
    except Exception:
        return False
    return True
def uploadmarkupPDFTable(doc, pdfname, path):
    """Upload the contents of ``doc`` to ``path + pdfname`` and share it.

    An existing file at the target path is deleted before the upload.

    Args:
        doc: readable object; ``doc.read()`` supplies the bytes to upload.
        pdfname: file name appended to ``path``.
        path: Dropbox folder prefix (concatenated as-is with ``pdfname``).

    Returns:
        The shared-link URL, or ``None`` if anything in the upload/share
        sequence fails (the error is printed).
    """
    dbxTeam = ADR_Access_DropboxTeam('admin')
    try:
        path = path + pdfname
        if check_if_file_exists(dbxTeam, path):
            print("if gowa el else <3")
            dbxTeam.files_delete(path)
        print("abl el meta <3")
        dbxTeam.files_upload(doc.read(), path)
        try:
            shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings(path)
        except dropbox.exceptions.ApiError:
            # Fall back to the legacy endpoint only on API rejections,
            # rather than on every possible exception.
            shared_link_metadata = dbxTeam.sharing_create_shared_link(path)
        return shared_link_metadata.url
    except Exception as e:
        print('Error uploading file to Dropbox: ' + str(e))
        # Explicit: callers receive None on failure, as before.
        return None
| | |
| |
|
| |
|
def upload_string_file(content_str, filename, path):
    """Upload a text string as a UTF-8 file to Dropbox and share it.

    An existing file at ``path + filename`` is deleted before the upload.

    Args:
        content_str: text to store; encoded as UTF-8.
        filename: file name appended to ``path``.
        path: Dropbox folder prefix (concatenated as-is with ``filename``).

    Returns:
        The shared-link URL on success, or the string
        ``"Error uploading file to Dropbox."`` on any failure.
    """
    try:
        dbxTeam = ADR_Access_DropboxTeam('admin')
        full_path = path + filename

        # Encode directly; wrapping the bytes in BytesIO only to call
        # getvalue() produced the same bytes.
        payload = content_str.encode("utf-8")

        if check_if_file_exists(dbxTeam, full_path):
            dbxTeam.files_delete(full_path)

        dbxTeam.files_upload(payload, full_path)

        try:
            shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings(full_path)
        except dropbox.exceptions.ApiError:
            # Legacy fallback for API rejections only (was a bare except).
            shared_link_metadata = dbxTeam.sharing_create_shared_link(full_path)
        return shared_link_metadata.url
    except Exception as e:
        print("Error uploading file to Dropbox:", str(e))
        return "Error uploading file to Dropbox."
| |
|
| |
|
| |
|
def uploadanyFile(doc, pdfname, path, flag=0):
    """Serialise ``doc``, upload it to Dropbox and return a shared link.

    The payload is built *before* any existing file is deleted, so a
    serialisation failure no longer destroys the file already stored at the
    target path (previously ``doc.write()`` ran after the delete in the
    non-XML branch).

    Args:
        doc: object to upload. When ``flag`` is truthy it is written with
            ``doc.write(file, encoding='utf-8', xml_declaration=True)``
            (presumably an ElementTree -- confirm against callers);
            otherwise ``doc.write()`` is expected to return the bytes.
        pdfname: target file name. When ``flag`` is truthy, everything from
            the first ``'.pdf'`` onward is replaced by ``'.xml'``.
        path: Dropbox folder prefix (concatenated as-is with the name).
        flag: truthy selects the XML branch described above.

    Returns:
        The shared-link URL on success, or the string
        ``'Error uploading file to Dropbox.'`` on any failure.
    """
    try:
        dbxTeam = ADR_Access_DropboxTeam('admin')

        if flag:
            pdfname = str(pdfname).split('.pdf')[0] + '.xml'
            buffer = BytesIO()
            doc.write(buffer, encoding='utf-8', xml_declaration=True)
            payload = buffer.getvalue()
        else:
            payload = doc.write()

        path = path + pdfname
        if check_if_file_exists(dbxTeam, path):
            dbxTeam.files_delete(path)
        dbxTeam.files_upload(payload, path)

        try:
            shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings(path)
        except dropbox.exceptions.ApiError:
            # Legacy fallback for API rejections only (was a bare except).
            shared_link_metadata = dbxTeam.sharing_create_shared_link(path)

        return shared_link_metadata.url
    except Exception as e:
        print('Error uploading file to Dropbox: ' + str(e))
        return 'Error uploading file to Dropbox.'
| |
|
| | |
| | |
def DropboxItemstoDF(folder_path):
    """List ``folder_path`` recursively and tabulate every file found.

    Fixes two defects of the previous version: the first page of the
    listing was never processed (only continuation pages were), and a
    listing that fit in a single page crashed because the result DataFrame
    was never assigned.

    Args:
        folder_path: Dropbox folder to walk.

    Returns:
        A ``(df, files_list)`` tuple: the DataFrame produced by
        ``handle_entries`` and the list of metadata dicts behind it.
    """
    files_list = []
    dbxTeam = ADR_Access_DropboxTeam('user')

    res = dbxTeam.files_list_folder(path=folder_path, recursive=True)
    df2 = handle_entries(res.entries, files_list)

    # Keep following the cursor until Dropbox reports no further pages.
    while res.has_more:
        res = dbxTeam.files_list_folder_continue(cursor=res.cursor)
        df2 = handle_entries(res.entries, files_list)

    return df2, files_list
| |
|
| |
|
def GetParquetDF():
    """Download the Parquet index file from Dropbox and load it.

    A shared link is obtained for the index file (falling back to the
    legacy endpoint when the API rejects the first call), the file is
    fetched through that link, and its bytes are parsed with pandas.

    Returns:
        The DataFrame read from the Parquet file.
    """
    index_path = '/TSA JOBS/ADR Test/DropboxDirectory/df.parquet.gzip'
    client = ADR_Access_DropboxTeam('user')

    try:
        link_meta = client.sharing_create_shared_link_with_settings(path=index_path)
    except dropbox.exceptions.ApiError:
        link_meta = client.sharing_create_shared_link(path=index_path)

    # The call returns (metadata, response); only the response body is used.
    response = client.sharing_get_shared_link_file(url=link_meta.url)[1]
    raw_bytes = response.content

    with io.BytesIO(raw_bytes) as stream:
        return pd.read_parquet(stream)
| |
|
| |
|
def getPathtoPDF_File(nameofPDF, progress_callback=None):
    """Look up a file by name in the cached index and return its path and link.

    Args:
        nameofPDF: file name to find; double quotes are stripped first.
        progress_callback: optional callable invoked with ``60`` once the
            path has been found.

    Returns:
        ``(path, link)`` for the first index row whose ``name`` matches, or
        the string ``'Project does not exist'`` when there is no match or
        the lookup/link creation fails.
    """
    parquetDf = load_parquet_df()
    nameofPDF = nameofPDF.replace('"', '')
    try:
        matches = parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display']
        if matches.empty:
            return 'Project does not exist'
        path = matches.iloc[0]
        if progress_callback:
            progress_callback(60)
        link = getSharedLink(path)
        print(path, link)
    except Exception:
        # Same best-effort contract as before, but no longer a bare except
        # that would also swallow KeyboardInterrupt/SystemExit.
        return 'Project does not exist'
    return path, link
| | |
| |
|
| | |
def getPDFData(path):
    """Download the file at ``path`` and return its raw content.

    Args:
        path: Dropbox path of the file to download.

    Returns:
        The ``content`` attribute of the download response.
    """
    client = ADR_Access_DropboxTeam('admin')
    _metadata, response = client.files_download(path)
    return response.content
| |
|
def retrieveProjects(projname, progress_callback=None):
    """Find the documents stored under a project's "01 Project Details" folder.

    The project is identified by the first four characters of ``projname``
    after quotes and spaces are removed. A file is relevant when its folder
    path (compared case-insensitively) contains both ``'/' + prefix`` and
    ``'01 project details'``.

    Changes from the previous version:
      * ``progress_callback`` is optional everywhere (the first call used
        to be unguarded and crashed with the default ``None``).
      * The cached index DataFrame is no longer mutated; helper columns
        were added and dropped in place on the shared cached object.
      * The project prefix is lower-cased before being compared with the
        lower-cased paths.
      * The folder path is cut at the marker case-insensitively, matching
        how rows were selected.

    Args:
        projname: project name/number text.
        progress_callback: optional callable receiving progress values
            (20, 40, 50, 60, 70, 80).

    Returns:
        ``(documentsToMeasure, RelevantDocuments, extracted_path)`` where
        both lists hold ``[name, path_display]`` pairs
        (``documentsToMeasure`` only those whose name ends in ``.pdf``) and
        ``extracted_path`` is the first relevant path truncated right after
        "01 Project Details", or ``None`` when nothing matched.
    """
    def _report(percent):
        # Progress reporting is optional; do nothing without a callback.
        if progress_callback:
            progress_callback(percent)

    _report(20)
    projnameNospaces = projname.strip().replace('"', '').replace("'", '').replace(" ", "")
    print(projname, projnameNospaces)
    projname = '/' + projnameNospaces[:4]
    print('projname', projname)

    # Paths are compared in lower case, so the needle must be too.
    needle = projname.lower()
    marker = '01 project details'

    parquetDf = load_parquet_df()
    _report(40)

    # Work on a derived Series so the cached DataFrame stays untouched.
    lowered_paths = parquetDf['path_display'].str.lower()
    _report(50)

    def path_matches(lowered_path):
        # Only the containing folder is inspected, not the file name.
        folder_path = os.path.dirname(lowered_path)
        return needle in folder_path and marker in folder_path

    mask = lowered_paths.apply(path_matches).astype(bool)
    _report(60)

    RelevantDocuments = parquetDf.loc[mask, ['name', 'path_display']].values.tolist()
    documentsToMeasure = [
        doc for doc in RelevantDocuments if doc[0].lower().endswith('.pdf')
    ]
    print('documentsToMeasure', documentsToMeasure)
    _report(70)

    if RelevantDocuments:
        first_path = RelevantDocuments[0][1]
        found = re.search(re.escape("01 Project Details"), first_path, flags=re.IGNORECASE)
        if found:
            extracted_path = first_path[:found.end()]
        else:
            # Historical behaviour when the marker cannot be located.
            extracted_path = first_path + "01 Project Details"
    else:
        extracted_path = None

    _report(80)
    return documentsToMeasure, RelevantDocuments, extracted_path
| | |