Spaces:
Sleeping
Sleeping
| import pathlib | |
| import pandas as pd | |
| import dropbox | |
| from dropbox.exceptions import AuthError | |
| from datetime import datetime | |
| import time | |
| # from main import keep_alive | |
| import re | |
| import os | |
| import tsadropboxretrieval | |
| # active_path = "/TSA Team Folder/ADR Test/" | |
| # file1 = open("newprojects.txt", "w") | |
| # file1.close() | |
| # ###################################################### | |
| # def jan_update(): #if month==1 call this function to change active path | |
| # year = datetime.now().date().year | |
| # df = dropbox_list_files('') | |
| # fol = [] | |
| # for l0 in df.name.values: | |
| # matched = re.findall('.*' + str(year) + '.pdf', l0) | |
| # #print(l0) | |
| # if matched: | |
| # fol.append(matched) | |
| # if len(fol) > 0: | |
| # filtered = df[df.name == fol[0][0]] | |
| # path = filtered.iloc[0]['path_display'] | |
| # active_path = '/' + path.split('/')[1] | |
| # return active_path | |
| # else: | |
| # return | |
| # ####################################################### | |
| # def feb_update(reponame): # if month=2, clean 2022 folder | |
| # year = datetime.now().date().year | |
| # li = api.list_repo_files( | |
| # repo_id=reponame, | |
| # repo_type='space', | |
| # ) | |
| # found = [] | |
| # for i in li: #match regex to find prev year files | |
| # matched = re.findall(str(year - 1)[2:4] + '.*.pdf$', i) | |
| # if matched: | |
| # found.append(matched) | |
| # for f in found: #delete files | |
| # print(f[0]) | |
| # api.delete_file( | |
| # path_in_repo=f[0], | |
| # repo_id=reponame, | |
| # repo_type="space", | |
| # ) | |
| # ####################################################### | |
| # #sorting/cleaning files(Jan & Feb only) | |
| # def yearly_update(): | |
| # month = datetime.now().date().month | |
| # day=datetime.now().date().day | |
| # if month == 1 and day == 10: #move to new years folder | |
| # global active_path | |
| # active_path = jan_update() | |
| # elif month == 2 and day == 1: | |
| # print('feb') | |
| # feb_update('alaaadr/SearchDocuments') | |
| # feb_update('alaaadr/ImageSearch') | |
| # feb_update('alaaadr/Measurements') | |
| ############################################ | |
def download_NewlyAddedFiles(active_path):
    """Add files modified on Dropbox today to the parquet index and upload it.

    Lists the items under ``active_path`` with
    ``tsadropboxretrieval.DropboxItemstoDF``, keeps the rows whose
    ``server_modified`` date equals today's local date, inserts each of them
    at the top of the frame returned by ``tsadropboxretrieval.GetParquetDF``
    and passes the result to ``tsadropboxretrieval.dropbox_upload_file``.
    The string form of the returned frame is appended to ``newprojects.txt``.

    Args:
        active_path: Dropbox folder path forwarded to ``DropboxItemstoDF``.

    Returns:
        pandas.DataFrame with the columns ``name``, ``path_display``,
        ``client_modified`` and ``server_modified``.

        NOTE(review): this frame is created empty and nothing is ever added
        to it, so the function always returns (and logs) an empty frame.
        That looks unintended, but it is preserved here because callers may
        rely on it -- confirm and populate it if new rows were meant to be
        reported.
    """
    columns = ['name', 'path_display', 'client_modified', 'server_modified']

    today = datetime.now().date()
    df = tsadropboxretrieval.DropboxItemstoDF(active_path)[0]
    todays_files = df[df.server_modified.dt.date == today].reset_index(drop=True)

    dfParquet = tsadropboxretrieval.GetParquetDF()
    dictionaryDF = pd.DataFrame(columns=columns)

    for row in todays_files[columns].itertuples(index=False, name=None):
        # Prepend the row: write it at label -1, shift every label up by one
        # so it becomes label 0, then restore label order.
        # Assumes the parquet frame has exactly these four columns in this
        # order (values are assigned positionally) -- TODO confirm.
        dfParquet.loc[-1] = list(row)
        dfParquet.index = dfParquet.index + 1
        dfParquet = dfParquet.sort_index()

    # NOTE(review): the upload runs once, after all rows were inserted; the
    # source this was recovered from had lost its indentation, so confirm it
    # was not meant to run per row.
    print(dfParquet)
    tsadropboxretrieval.dropbox_upload_file(dfParquet)

    # Append mode keeps the log of earlier runs; the context manager closes
    # the handle even if the write fails.
    with open("newprojects.txt", "a", encoding="utf-8") as file1:
        file1.write(str(dictionaryDF))
    return dictionaryDF
| #download todays files | |
| # dropbox_download_file(path, 'dropbox/' + down_name) | |
| #push to hf | |
| # push_to_hf() | |
| ################################################################################################# | |
| # def dropbox_connect(): | |
| # """Create a connection to Dropbox.""" | |
| # try: | |
| # dbx = dropbox.Dropbox(app_key='cujlb5a2hoesvpy', | |
| # app_secret='eb8khe4fuqda1kh', | |
| # oauth2_refresh_token=refresh_token) | |
| # except AuthError as e: | |
| # print('Error connecting to Dropbox with access token: ' + str(e)) | |
| # return dbx | |
| ####################################################### | |
def dropbox_list_files(path):
    """Return a DataFrame of the files under a Dropbox folder, newest first.

    The folder is listed recursively through the client returned by
    ``tsadropboxretrieval.ADR_Access_DropboxTeam('admin')``. Only entries
    that are ``dropbox.files.FileMetadata`` are kept, so folders and deleted
    entries are skipped.

    Args:
        path: Dropbox folder path to list.

    Returns:
        pandas.DataFrame with the columns ``name``, ``path_display``,
        ``client_modified`` and ``server_modified``, sorted by
        ``server_modified`` descending. The frame is empty (but has these
        columns) when the folder holds no files. Returns ``None`` if listing
        fails; the error is printed rather than raised.
    """
    columns = ['name', 'path_display', 'client_modified', 'server_modified']

    dbx = tsadropboxretrieval.ADR_Access_DropboxTeam('admin')
    print(dbx)
    try:
        # files_list_folder returns a ListFolderResult, which is not itself
        # iterable: the entries live in `.entries`, and large folders are
        # paged through `has_more` / `cursor`.
        result = dbx.files_list_folder(path, recursive=True)
        files_list = []
        while True:
            files_list.extend(
                {
                    'name': entry.name,
                    'path_display': entry.path_display,
                    'client_modified': entry.client_modified,
                    'server_modified': entry.server_modified,
                }
                for entry in result.entries
                if isinstance(entry, dropbox.files.FileMetadata)
            )
            if not result.has_more:
                break
            result = dbx.files_list_folder_continue(result.cursor)

        # Explicit columns keep the sort valid when no files were found.
        df = pd.DataFrame.from_records(files_list, columns=columns)
        return df.sort_values(by='server_modified', ascending=False)
    except Exception as e:
        # Best-effort by design: report the failure and give the caller None.
        print('Error getting list of files from Dropbox: ' + str(e))
        return None
| ############################################### | |
| # def dropbox_download_file(dropbox_file_path, local_file_path): | |
| # """Download a file from Dropbox to the local machine.""" | |
| # print('downloading....') | |
| # try: | |
| # dbx = dropbox_connect() | |
| # with open(local_file_path, 'wb') as f: | |
| # metadata, result = dbx.files_download(path=dropbox_file_path) | |
| # f.write(result.content) | |
| # except Exception as e: | |
| # print('Error downloading file from Dropbox: ' + str(e)) | |