Marthee committed on
Commit
8cdb841
·
verified ·
1 Parent(s): 73b5241

Upload mainDBAlaa.py

Browse files
Files changed (1) hide show
  1. mainDBAlaa.py +170 -0
mainDBAlaa.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pathlib
2
+ import pandas as pd
3
+ import dropbox
4
+ from dropbox.exceptions import AuthError
5
+ from datetime import datetime
6
+
7
+ import time
8
+ # from main import keep_alive
9
+ import re
10
+ import os
11
+ import tsadropboxretrieval
12
+
13
+ # active_path = "/TSA Team Folder/ADR Test/"
14
+
15
+ # file1 = open("newprojects.txt", "w")
16
+ # file1.close()
17
+
18
+
19
+ # ######################################################
20
+ # def jan_update(): #if month==1 call this function to change active path
21
+ # year = datetime.now().date().year
22
+
23
+ # df = dropbox_list_files('')
24
+ # fol = []
25
+ # for l0 in df.name.values:
26
+ # matched = re.findall('.*' + str(year) + '.pdf', l0)
27
+ # #print(l0)
28
+ # if matched:
29
+ # fol.append(matched)
30
+ # if len(fol) > 0:
31
+ # filtered = df[df.name == fol[0][0]]
32
+
33
+ # path = filtered.iloc[0]['path_display']
34
+ # active_path = '/' + path.split('/')[1]
35
+ # return active_path
36
+ # else:
37
+ # return
38
+
39
+
40
+ # #######################################################
41
+ # def feb_update(reponame): # if month=2, clean 2022 folder
42
+ # year = datetime.now().date().year
43
+ # li = api.list_repo_files(
44
+ # repo_id=reponame,
45
+ # repo_type='space',
46
+ # )
47
+ # found = []
48
+ # for i in li: #match regex to find prev year files
49
+ # matched = re.findall(str(year - 1)[2:4] + '.*.pdf$', i)
50
+
51
+ # if matched:
52
+ # found.append(matched)
53
+ # for f in found: #delete files
54
+ # print(f[0])
55
+ # api.delete_file(
56
+ # path_in_repo=f[0],
57
+ # repo_id=reponame,
58
+ # repo_type="space",
59
+ # )
60
+
61
+
62
+ # #######################################################
63
+ # #sorting/cleaning files(Jan & Feb only)
64
+ # def yearly_update():
65
+ # month = datetime.now().date().month
66
+ # day=datetime.now().date().day
67
+ # if month == 1 and day == 10: #move to new years folder
68
+ # global active_path
69
+ # active_path = jan_update()
70
+ # elif month == 2 and day == 1:
71
+ # print('feb')
72
+ # feb_update('alaaadr/SearchDocuments')
73
+ # feb_update('alaaadr/ImageSearch')
74
+ # feb_update('alaaadr/Measurements')
75
+
76
+
77
+ ############################################
78
def download_NewlyAddedFiles(active_path):
    """Add the files modified today under ``active_path`` to the parquet index.

    The listing comes from ``tsadropboxretrieval.DropboxItemstoDF`` (first
    element of its result) and is filtered to rows whose ``server_modified``
    date equals today's local date. Each matching row is inserted at the top
    of the frame returned by ``tsadropboxretrieval.GetParquetDF``, and the
    updated frame is handed to ``tsadropboxretrieval.dropbox_upload_file``.

    Args:
        active_path: Dropbox folder path forwarded to ``DropboxItemstoDF``.

    Returns:
        A DataFrame with the columns ``name``, ``path_display``,
        ``client_modified`` and ``server_modified``. It is never filled in
        by this function, so it is always empty (see the review note below).

    Side effects:
        Prints and uploads the updated parquet frame when at least one file
        was modified today, and appends the text form of the returned frame
        to ``newprojects.txt`` in the current working directory.
    """
    # NOTE(review): datetime.now() is local time; if server_modified is in
    # UTC, files changed close to midnight can be missed -- confirm.
    today = datetime.now().date()
    df = tsadropboxretrieval.DropboxItemstoDF(active_path)[0]

    todays_files = df[df.server_modified.dt.date == today].reset_index(drop=True)
    dfParquet = tsadropboxretrieval.GetParquetDF()

    columns = ['name', 'path_display', 'client_modified', 'server_modified']

    # NOTE(review): this frame is created but never populated, so the function
    # always returns (and logs) an empty frame. Kept as-is because the intended
    # contents are not visible here -- presumably today's rows; confirm.
    dictionaryDF = pd.DataFrame(columns=columns)

    for _, row in todays_files.iterrows():
        # Prepend the row: write it at index -1, shift every index up by one,
        # then sort so the new row becomes index 0.
        # Assumes the parquet frame has exactly these four columns, in this
        # order, and an integer index -- TODO confirm.
        dfParquet.loc[-1] = [row[column] for column in columns]
        dfParquet.index = dfParquet.index + 1
        dfParquet = dfParquet.sort_index()

    # NOTE(review): the original nesting of the print/upload calls could not
    # be recovered; uploading once after all rows are inserted gives the same
    # final parquet content without one upload per row.
    if not todays_files.empty:
        print(dfParquet)
        tsadropboxretrieval.dropbox_upload_file(dfParquet)

    # The context manager closes the handle even if the write raises.
    with open("newprojects.txt", "a") as file1:  # append mode
        file1.write(str(dictionaryDF))

    return dictionaryDF
109
+ #download todays files
110
+ # dropbox_download_file(path, 'dropbox/' + down_name)
111
+ #push to hf
112
+ # push_to_hf()
113
+
114
+
115
+ #################################################################################################
116
+
117
+
118
+ # def dropbox_connect():
119
+ # """Create a connection to Dropbox."""
120
+
121
+ # try:
122
+ # dbx = dropbox.Dropbox(app_key='cujlb5a2hoesvpy',
123
+ # app_secret='eb8khe4fuqda1kh',
124
+ # oauth2_refresh_token=refresh_token)
125
+ # except AuthError as e:
126
+ # print('Error connecting to Dropbox with access token: ' + str(e))
127
+ # return dbx
128
+
129
+
130
+ #######################################################
131
def dropbox_list_files(path):
    """Return a pandas DataFrame of the files under a Dropbox folder.

    The folder is listed recursively with the client returned by
    ``tsadropboxretrieval.ADR_Access_DropboxTeam('admin')``. Only entries that
    are ``dropbox.files.FileMetadata`` are kept; other entry kinds are skipped.

    Args:
        path: Dropbox folder path passed to ``files_list_folder``.

    Returns:
        A DataFrame with the columns ``name``, ``path_display``,
        ``client_modified`` and ``server_modified``, sorted by
        ``server_modified`` in descending order (empty when the folder holds
        no files), or ``None`` when the listing fails. Failures are printed,
        not raised.
    """
    columns = ['name', 'path_display', 'client_modified', 'server_modified']

    dbx = tsadropboxretrieval.ADR_Access_DropboxTeam('admin')
    print(dbx)

    try:
        # files_list_folder returns a result object, not a list: the items
        # are in its ``entries`` attribute, and large folders are delivered
        # in pages that must be fetched with files_list_folder_continue.
        # Assumes dbx is a standard Dropbox SDK client -- TODO confirm.
        result = dbx.files_list_folder(path, recursive=True)
        entries = list(result.entries)
        while result.has_more:
            result = dbx.files_list_folder_continue(result.cursor)
            entries.extend(result.entries)

        files_list = [
            {
                'name': entry.name,
                'path_display': entry.path_display,
                'client_modified': entry.client_modified,
                'server_modified': entry.server_modified,
            }
            for entry in entries
            if isinstance(entry, dropbox.files.FileMetadata)
        ]

        # Passing the columns explicitly keeps the sort below valid even when
        # no file entries were found.
        df = pd.DataFrame.from_records(files_list, columns=columns)
        return df.sort_values(by='server_modified', ascending=False)

    except Exception as e:
        # Best-effort contract kept from the original: report and return None.
        print('Error getting list of files from Dropbox: ' + str(e))
        return None
155
+
156
+
157
+ ###############################################
158
+ # def dropbox_download_file(dropbox_file_path, local_file_path):
159
+ # """Download a file from Dropbox to the local machine."""
160
+ # print('downloading....')
161
+ # try:
162
+ # dbx = dropbox_connect()
163
+
164
+ # with open(local_file_path, 'wb') as f:
165
+ # metadata, result = dbx.files_download(path=dropbox_file_path)
166
+ # f.write(result.content)
167
+ # except Exception as e:
168
+ # print('Error downloading file from Dropbox: ' + str(e))
169
+
170
+