Marthee committed on
Commit
67f3b5b
·
verified ·
1 Parent(s): 087eac8

Update tsadropboxretrieval.py

Browse files
Files changed (1) hide show
  1. tsadropboxretrieval.py +9 -56
tsadropboxretrieval.py CHANGED
@@ -94,7 +94,8 @@ def dropbox_upload_file(df, flag=0):
94
  dbxTeam= ADR_Access_DropboxTeam('admin')
95
  path='/TSA JOBS/ADR Test/DropboxDirectory/df.parquet.gzip'
96
  doc=df.to_parquet()
97
- meta=dbxTeam.files_upload(doc,path,mode=dropbox.files.WriteMode("overwrite"))
 
98
 
99
  except Exception as e:
100
  print('Error uploading file to Dropbox: ' + str(e))
@@ -104,16 +105,17 @@ def dropbox_upload_file(df, flag=0):
104
  def uploadanyFile(doc,pdfname,path,flag=0):
105
  try:
106
  dbxTeam= ADR_Access_DropboxTeam('admin')
107
-
108
  if flag: #tree = doc
109
  pdfname=str(pdfname).split('.pdf')[0]+'.xml'
110
  path=path+pdfname
111
  f = BytesIO()
112
- doc.write(f, encoding='utf-8', xml_declaration=True)
113
- meta=dbxTeam.files_upload(f.getvalue() ,path,mode=dropbox.files.WriteMode("overwrite"))
 
114
  else:
115
  path=path+pdfname
116
- meta=dbxTeam.files_upload( doc.write() ,path,mode=dropbox.files.WriteMode("overwrite"))
 
117
  try:
118
  shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings(path)
119
  except:
@@ -141,28 +143,6 @@ def DropboxItemstoDF(folder_path):
141
  # print(df2)
142
  return df2 , files_list
143
 
144
- # df2=DropboxItemstoDF()
145
-
146
- # Feather format for storing data
147
- # def ToFeather(df2):
148
- # df2.to_feather('df2.feather')
149
- # fthr=pd.read_feather('df2.feather')
150
- # return fthr
151
-
152
- # def GetParquetDF():
153
- # # # read the parquet file in current directory, back into a pandas data frame
154
- # dbxTeam=ADR_Access_DropboxTeam('user') # or pass dbx in parameters
155
- # try:
156
- # shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings( path='/TSA Team Folder/ADR Test/DropboxDirectory/df.parquet.gzip')
157
- # except:
158
- # shared_link_metadata=dbxTeam.sharing_create_shared_link( path='/TSA Team Folder/ADR Test/DropboxDirectory/df.parquet.gzip')
159
- # metadata, res = dbxTeam.sharing_get_shared_link_file(url=shared_link_metadata.url)
160
- # data=res.content # or res.content, or iter_content, or iter_lines, etc. as needed
161
-
162
- # pq_file = io.BytesIO(data)
163
- # df = pd.read_parquet(pq_file)
164
- # return df
165
-
166
 
167
  def GetParquetDF():
168
  # Initialize Dropbox client
@@ -191,6 +171,7 @@ def getPathtoPDF_File(nameofPDF):
191
  try:
192
  path=parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display'].iloc[0]
193
  link=getSharedLink(path)
 
194
  except:
195
  return 'Project does not exist'
196
  return path,link
@@ -203,33 +184,6 @@ def getPDFData(path):
203
  data = res.content
204
  return data
205
 
206
- # def retrieveProjects(projname):
207
- # print('retrieve')
208
-
209
- # parquetDf=GetParquetDF()
210
- # documentsToMeasure = []
211
- # RelevantDocuments = []
212
- # projnameWithDetails = f'{projname} 01 Project Details'
213
-
214
- # # Split the project name into words and convert to lowercase
215
- # matches = set(re.findall(r'\b\w+\b', projnameWithDetails.lower()))
216
-
217
- # # Convert the 'path_display' column to lowercase for case-insensitive matching
218
- # parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
219
-
220
- # # Check if all words in matches are present in the path
221
- # mask = parquetDf['path_display_lower'].apply(lambda x: all(match in x for match in matches))
222
-
223
- # # Filter relevant documents
224
- # RelevantDocuments = parquetDf.loc[mask, ['name', 'path_display']].values.tolist()
225
-
226
- # # Filter documents to measure
227
- # documentsToMeasure = parquetDf.loc[(mask) & (parquetDf['name'].str.endswith('.pdf')), ['name', 'path_display']].values.tolist()
228
-
229
- # # Remove the temporary 'path_display_lower' column
230
- # parquetDf.drop(columns=['path_display_lower'], inplace=True)
231
- # return documentsToMeasure,RelevantDocuments
232
-
233
  def retrieveProjects(projname):
234
  print('retrieve')
235
 
@@ -252,8 +206,7 @@ def retrieveProjects(projname):
252
 
253
  # Remove the temporary 'path_display_lower' column
254
  parquetDf.drop(columns=['path_display_lower'], inplace=True)
 
255
  return documentsToMeasure,RelevantDocuments
256
 
257
 
258
-
259
-
 
94
  dbxTeam= ADR_Access_DropboxTeam('admin')
95
  path='/TSA JOBS/ADR Test/DropboxDirectory/df.parquet.gzip'
96
  doc=df.to_parquet()
97
+ dbxTeam.files_delete(path)
98
+ meta=dbxTeam.files_upload(doc,path)
99
 
100
  except Exception as e:
101
  print('Error uploading file to Dropbox: ' + str(e))
 
105
  def uploadanyFile(doc,pdfname,path,flag=0):
106
  try:
107
  dbxTeam= ADR_Access_DropboxTeam('admin')
 
108
  if flag: #tree = doc
109
  pdfname=str(pdfname).split('.pdf')[0]+'.xml'
110
  path=path+pdfname
111
  f = BytesIO()
112
+ doc.write(f, encoding='utf-8', xml_declaration=True)
113
+ dbxTeam.files_delete(path)
114
+ meta=dbxTeam.files_upload(f.getvalue() ,path)
115
  else:
116
  path=path+pdfname
117
+ dbxTeam.files_delete(path)
118
+ meta=dbxTeam.files_upload(doc.write() ,path)
119
  try:
120
  shared_link_metadata = dbxTeam.sharing_create_shared_link_with_settings(path)
121
  except:
 
143
  # print(df2)
144
  return df2 , files_list
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  def GetParquetDF():
148
  # Initialize Dropbox client
 
171
  try:
172
  path=parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display'].iloc[0]
173
  link=getSharedLink(path)
174
+ print(path,link)
175
  except:
176
  return 'Project does not exist'
177
  return path,link
 
184
  data = res.content
185
  return data
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  def retrieveProjects(projname):
188
  print('retrieve')
189
 
 
206
 
207
  # Remove the temporary 'path_display_lower' column
208
  parquetDf.drop(columns=['path_display_lower'], inplace=True)
209
+ print('done')
210
  return documentsToMeasure,RelevantDocuments
211
 
212