Marthee committed on
Commit
c1d20df
·
verified ·
1 Parent(s): 78cfd9e

Update tsadropboxretrieval.py

Browse files
Files changed (1) hide show
  1. tsadropboxretrieval.py +27 -6
tsadropboxretrieval.py CHANGED
@@ -193,6 +193,25 @@ def getPathtoPDF_File(nameofPDF):
193
  return path,link
194
  # parquetDf
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  # getPathtoPDF_File('A5157-EBLA-V5-XX-SH-L-0004-D2-01.pdf')
197
  def getPDFData(path):
198
  dbxTeam= ADR_Access_DropboxTeam('admin')
@@ -201,23 +220,25 @@ def getPDFData(path):
201
  return data
202
 
203
  def retrieveProjects(projname):
 
 
204
  projname='/'+projname.split(' ')[0]
 
 
205
  parquetDf=GetParquetDF()
206
  documentsToMeasure = []
207
  RelevantDocuments = []
208
- projnameWithDetails = f'{projname} 01 Project Details'
209
-
210
  parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
211
 
212
  # Filter based on the presence of '/2221' and '01 Project Details'
213
  mask = parquetDf['path_display_lower'].apply(lambda x: projname in x and '01 project details' in x)
214
-
215
  # Filter RelevantDocuments and documentsToMeasure using the mask
216
- RelevantDocuments = parquetDf[mask][['name', 'path_display']].values.tolist()
217
  documentsToMeasure = [doc for doc in RelevantDocuments if doc[0].endswith('.pdf')] # Filter documentsToMeasure for PDF files later if needed
218
-
219
  # Remove the temporary 'path_display_lower' column
220
  parquetDf.drop(columns=['path_display_lower'], inplace=True)
 
221
  return documentsToMeasure,RelevantDocuments
222
 
223
-
 
193
  return path,link
194
  # parquetDf
195
 
196
+ # getPathtoPDF_File('A5157-EBLA-V5-XX-SH-L-0004-D2-01.pdf')
197
def getPDFData(path):
    """Download the Dropbox file at ``path`` and return its content.

    Args:
        path: Path passed straight to ``files_download``.

    Returns:
        The ``content`` attribute of the download response (presumably the
        raw file bytes -- confirm against the Dropbox SDK).
    """
    team_client = ADR_Access_DropboxTeam('admin')
    # files_download yields a (metadata, response) pair; only the response
    # body is needed here.
    _metadata, response = team_client.files_download(path)
    return response.content
202
+
203
def getPathtoPDF_File(nameofPDF):
    """Look up a PDF by file name and return its Dropbox path and shared link.

    Double quotes are removed from ``nameofPDF`` before it is compared,
    exactly, against the ``name`` column of the table returned by
    ``GetParquetDF()``. When several rows match, the first one is used.

    Args:
        nameofPDF: File name to search for.

    Returns:
        ``(path, link)`` on success, where ``path`` is the matching row's
        ``path_display`` value and ``link`` is the result of
        ``getSharedLink(path)``. The string ``'Project does not exist'`` when
        no row matches or when the lookup / link creation fails.
    """
    parquetDf = GetParquetDF()
    nameofPDF = nameofPDF.replace('"', '')
    try:
        matches = parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display']
        if matches.empty:
            # Explicit "not found" check instead of relying on an IndexError.
            return 'Project does not exist'
        path = matches.iloc[0]
        link = getSharedLink(path)
    except Exception as exc:
        # Callers expect the sentinel string rather than an exception, so the
        # best-effort behaviour is kept -- but the real cause is logged, and
        # KeyboardInterrupt / SystemExit are no longer swallowed as they were
        # by the bare ``except:``.
        print(f'getPathtoPDF_File failed for {nameofPDF!r}: {exc}')
        return 'Project does not exist'
    print(path, link)
    return path, link
213
+ # parquetDf
214
+
215
  # getPathtoPDF_File('A5157-EBLA-V5-XX-SH-L-0004-D2-01.pdf')
216
  def getPDFData(path):
217
  dbxTeam= ADR_Access_DropboxTeam('admin')
 
220
  return data
221
 
222
  def retrieveProjects(projname):
223
+ print('retrieve',projname)
224
+
225
  projname='/'+projname.split(' ')[0]
226
+ projname=projname.replace('/"', '')
227
+ print(projname)
228
  parquetDf=GetParquetDF()
229
  documentsToMeasure = []
230
  RelevantDocuments = []
 
 
231
  parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
232
 
233
  # Filter based on the presence of '/2221' and '01 Project Details'
234
  mask = parquetDf['path_display_lower'].apply(lambda x: projname in x and '01 project details' in x)
235
+ print(mask)
236
  # Filter RelevantDocuments and documentsToMeasure using the mask
237
+ RelevantDocuments = parquetDf[mask][['name', 'path_display_lower']].values.tolist()
238
  documentsToMeasure = [doc for doc in RelevantDocuments if doc[0].endswith('.pdf')] # Filter documentsToMeasure for PDF files later if needed
239
+ print(documentsToMeasure)
240
  # Remove the temporary 'path_display_lower' column
241
  parquetDf.drop(columns=['path_display_lower'], inplace=True)
242
+ print(len(documentsToMeasure))
243
  return documentsToMeasure,RelevantDocuments
244