Spaces:
Sleeping
Sleeping
Update tsadropboxretrieval.py
Browse files- tsadropboxretrieval.py +27 -6
tsadropboxretrieval.py
CHANGED
|
@@ -193,6 +193,25 @@ def getPathtoPDF_File(nameofPDF):
|
|
| 193 |
return path,link
|
| 194 |
# parquetDf
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
# getPathtoPDF_File('A5157-EBLA-V5-XX-SH-L-0004-D2-01.pdf')
|
| 197 |
def getPDFData(path):
|
| 198 |
dbxTeam= ADR_Access_DropboxTeam('admin')
|
|
@@ -201,23 +220,25 @@ def getPDFData(path):
|
|
| 201 |
return data
|
| 202 |
|
| 203 |
def retrieveProjects(projname):
|
|
|
|
|
|
|
| 204 |
projname='/'+projname.split(' ')[0]
|
|
|
|
|
|
|
| 205 |
parquetDf=GetParquetDF()
|
| 206 |
documentsToMeasure = []
|
| 207 |
RelevantDocuments = []
|
| 208 |
-
projnameWithDetails = f'{projname} 01 Project Details'
|
| 209 |
-
|
| 210 |
parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
|
| 211 |
|
| 212 |
# Filter based on the presence of '/2221' and '01 Project Details'
|
| 213 |
mask = parquetDf['path_display_lower'].apply(lambda x: projname in x and '01 project details' in x)
|
| 214 |
-
|
| 215 |
# Filter RelevantDocuments and documentsToMeasure using the mask
|
| 216 |
-
RelevantDocuments = parquetDf[mask][['name', '
|
| 217 |
documentsToMeasure = [doc for doc in RelevantDocuments if doc[0].endswith('.pdf')] # Filter documentsToMeasure for PDF files later if needed
|
| 218 |
-
|
| 219 |
# Remove the temporary 'path_display_lower' column
|
| 220 |
parquetDf.drop(columns=['path_display_lower'], inplace=True)
|
|
|
|
| 221 |
return documentsToMeasure,RelevantDocuments
|
| 222 |
|
| 223 |
-
|
|
|
|
| 193 |
return path,link
|
| 194 |
# parquetDf
|
| 195 |
|
| 196 |
+
# getPathtoPDF_File('A5157-EBLA-V5-XX-SH-L-0004-D2-01.pdf')
|
| 197 |
+
def getPDFData(path):
    """Download the file stored at ``path`` and return its content.

    The download is performed through the client returned by
    ``ADR_Access_DropboxTeam('admin')``.

    Args:
        path: Location of the file, passed unchanged to ``files_download``.

    Returns:
        The ``content`` attribute of the download response.

    NOTE(review): another ``def getPDFData(path)`` appears further down in
    this file; the later definition is the one Python keeps -- confirm the
    duplicate is intentional.
    """
    client = ADR_Access_DropboxTeam('admin')
    # files_download returns a pair; only the second element is needed here.
    # Presumably (metadata, HTTP response) as in the Dropbox SDK -- TODO confirm.
    _metadata, response = client.files_download(path)
    return response.content
|
| 202 |
+
|
| 203 |
+
def getPathtoPDF_File(nameofPDF):
    """Look up a file by name and return its path together with a shared link.

    Args:
        nameofPDF: File name to search for in the ``name`` column of the
            frame returned by ``GetParquetDF()``. Any double-quote characters
            are stripped before the comparison.

    Returns:
        ``(path, link)`` for the first row whose ``name`` equals the cleaned
        file name, where ``link`` comes from ``getSharedLink(path)``.
        If the lookup or the link creation fails, the string
        ``'Project does not exist'`` is returned instead, so callers must
        handle both a tuple and a plain string.

    NOTE(review): this file appears to contain an earlier definition of the
    same function; the later one wins in Python -- confirm the duplicate is
    intentional.
    """
    parquetDf = GetParquetDF()
    nameofPDF = nameofPDF.replace('"', '')
    try:
        # .iloc[0] raises IndexError when no row matches the name.
        path = parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display'].iloc[0]
        link = getSharedLink(path)
        print(path, link)
    except Exception:
        # Best-effort lookup: any ordinary failure is reported with the
        # sentinel string. Catching Exception (rather than a bare except)
        # lets KeyboardInterrupt / SystemExit propagate.
        return 'Project does not exist'
    return path, link
|
| 213 |
+
# parquetDf
|
| 214 |
+
|
| 215 |
# getPathtoPDF_File('A5157-EBLA-V5-XX-SH-L-0004-D2-01.pdf')
|
| 216 |
def getPDFData(path):
|
| 217 |
dbxTeam= ADR_Access_DropboxTeam('admin')
|
|
|
|
| 220 |
return data
|
| 221 |
|
| 222 |
def retrieveProjects(projname):
    """Collect the documents stored in a project's '01 Project Details' folder.

    Args:
        projname: Project label. Only the text before the first space is
            used; it is prefixed with '/' to form the key searched for in
            each path.

    Returns:
        A tuple ``(documentsToMeasure, RelevantDocuments)``:
        ``RelevantDocuments`` is a list of ``[name, lower-cased path_display]``
        pairs for every row whose path contains both the project key and
        '01 project details'; ``documentsToMeasure`` is the subset of those
        pairs whose name ends with '.pdf'.
    """
    print('retrieve', projname)

    projname = '/' + projname.split(' ')[0]
    # NOTE(review): when the input starts with a double quote this removes the
    # leading '/' together with the quote -- confirm that is intended.
    projname = projname.replace('/"', '')
    print(projname)

    # Paths are compared in lower case, so the key must be lower-cased as
    # well; otherwise a key containing uppercase letters can never match.
    projectKey = projname.lower()

    parquetDf = GetParquetDF()

    # Work on a separate Series instead of adding (and later dropping) a
    # temporary column, so the frame returned by GetParquetDF() is never
    # modified, even if an error occurs part-way through.
    lowerPaths = parquetDf['path_display'].str.lower()

    # Literal substring tests; rows with a missing path simply do not match.
    mask = (
        lowerPaths.str.contains(projectKey, regex=False, na=False)
        & lowerPaths.str.contains('01 project details', regex=False, na=False)
    )
    print(mask)

    selected = parquetDf.loc[mask, ['name']].assign(path_display_lower=lowerPaths[mask])
    RelevantDocuments = selected.values.tolist()

    # Skip rows whose name is not a string (e.g. missing values) rather than
    # failing on them.
    documentsToMeasure = [
        doc for doc in RelevantDocuments
        if isinstance(doc[0], str) and doc[0].endswith('.pdf')
    ]
    print(documentsToMeasure)
    print(len(documentsToMeasure))
    return documentsToMeasure, RelevantDocuments
|
| 244 |
|
|
|