MeasurementTesting

Sleeping

App Files Files Community

Marthee committed on Mar 23, 2025

Commit

0cd855f

verified ·

1 Parent(s): cf4099a

Update tsadropboxretrieval.py

Browse files

Files changed (1) hide show

tsadropboxretrieval.py +20 -10

tsadropboxretrieval.py CHANGED Viewed

@@ -1,8 +1,6 @@
 # -*- coding: utf-8 -*-
 """TSADropboxRetrieval.ipynb
 Automatically generated by Colaboratory.
 Original file is located at
     https://colab.research.google.com/drive/1d-UI3Y-z7Dj-vqu69CxluOUnN4rvsUuE
 """
@@ -205,12 +203,14 @@ def GetParquetDF():
     return df
-def getPathtoPDF_File(nameofPDF):
   parquetDf = load_parquet_df()
   nameofPDF=nameofPDF.replace('"', '')
   try:
     # path=parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display'].iloc[0]
     path = parquetDf.at[parquetDf.index[parquetDf['name'] == nameofPDF][0], 'path_display']
     link=getSharedLink(path)
     print(path,link)
   except:
@@ -225,9 +225,9 @@ def getPDFData(path):
   data = res.content
   return data
-def retrieveProjects(projname):
-    print('retrieve', projname)
     projname = '/' + projname.split(' ')[0]  # Extract main project name
     projname = projname.replace('/"', '')  # Remove unwanted characters
@@ -237,18 +237,28 @@ def retrieveProjects(projname):
     documentsToMeasure = []
     RelevantDocuments = []
     # Store the original path before converting it to lowercase
     parquetDf['original_path_display'] = parquetDf['path_display']
     # Create a lowercase column for case-insensitive matching
     parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
     # Filter using the lowercase column but retrieve the original paths
     mask = parquetDf['path_display_lower'].apply(lambda x: '/'+projname in x and '01 project details' in x)
     # Retrieve the original (case-sensitive) paths before lowering them
     RelevantDocuments = parquetDf[mask][['name', 'original_path_display']].values.tolist()
     documentsToMeasure = [doc for doc in RelevantDocuments if doc[0].endswith('.pdf')]  # Keep only PDFs
     # Extract path from the original (case-sensitive) column
     if RelevantDocuments:
@@ -258,6 +268,6 @@ def retrieveProjects(projname):
     # Remove temporary columns
     parquetDf.drop(columns=['original_path_display', 'path_display_lower'], inplace=True)
     return documentsToMeasure, RelevantDocuments, extracted_path

 # -*- coding: utf-8 -*-
 """TSADropboxRetrieval.ipynb
 Automatically generated by Colaboratory.
 Original file is located at
     https://colab.research.google.com/drive/1d-UI3Y-z7Dj-vqu69CxluOUnN4rvsUuE
 """
     return df
+def getPathtoPDF_File(nameofPDF,progress_callback=None):
   parquetDf = load_parquet_df()
   nameofPDF=nameofPDF.replace('"', '')
   try:
     # path=parquetDf.loc[parquetDf['name'] == nameofPDF, 'path_display'].iloc[0]
     path = parquetDf.at[parquetDf.index[parquetDf['name'] == nameofPDF][0], 'path_display']
+    if progress_callback:
+        progress_callback(60)
     link=getSharedLink(path)
     print(path,link)
   except:
   data = res.content
   return data
+def retrieveProjects(projname, progress_callback=None):
+    # if progress_callback:
+    progress_callback(20)
     projname = '/' + projname.split(' ')[0]  # Extract main project name
     projname = projname.replace('/"', '')  # Remove unwanted characters
     documentsToMeasure = []
     RelevantDocuments = []
+    # Send progress update (40%)
+    if progress_callback:
+        progress_callback(40)
     # Store the original path before converting it to lowercase
     parquetDf['original_path_display'] = parquetDf['path_display']
     # Create a lowercase column for case-insensitive matching
     parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
+    if progress_callback:
+        progress_callback(50)
     # Filter using the lowercase column but retrieve the original paths
     mask = parquetDf['path_display_lower'].apply(lambda x: '/'+projname in x and '01 project details' in x)
+    if progress_callback:
+        progress_callback(60)
     # Retrieve the original (case-sensitive) paths before lowering them
     RelevantDocuments = parquetDf[mask][['name', 'original_path_display']].values.tolist()
     documentsToMeasure = [doc for doc in RelevantDocuments if doc[0].endswith('.pdf')]  # Keep only PDFs
+    # Send progress update (70%)
+    if progress_callback:
+        progress_callback(70)
     # Extract path from the original (case-sensitive) column
     if RelevantDocuments:
     # Remove temporary columns
     parquetDf.drop(columns=['original_path_display', 'path_display_lower'], inplace=True)
+    if progress_callback:
+        progress_callback(80)
     return documentsToMeasure, RelevantDocuments, extracted_path