Marthee committed on
Commit
78cfd9e
·
verified ·
1 Parent(s): fd8a967

Update tsadropboxretrieval.py

Browse files
Files changed (1) hide show
  1. tsadropboxretrieval.py +4 -8
tsadropboxretrieval.py CHANGED
@@ -201,20 +201,16 @@ def getPDFData(path):
201
  return data
202
 
203
  def retrieveProjects(projname):
204
- print('retrieve')
205
-
206
  parquetDf=GetParquetDF()
207
  documentsToMeasure = []
208
  RelevantDocuments = []
209
  projnameWithDetails = f'{projname} 01 Project Details'
210
- # Split the project name into words and convert to lowercase
211
- matches = set(re.split(r'[`\-= ~!@#$%^&*()_+\[\]{};\'\\:"|<,/<>?]', projnameWithDetails.lower()))
212
-
213
- # Convert the 'path_display' column to lowercase for case-insensitive matching
214
  parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
215
 
216
- # Create a mask to filter relevant documents
217
- mask = parquetDf['path_display_lower'].apply(lambda x: all(match in x for match in matches))
218
 
219
  # Filter RelevantDocuments and documentsToMeasure using the mask
220
  RelevantDocuments = parquetDf[mask][['name', 'path_display']].values.tolist()
 
201
  return data
202
 
203
  def retrieveProjects(projname):
204
+ projname='/'+projname.split(' ')[0]
 
205
  parquetDf=GetParquetDF()
206
  documentsToMeasure = []
207
  RelevantDocuments = []
208
  projnameWithDetails = f'{projname} 01 Project Details'
209
+
 
 
 
210
  parquetDf['path_display_lower'] = parquetDf['path_display'].str.lower()
211
 
212
+ # Keep only paths that contain the project prefix (e.g. '/2221') and '01 project details' (checked against the lowercased path)
213
+ mask = parquetDf['path_display_lower'].apply(lambda x: projname in x and '01 project details' in x)
214
 
215
  # Filter RelevantDocuments and documentsToMeasure using the mask
216
  RelevantDocuments = parquetDf[mask][['name', 'path_display']].values.tolist()