Corey Morris
committed on
Commit
·
68bce52
1
Parent(s):
ad0b971
Catching exceptions when processing files. As new data is introduced, I want to know which files may have different formats and cause problems, but the application shouldn't halt just because it can't process a single file.
Browse files- result_data_processor.py +17 -11
result_data_processor.py
CHANGED
|
@@ -20,6 +20,7 @@ class ResultDataProcessor:
|
|
| 20 |
if fnmatch.fnmatch(basename, pattern):
|
| 21 |
filename = os.path.join(root, basename)
|
| 22 |
matching_files[root] = filename
|
|
|
|
| 23 |
matching_files = {key: value for key, value in matching_files.items() if 'gpt-j-6b' not in key}
|
| 24 |
matching_files = list(matching_files.values())
|
| 25 |
return matching_files
|
|
@@ -94,17 +95,22 @@ class ResultDataProcessor:
|
|
| 94 |
dataframes = []
|
| 95 |
organization_names = []
|
| 96 |
for filename in self._find_files(self.directory, self.pattern):
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
|
| 110 |
data = pd.concat(dataframes, axis=1).transpose()
|
|
|
|
| 20 |
if fnmatch.fnmatch(basename, pattern):
|
| 21 |
filename = os.path.join(root, basename)
|
| 22 |
matching_files[root] = filename
|
| 23 |
+
# TODO decide on removing this since I am catching the error when processing the file
|
| 24 |
matching_files = {key: value for key, value in matching_files.items() if 'gpt-j-6b' not in key}
|
| 25 |
matching_files = list(matching_files.values())
|
| 26 |
return matching_files
|
|
|
|
| 95 |
dataframes = []
|
| 96 |
organization_names = []
|
| 97 |
for filename in self._find_files(self.directory, self.pattern):
|
| 98 |
+
try:
|
| 99 |
+
raw_data = self._read_and_transform_data(filename)
|
| 100 |
+
split_path = filename.split('/')
|
| 101 |
+
model_name = split_path[2]
|
| 102 |
+
organization_name = split_path[1]
|
| 103 |
+
cleaned_data = self._cleanup_dataframe(raw_data, model_name)
|
| 104 |
+
mc1 = self._extract_mc1(raw_data, model_name)
|
| 105 |
+
mc2 = self._extract_mc2(raw_data, model_name)
|
| 106 |
+
cleaned_data = pd.concat([cleaned_data, mc1])
|
| 107 |
+
cleaned_data = pd.concat([cleaned_data, mc2])
|
| 108 |
+
organization_names.append(organization_name)
|
| 109 |
+
dataframes.append(cleaned_data)
|
| 110 |
+
except Exception as e:
|
| 111 |
+
print(f'Error processing {filename}')
|
| 112 |
+
print("The error is: ", e)
|
| 113 |
+
continue
|
| 114 |
|
| 115 |
|
| 116 |
data = pd.concat(dataframes, axis=1).transpose()
|