Spaces:
No application file
No application file
Update test.py
Browse files
test.py
CHANGED
|
@@ -3,31 +3,35 @@ import re
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
-
#
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
#
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
|
| 33 |
|
|
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
+
# Compare value distributions of "<name>_data" columns against their
# "<name>_gt" counterparts in a CSV file.
#
# For every such pair this builds a table indexed by the observed values
# (NaN included) with three columns: 'data_counts', 'gt_counts' and
# 'difference' (data minus gt).

# Load your data (replace 'your_file.csv' with the actual file path)
merge_data = pd.read_csv('your_file.csv')

# Collect the columns ending with _data and _gt
columns_data = [col for col in merge_data.columns if col.endswith('_data')]
columns_gt = [col for col in merge_data.columns if col.endswith('_gt')]

# Pair each "<name>_data" column with "<name>_gt" by name rather than by
# position: a positional zip silently compares unrelated columns whenever
# the two groups are ordered differently or one side has extra columns.
# A _data column with no matching _gt column is skipped.
gt_available = set(columns_gt)
column_pairs = [
    (data_col, data_col[:-len('_data')] + '_gt')
    for data_col in columns_data
    if data_col[:-len('_data')] + '_gt' in gt_available
]

# Initialize a dictionary to store value counts and differences,
# keyed by the name of the _data column
value_counts_diff = {}

for data_col, gt_col in column_pairs:
    # dropna=False keeps missing values as their own category
    data_counts = merge_data[data_col].value_counts(dropna=False)
    gt_counts = merge_data[gt_col].value_counts(dropna=False)

    # Create a DataFrame combining the counts; the two Series are aligned
    # on their values, and a value seen on only one side gets a count of 0
    combined_counts = pd.DataFrame({
        'data_counts': data_counts,
        'gt_counts': gt_counts,
    }).fillna(0)

    # Calculate the difference between data and gt counts
    combined_counts['difference'] = (
        combined_counts['data_counts'] - combined_counts['gt_counts']
    )

    # Store in dictionary
    value_counts_diff[data_col] = combined_counts

# Display the results for each column. A bare expression only renders in an
# interactive session, so print explicitly to get output when run as a script.
for column_name, counts_table in value_counts_diff.items():
    print(f"--- {column_name} ---")
    print(counts_table)
|
| 34 |
+
|
| 35 |
|
| 36 |
|
| 37 |
|