James McCool
committed on
Commit
·
067a5e7
1
Parent(s):
baba885
Remove redundant NaN logging from 'Data Load' section of 'app.py' and add initial data load debug statements in 'load_ss_file.py' to enhance data validation and transparency during file loading.
Browse files
- app.py +0 -9
- global_func/load_ss_file.py +10 -0
app.py
CHANGED
|
@@ -310,15 +310,6 @@ if selected_tab == 'Data Load':
|
|
| 310 |
st.session_state['export_portfolio'] = st.session_state['export_portfolio'].dropna(how='all')
|
| 311 |
st.session_state['export_portfolio'] = st.session_state['export_portfolio'].reset_index(drop=True)
|
| 312 |
print(st.session_state['portfolio'].isna().sum())
|
| 313 |
-
st.write(f"Total rows before dropping NaN: {len(st.session_state['portfolio'])}")
|
| 314 |
-
st.write("NaN count per column:")
|
| 315 |
-
st.write(st.session_state['portfolio'].isna().sum())
|
| 316 |
-
|
| 317 |
-
rows_with_na = st.session_state['portfolio'][st.session_state['portfolio'].isna().any(axis=1)]
|
| 318 |
-
st.write(f"Number of rows with at least one NaN: {len(rows_with_na)}")
|
| 319 |
-
if len(rows_with_na) > 0:
|
| 320 |
-
st.write("Sample of rows with NaN values:")
|
| 321 |
-
st.dataframe(rows_with_na.head(10))
|
| 322 |
st.session_state['portfolio'] = st.session_state['portfolio'].dropna(how='all')
|
| 323 |
st.session_state['portfolio'] = st.session_state['portfolio'].reset_index(drop=True)
|
| 324 |
elif upload_toggle == 'Draftkings/Fanduel (Names + IDs)':
|
|
|
|
| 310 |
st.session_state['export_portfolio'] = st.session_state['export_portfolio'].dropna(how='all')
|
| 311 |
st.session_state['export_portfolio'] = st.session_state['export_portfolio'].reset_index(drop=True)
|
| 312 |
print(st.session_state['portfolio'].isna().sum())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
st.session_state['portfolio'] = st.session_state['portfolio'].dropna(how='all')
|
| 314 |
st.session_state['portfolio'] = st.session_state['portfolio'].reset_index(drop=True)
|
| 315 |
elif upload_toggle == 'Draftkings/Fanduel (Names + IDs)':
|
global_func/load_ss_file.py
CHANGED
|
@@ -96,6 +96,16 @@ def load_ss_file(lineups, csv_file, site_var, type_var, sport_var):
|
|
| 96 |
|
| 97 |
if clean_name.endswith('.csv'):
|
| 98 |
lineups_df = pd.read_csv(lineups)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
lineups_df = lineups_df.replace(0, np.nan)
|
| 100 |
elif clean_name.endswith(('.xls', '.xlsx')):
|
| 101 |
lineups_df = pd.read_excel(lineups)
|
|
|
|
| 96 |
|
| 97 |
if clean_name.endswith('.csv'):
|
| 98 |
lineups_df = pd.read_csv(lineups)
|
| 99 |
+
# Debug: Check what we loaded initially
|
| 100 |
+
print(f"Initial load - Total rows: {len(lineups_df)}")
|
| 101 |
+
print(f"Initial load - Total columns: {len(lineups_df.columns)}")
|
| 102 |
+
print("First few rows of raw data:")
|
| 103 |
+
print(lineups_df.head())
|
| 104 |
+
print("Data types:")
|
| 105 |
+
print(lineups_df.dtypes)
|
| 106 |
+
print("Any NaN values in original data:")
|
| 107 |
+
print(lineups_df.isna().sum())
|
| 108 |
+
|
| 109 |
lineups_df = lineups_df.replace(0, np.nan)
|
| 110 |
elif clean_name.endswith(('.xls', '.xlsx')):
|
| 111 |
lineups_df = pd.read_excel(lineups)
|