Spaces:
Sleeping
Sleeping
James McCool
committed on
Commit
·
b41a4e7
1
Parent(s):
2fb81af
Add character normalization for data exports in app.py: implement a function to convert accented characters to ASCII equivalents, ensuring cleaner data output. Update CSV export functions to apply normalization, enhancing data consistency and accessibility.
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import pandas as pd
|
|
| 4 |
import streamlit as st
|
| 5 |
import gspread
|
| 6 |
import pymongo
|
|
|
|
| 7 |
|
| 8 |
st.set_page_config(layout="wide")
|
| 9 |
|
|
@@ -332,18 +333,40 @@ def init_FD_SD_lineups(slate_desig: str, league: str):
|
|
| 332 |
|
| 333 |
return DK_seed
|
| 334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
def convert_df_to_csv(df):
|
| 336 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
|
| 338 |
@st.cache_data
|
| 339 |
def convert_df(array):
|
| 340 |
array = pd.DataFrame(array, columns=column_names)
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
@st.cache_data
|
| 344 |
def convert_pm_df(array):
|
| 345 |
array = pd.DataFrame(array)
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
dk_raw, fd_raw, dk_raw_sec, fd_raw_sec, roo_raw, sd_raw, dk_sd_raw, fd_sd_raw, timestamp = load_overall_stats('NBA')
|
| 349 |
salary_dict = dict(zip(roo_raw.Player, roo_raw.Salary))
|
|
|
|
| 4 |
import streamlit as st
|
| 5 |
import gspread
|
| 6 |
import pymongo
|
| 7 |
+
import unicodedata
|
| 8 |
|
| 9 |
st.set_page_config(layout="wide")
|
| 10 |
|
|
|
|
| 333 |
|
| 334 |
return DK_seed
|
| 335 |
|
| 336 |
+
def normalize_special_characters(text):
    """Strip diacritics from a string, leaving every other value untouched.

    The text is decomposed with Unicode NFKD and all combining marks are
    dropped, so "Dončić" becomes "Doncic". Compatibility characters are
    expanded as well (the "ﬁ" ligature becomes "fi").

    Args:
        text: A cell value. Only ``str`` instances are transformed.

    Returns:
        The accent-free string for ``str`` input; otherwise ``text`` itself,
        unchanged (``None``, NaN, numbers, lists, ...).

    Note:
        Letters that have no Unicode decomposition (for example "ø", "ł",
        "ß") are not combining sequences and therefore pass through as-is;
        the result is not guaranteed to be pure ASCII.
    """
    # Guarding on the type (rather than pd.isna) keeps missing values intact
    # and also avoids the ambiguous-truth-value ValueError that pd.isna
    # triggers when a cell holds a list or array.
    if not isinstance(text, str):
        return text

    decomposed = unicodedata.normalize('NFKD', text)
    # After NFKD an accented letter is a base character followed by one or
    # more combining marks; dropping the marks leaves the base character.
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
|
| 345 |
+
|
| 346 |
def convert_df_to_csv(df):
    """Return ``df`` as UTF-8 encoded CSV bytes with text columns normalized.

    Works on a copy, so the caller's DataFrame is not modified. Every column
    that holds text is passed value-by-value through
    ``normalize_special_characters`` before the frame is serialized without
    its index.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        bytes: The CSV document encoded as UTF-8.
    """
    df_clean = df.copy()
    for col in df_clean.columns:
        column = df_clean[col]
        # Comparing the dtype to 'object' alone misses columns stored with
        # pandas' dedicated string dtype, which would then be exported with
        # their accents intact. Check both kinds of text column.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            df_clean[col] = column.apply(normalize_special_characters)
    return df_clean.to_csv(index=False).encode('utf-8')
|
| 352 |
|
| 353 |
@st.cache_data
def convert_df(array):
    """Build a DataFrame from ``array`` and return it as UTF-8 CSV bytes.

    The frame is constructed with the module-level ``column_names`` as its
    column labels. Text columns are passed value-by-value through
    ``normalize_special_characters`` before the frame is serialized without
    its index. The result is cached by ``st.cache_data``.

    Args:
        array: Data accepted by ``pd.DataFrame`` together with
            ``columns=column_names``.

    Returns:
        bytes: The CSV document encoded as UTF-8.
    """
    frame = pd.DataFrame(array, columns=column_names)
    for col in frame.columns:
        column = frame[col]
        # Comparing the dtype to 'object' alone misses columns stored with
        # pandas' dedicated string dtype, which would then be exported with
        # their accents intact. Check both kinds of text column.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            frame[col] = column.apply(normalize_special_characters)
    return frame.to_csv(index=False).encode('utf-8')
|
| 361 |
|
| 362 |
@st.cache_data
def convert_pm_df(array):
    """Build a DataFrame from ``array`` and return it as UTF-8 CSV bytes.

    Unlike ``convert_df``, no column labels are imposed; the frame uses
    whatever labels ``pd.DataFrame(array)`` produces. Text columns are passed
    value-by-value through ``normalize_special_characters`` before the frame
    is serialized without its index. The result is cached by
    ``st.cache_data``.

    Args:
        array: Data accepted by ``pd.DataFrame``.

    Returns:
        bytes: The CSV document encoded as UTF-8.
    """
    frame = pd.DataFrame(array)
    for col in frame.columns:
        column = frame[col]
        # Comparing the dtype to 'object' alone misses columns stored with
        # pandas' dedicated string dtype, which would then be exported with
        # their accents intact. Check both kinds of text column.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            frame[col] = column.apply(normalize_special_characters)
    return frame.to_csv(index=False).encode('utf-8')
|
| 370 |
|
| 371 |
dk_raw, fd_raw, dk_raw_sec, fd_raw_sec, roo_raw, sd_raw, dk_sd_raw, fd_sd_raw, timestamp = load_overall_stats('NBA')
|
| 372 |
salary_dict = dict(zip(roo_raw.Player, roo_raw.Salary))
|