# rasff_shiny_app / data_loader.py
# MMOON's picture
# Update data_loader.py
# 58cba83 verified
import pandas as pd
import requests
from io import BytesIO
# URL of the main data file (a CSV served from raw.githubusercontent.com).
MAIN_DATA_URL = "https://raw.githubusercontent.com/M00N69/RASFFPORTAL/main/unified_rasff_data_with_grouping.csv"
# Load the main dataset
def load_data(url: str) -> pd.DataFrame:
    """Read the CSV at *url* and return it with normalised column names.

    The ``Date of Case`` column is parsed as dates while reading. Afterwards
    every column label is stripped, has its spaces replaced by underscores
    and is lower-cased (``"Date of Case"`` becomes ``"date_of_case"``).

    Args:
        url: Location of the CSV; anything ``pandas.read_csv`` accepts.

    Returns:
        The loaded DataFrame with normalised column labels.

    Raises:
        RuntimeError: If reading or normalising fails. The message starts
            with ``"Failed to load data: "`` and the underlying error is
            attached as ``__cause__``.
    """
    try:
        df = pd.read_csv(url, parse_dates=['Date of Case'])
        # str() keeps the normalisation from failing on non-string labels.
        df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
    except Exception as e:
        # Chain explicitly so the original traceback stays attached.
        raise RuntimeError(f"Failed to load data: {e}") from e
    return df
# Download and clean the weekly data
def download_and_clean_weekly_data(year: int, weeks: list, timeout: float = 30.0) -> pd.DataFrame:
    """Download the weekly ``.xls`` files for *year* and concatenate them.

    For each week a URL is built from the year with its first two characters
    dropped (e.g. ``24`` for 2024), the full year and the zero-padded week
    number. A week that cannot be downloaded or parsed is reported with
    ``print`` and skipped, so one bad week never aborts the whole run.

    Args:
        year: Year to download.
        weeks: Week numbers to download.
        timeout: Seconds to wait for each HTTP request before giving up on
            that week.

    Returns:
        All successfully read weeks concatenated with a fresh index, with
        column labels stripped, spaces replaced by underscores and
        lower-cased. An empty DataFrame if no week could be read.
    """
    url_template = "https://www.sirene-diffusion.fr/regia/000-rasff/{}/rasff-{}-{}.xls"
    frames = []
    for week in weeks:
        url = url_template.format(str(year)[2:], year, str(week).zfill(2))

        # Without a timeout a stalled server would block forever; a network
        # error is treated like any other failed download for that week.
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as e:
            print(f"Data for week {week} could not be downloaded: {e}")
            continue

        if response.status_code != 200:
            print(f"Data for week {week} could not be downloaded.")
            continue

        try:
            df = pd.read_excel(BytesIO(response.content))
            # str() guards against non-string header cells (numbers, dates).
            df.columns = [str(col).strip().replace(" ", "_").lower() for col in df.columns]
        except Exception as e:
            print(f"Failed to process data for week {week}: {e}")
            continue

        frames.append(df)

    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# Mappings for product and hazard categories: each lower-cased raw label
# maps to a (short label, group) pair.
product_category_mapping = {
    "fruits and vegetables": ("Fruits & Veggies", "Fresh Produce"),
    "meat and meat products": ("Meat", "Animal Products"),
    "fish and fish products": ("Fish", "Seafood"),
    "dairy products": ("Dairy", "Animal Products"),
    "cereals and bakery products": ("Cereals", "Grains"),
    # Add further mappings here
}

hazard_category_mapping = {
    "microbiological contamination": ("Microbial", "Biological"),
    "chemical contamination": ("Chemical", "Toxic"),
    "foreign bodies": ("Physical", "Foreign Objects"),
    "allergens": ("Allergens", "Health Risk"),
    # Add further mappings here
}

# Pair used for any label that has no entry in a mapping.
_UNKNOWN_PAIR = ("Unknown", "Unknown")


def _lookup_pairs(values, mapping):
    """Return one (short label, group) pair per value, matched case-insensitively."""
    return [mapping.get(str(value).lower(), _UNKNOWN_PAIR) for value in values]


# Apply the category mappings
def apply_mappings(df: pd.DataFrame) -> pd.DataFrame:
    """Add the derived category columns to *df* in place and return it.

    If ``product_category`` is present, ``prodcat`` and ``groupprod`` are
    filled from ``product_category_mapping``; if ``hazard_category`` is
    present, ``hazcat`` and ``grouphaz`` are filled from
    ``hazard_category_mapping``. Values are converted with ``str()`` and
    lower-cased before the lookup; anything without an entry (including
    missing values) becomes ``"Unknown"`` in both derived columns. A missing
    source column is reported with ``print`` and skipped.

    Args:
        df: DataFrame to annotate; it is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    if 'product_category' in df.columns:
        # Plain lists avoid building a pd.Series per row and also work when
        # the frame has zero rows.
        pairs = _lookup_pairs(df['product_category'], product_category_mapping)
        df['prodcat'] = [label for label, _ in pairs]
        df['groupprod'] = [group for _, group in pairs]
    else:
        print("Column 'product_category' not found in DataFrame.")

    if 'hazard_category' in df.columns:
        pairs = _lookup_pairs(df['hazard_category'], hazard_category_mapping)
        df['hazcat'] = [label for label, _ in pairs]
        df['grouphaz'] = [group for _, group in pairs]
    else:
        print("Column 'hazard_category' not found in DataFrame.")

    return df
# Example usage: load the main dataset, add the derived category columns
# and print the first rows.
if __name__ == "__main__":
    main_frame = apply_mappings(load_data(MAIN_DATA_URL))
    print(main_frame.head())