Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +683 -38
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,685 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
| 10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 11 |
-
forums](https://discuss.streamlit.io).
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import matplotlib
|
| 5 |
+
matplotlib.use('Agg') # Mode non-interactif pour matplotlib
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import seaborn as sns
|
| 8 |
+
from collections import Counter
|
| 9 |
+
import re
|
| 10 |
+
import plotly.express as px
|
| 11 |
+
import plotly.graph_objects as go
|
| 12 |
+
from plotly.subplots import make_subplots
|
| 13 |
+
from matplotlib.backends.backend_pdf import PdfPages
|
| 14 |
+
from datetime import datetime
|
| 15 |
+
import io
|
| 16 |
+
import requests
|
| 17 |
+
# from wordcloud import WordCloud # Si utilisé
|
| 18 |
+
# from sklearn.feature_extraction.text import TfidfVectorizer # Si utilisé
|
| 19 |
+
# from sklearn.cluster import KMeans # Si utilisé
|
| 20 |
+
|
| 21 |
+
# --- Streamlit page configuration ---
# Must run before any other st.* call in the script.
st.set_page_config(
    page_title="Analyseur Sécurité Alimentaire IFS",
    page_icon="🛡️",
    layout="wide",  # use the full browser width
    initial_sidebar_state="expanded"  # sidebar open by default
)
|
| 28 |
+
|
| 29 |
+
# --- Styles CSS Personnalisés (Optionnel, pour un look plus moderne) ---
|
| 30 |
+
# Vous pouvez ajouter du CSS ici pour personnaliser davantage l'apparence.
|
| 31 |
+
# Exemple : st.markdown("<style>...</style>", unsafe_allow_html=True)
|
| 32 |
+
# Pour l'instant, nous allons nous concentrer sur la structure.
|
| 33 |
+
|
| 34 |
+
# --- Classe IFSAnalyzer (copiez-collez votre classe ici) ---
|
| 35 |
+
# Assurez-vous que les méthodes de la classe sont adaptées pour Streamlit :
|
| 36 |
+
# - Elles devraient retourner des données (DataFrames, dicts, figures Plotly/Matplotlib)
|
| 37 |
+
# - La génération PDF devrait enregistrer le fichier et retourner son chemin.
|
| 38 |
+
class IFSAnalyzer:
|
| 39 |
+
    def __init__(self, locked_file_io, checklist_file_io=None):
        """Build the analyzer: load both CSV sources and pre-clean lock reasons.

        Parameters
        ----------
        locked_file_io : file-like
            CSV of IFS certificate suspensions. Expected columns include
            'Lock reason' (free text) and optionally 'Standard' (used to keep
            only 'IFS Food' rows).
        checklist_file_io : file-like, optional
            CSV of the IFS Food V8 checklist with 'Requirement Number' and
            'Requirement text (English)' columns.
        """
        # DataFrames stay None until load_data() succeeds.
        self.locked_df = None
        self.checklist_df = None
        # Theme -> keyword list used for plain substring matching against the
        # cleaned, lower-cased lock reasons.  Numeric entries (e.g. '4.13.1')
        # match IFS requirement numbers quoted verbatim in the reason text;
        # truncated stems ('refrigerat', 'calibrat') match word variants.
        self.themes_keywords_definition = {
            'HYGIENE_PERSONNEL': ['hygien', 'personnel', 'clothing', 'hand', 'wash', 'uniform', 'gmp', 'locker', 'changing room', 'work clothing', 'protective clothes'],
            'HACCP_CCP_OPRP': ['haccp', 'ccp', 'oprp', 'critical', 'control', 'point', 'monitoring', 'hazard analysis', 'validation haccp', '2.3.9.1'],
            'TRACEABILITY': ['traceability', 'trace', 'record', 'batch', 'lot', 'identification', 'tracking', '4.18.1'],
            'ALLERGEN_MANAGEMENT': ['allergen', 'allergy', 'cross-contamination', 'gluten', 'lactose', 'celery', 'mustard', 'wheat', 'egg', '4.19.2'],
            'PEST_CONTROL': ['pest', 'rodent', 'insect', 'trap', 'bait', 'infestation', 'fly', 'mouse', 'rat', 'moth', 'weevil', 'spider', 'cobweb', '4.13.1', '4.13.2'],
            'CLEANING_SANITATION': ['clean', 'sanitation', 'disinfect', 'chemical', 'cleaning plan', 'dirt', 'residue', '4.10.1', '4.10.2'],
            'TEMPERATURE_CONTROL': ['temperature', 'cold', 'heat', 'refrigerat', 'freez', 'thaw', 'cooling'],
            'MAINTENANCE_EQUIPMENT': ['maintenance', 'equipment', 'calibrat', 'repair', 'infrastructure', 'facility', 'structure', 'conveyor', '4.9.1.1', '4.16.5', '4.17.2', '4.17.4'],
            'DOCUMENTATION_RECORDS': ['document', 'procedure', 'record', 'manual', 'specification', 'not documented', '5.1.1', '5.1.2', '5.3.2'],
            'FOREIGN_BODY_CONTAMINATION': ['foreign body', 'foreign material', 'glass', 'metal', 'detect', 'x-ray', 'contaminat', 'wood', 'plastic', '4.12.1', '4.12.2', '4.12.3'],
            'STORAGE_WAREHOUSING': ['storage', 'warehouse', 'stock', 'segregat', 'pallet', 'raw material storage', '4.14.3', '4.14.5'],
            'SUPPLIER_RAW_MATERIAL_CONTROL': ['supplier', 'vendor', 'purchase', 'raw material', 'ingredient', 'packaging material', 'declaration of conformity', 'doc', '4.5.1', '4.5.2'],
            'LABELLING': ['label', 'labelling', 'declaration', 'ingredient list', 'mrl', 'allergen labelling', 'nutritional information', '4.3.1', '4.3.2'],
            'QUANTITY_CONTROL_WEIGHT': ['quantity control', 'weight', 'fill', 'scale', 'metrological', 'underfilling', '5.5.1', '5.5.2', '5.4.1', '5.4.2'],
            'MANAGEMENT_RESPONSIBILITY_CULTURE': ['management', 'responsibilit', 'food safety culture', 'internal audit', 'corrective action', 'training', '1.1.1', '1.1.2', '1.2.1', '5.11.1', '5.11.2', '5.11.3', '5.11.4'],
            'NON_PAYMENT_ADMINISTRATIVE': ['payment', 'invoice', 'pay', 'closure', 'discontinued', 'bankrupt', 'denies access', 'ceased operation', 'fire', 'merged'],
            'INTEGRITY_PROGRAM_ISSUES': ['integrity', 'on-site check', 'ioc', 'unannounced audit', 'integrity on-site check', 'integrity on site check']
        }
        self.load_data(locked_file_io, checklist_file_io)
        # Only derive the cleaned reason column when loading succeeded.
        if self.locked_df is not None:
            self.clean_lock_reasons()
|
| 64 |
+
|
| 65 |
+
    def load_data(self, locked_file_io, checklist_file_io=None):
        """Read the suspensions CSV (and optional checklist CSV) into DataFrames.

        Keeps only rows whose 'Standard' column contains 'IFS Food'
        (case-insensitive) when that column exists.  On failure the
        corresponding attribute is reset to None and the error is surfaced
        through the Streamlit UI rather than raised.
        """
        try:
            self.locked_df = pd.read_csv(locked_file_io, encoding='utf-8')
            # Restrict to the IFS Food standard (substring, case-insensitive,
            # NaN rows dropped by na=False).
            if 'Standard' in self.locked_df.columns:
                self.locked_df = self.locked_df[self.locked_df['Standard'].str.contains('IFS Food', na=False, case=False)]
            if checklist_file_io:
                try:
                    self.checklist_df = pd.read_csv(checklist_file_io, encoding='utf-8')
                    # The requirement-text lookup needs both columns; without
                    # them the checklist is discarded entirely.
                    if 'Requirement Number' not in self.checklist_df.columns or \
                       'Requirement text (English)' not in self.checklist_df.columns:
                        st.warning("Colonnes 'Requirement Number' ou 'Requirement text (English)' manquantes dans la checklist. L'analyse des exigences sera limitée.")
                        self.checklist_df = None
                except Exception as e_checklist:
                    # Checklist failure is non-fatal: analysis falls back to
                    # bare chapter numbers.
                    st.error(f"Erreur lors du chargement du fichier checklist : {e_checklist}")
                    self.checklist_df = None
        except Exception as e:
            # Suspensions file failure is fatal for the analyzer.
            st.error(f"❌ Erreur lors du chargement du fichier des suspensions : {e}")
            self.locked_df = None
|
| 88 |
+
|
| 89 |
+
def clean_lock_reasons(self):
|
| 90 |
+
if self.locked_df is None or 'Lock reason' not in self.locked_df.columns: return
|
| 91 |
+
self.locked_df['lock_reason_clean'] = self.locked_df['Lock reason'].astype(str).fillna('') \
|
| 92 |
+
.str.lower() \
|
| 93 |
+
.str.replace(r'[\n\r\t]', ' ', regex=True) \
|
| 94 |
+
.str.replace(r'[^\w\s\.\-\/\%]', ' ', regex=True) \
|
| 95 |
+
.str.replace(r'\s+', ' ', regex=True).str.strip()
|
| 96 |
+
|
| 97 |
+
    def extract_ifs_chapters(self, text):
        """Extract IFS requirement/chapter numbers (e.g. '4.13.1') cited in a
        free-text lock reason.

        Returns a sorted, de-duplicated list of dotted chapter strings whose
        leading component is 1-6 (the IFS Food chapter range); [] for
        NaN/empty/non-string input.
        """
        if pd.isna(text) or not isinstance(text, str) or text.strip() == '': return []
        # Alternative citation styles observed in the data.
        patterns = [
            r'(?:ko|major|cl\.|req\.|requirement(?: item)?|chapter|section|point|§|cl\s+|clause)?\s*(\d\.\d{1,2}(?:\.\d{1,2})?)',  # X.Y or X.Y.Z with an optional prefix
            r'(\d\.\d{1,2}(?:\.\d{1,2})?)\s*(?:ko|major|:|-|\(ko\)|\(major\))',  # X.Y or X.Y.Z followed by KO/Major
            r'(\d{1,2})\s*-\s*ko',  # NOTE(review): captures bare digits, which the dotted-number filter below always rejects — this pattern appears to be dead; confirm intent
            r'requirement\s+(\d\.\d\.\d)',
            r'cl\s+(\d\.\d+(?:\.\d+)?)',  # e.g. cl 4.12.1
            r'§\s*(\d\.\d+(?:\.\d+)?)'  # e.g. § 4.13.1
        ]
        chapters_found = []
        normalized_text = text.lower().replace('\n', ' ').replace('\r', ' ')
        for pattern in patterns:
            matches = re.findall(pattern, normalized_text)
            for match in matches:
                # findall yields tuples only for multi-group patterns; each
                # pattern above has a single group, so this is defensive.
                chapter_num = match if isinstance(match, str) else (match[-1] if isinstance(match, tuple) and match[-1] else match[0] if isinstance(match, tuple) and match[0] else None)
                if chapter_num:
                    chapter_num = chapter_num.strip().rstrip('.').strip()
                    # Accept only dotted numbers X.Y or X.Y.Z (second fullmatch
                    # is redundant with the first — kept as-is).
                    if re.fullmatch(r'\d(\.\d+){1,2}', chapter_num) or re.fullmatch(r'\d\.\d+', chapter_num):
                        main_chapter_part = chapter_num.split('.')[0]
                        # IFS Food requirement numbers start with chapter 1-6.
                        if main_chapter_part.isdigit() and 1 <= int(main_chapter_part) <= 6:
                            chapters_found.append(chapter_num)
        return sorted(list(set(chapters_found)))
|
| 120 |
+
|
| 121 |
+
def analyze_themes(self):
|
| 122 |
+
if self.locked_df is None or 'lock_reason_clean' not in self.locked_df.columns: return {}, {}
|
| 123 |
+
theme_counts = {theme: 0 for theme in self.themes_keywords_definition}
|
| 124 |
+
theme_details = {theme: [] for theme in self.themes_keywords_definition}
|
| 125 |
+
for index, row in self.locked_df.iterrows():
|
| 126 |
+
reason_text = row['lock_reason_clean']
|
| 127 |
+
original_reason = row['Lock reason']
|
| 128 |
+
supplier = row['Supplier']
|
| 129 |
+
country = row.get('Country/Region', 'N/A')
|
| 130 |
+
for theme, keywords in self.themes_keywords_definition.items():
|
| 131 |
+
if any(keyword in reason_text for keyword in keywords):
|
| 132 |
+
theme_counts[theme] += 1
|
| 133 |
+
theme_details[theme].append({
|
| 134 |
+
"reason": original_reason,
|
| 135 |
+
"supplier": supplier,
|
| 136 |
+
"country": country
|
| 137 |
+
})
|
| 138 |
+
return theme_counts, theme_details
|
| 139 |
+
|
| 140 |
+
def geographic_analysis(self):
|
| 141 |
+
if self.locked_df is None or 'Country/Region' not in self.locked_df.columns: return None
|
| 142 |
+
return self.locked_df.groupby('Country/Region').size().sort_values(ascending=False).reset_index(name='total_suspensions')
|
| 143 |
+
|
| 144 |
+
def clean_product_scopes(self, scope_text):
|
| 145 |
+
if pd.isna(scope_text): return []
|
| 146 |
+
scope_text = str(scope_text)
|
| 147 |
+
raw_scopes = re.split(r'[,;\s"\'’`]|et|\/|&|\.\s', scope_text) # Ajout de guillemets et point suivi d'espace
|
| 148 |
+
cleaned_scopes = []
|
| 149 |
+
for scope in raw_scopes:
|
| 150 |
+
scope = scope.strip().replace('"', '').replace("'", "")
|
| 151 |
+
if not scope or not scope.isdigit(): continue # Ignorer si vide ou non numérique après nettoyage
|
| 152 |
+
num = int(scope)
|
| 153 |
+
if 1 <= num <= 11:
|
| 154 |
+
cleaned_scopes.append(str(num))
|
| 155 |
+
elif num > 1000: # ex: 2005, 2007, 2009, 2010
|
| 156 |
+
potential_scope_2 = str(num % 100) # Pour 10, 11
|
| 157 |
+
potential_scope_1 = str(num % 10) # Pour 1-9
|
| 158 |
+
if potential_scope_2 in ['10', '11']:
|
| 159 |
+
cleaned_scopes.append(potential_scope_2)
|
| 160 |
+
elif potential_scope_1 in [str(i) for i in range(1,10)]:
|
| 161 |
+
cleaned_scopes.append(potential_scope_1)
|
| 162 |
+
return list(set(cleaned_scopes))
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def product_scope_analysis(self):
|
| 166 |
+
if self.locked_df is None or 'Product scopes' not in self.locked_df.columns: return None
|
| 167 |
+
all_scopes = []
|
| 168 |
+
for scopes_text in self.locked_df['Product scopes'].dropna():
|
| 169 |
+
cleaned = self.clean_product_scopes(scopes_text)
|
| 170 |
+
all_scopes.extend(cleaned)
|
| 171 |
+
return Counter(all_scopes)
|
| 172 |
+
|
| 173 |
+
def chapter_frequency_analysis(self):
|
| 174 |
+
if self.locked_df is None or 'Lock reason' not in self.locked_df.columns: return Counter()
|
| 175 |
+
all_chapters = []
|
| 176 |
+
for reason in self.locked_df['Lock reason'].dropna():
|
| 177 |
+
all_chapters.extend(self.extract_ifs_chapters(reason))
|
| 178 |
+
return Counter(all_chapters)
|
| 179 |
+
|
| 180 |
+
    def analyze_audit_types(self):
        """Classify suspensions by the audit context mentioned in their text.

        Searches the concatenation of 'Lock reason' and 'Lock history'
        (lower-cased) for keywords of four audit categories.

        Returns
        -------
        (audit_analysis, audit_examples) : tuple
            audit_analysis maps audit type to match count; audit_examples
            maps it to {'examples': up to 5 sample rows, 'countries': dict of
            the 5 most frequent countries}.  ({}, {}) when no data is loaded.
        """
        if self.locked_df is None: return {}, {}
        # Keyword lists mix English and French terms seen in the raw data.
        audit_keywords = {
            'INTEGRITY_PROGRAM_IP': ['integrity program', 'integrity', 'programme intégrité', 'programme integrity','onsite check', 'on site check', 'on-site check', 'on-site integrity check', 'ioc', 'i.o.c', 'ip audit', 'integrity audit', 'spot check', 'unannounced audit', 'audit inopiné', 'control inopiné', 'ifs integrity'],
            'SURVEILLANCE_FOLLOW_UP': ['surveillance', 'surveillance audit', 'follow up audit', 'follow-up', 'suivi', 'corrective action'],
            'COMPLAINT_WITHDRAWAL': ['complaint', 'réclamation', 'plainte', 'customer complaint', 'withdrawal', 'retrait', 'recall'],
            'RECERTIFICATION_RENEWAL': ['recertification', 'renewal', 'renouvellement', 're-certification', 'renewal audit']
        }
        audit_analysis = {audit_type: 0 for audit_type in audit_keywords}
        audit_examples = {audit_type: {'examples': [], 'countries': Counter()} for audit_type in audit_keywords}
        for index, row in self.locked_df.iterrows():
            # Search both the reason and the history; missing cells become ''.
            text_to_search = (str(row.get('Lock reason', '')) + " " + str(row.get('Lock history', ''))).lower()
            for audit_type, keywords in audit_keywords.items():
                if any(keyword in text_to_search for keyword in keywords):
                    audit_analysis[audit_type] += 1
                    if len(audit_examples[audit_type]['examples']) < 5:  # keep a handful of illustrative rows
                        audit_examples[audit_type]['examples'].append({
                            'Supplier': row.get('Supplier', 'N/A'),
                            'Country/Region': row.get('Country/Region', 'N/A'),
                            'Lock reason': row.get('Lock reason', 'N/A')
                        })
                    audit_examples[audit_type]['countries'][row.get('Country/Region', 'N/A')] += 1
        # Reduce each country Counter to a plain dict of its top 5 entries.
        for audit_type in audit_examples:
            audit_examples[audit_type]['countries'] = dict(audit_examples[audit_type]['countries'].most_common(5))
        return audit_analysis, audit_examples
|
| 205 |
+
|
| 206 |
+
def generate_ifs_recommendations_analysis(self):
|
| 207 |
+
if self.locked_df is None or self.checklist_df is None: return None
|
| 208 |
+
chapter_counts = self.chapter_frequency_analysis()
|
| 209 |
+
if not chapter_counts: return None
|
| 210 |
+
recommendations = []
|
| 211 |
+
for chapter, count in chapter_counts.most_common():
|
| 212 |
+
norm_chapter = chapter.replace("KO ", "").strip()
|
| 213 |
+
req_text_series = self.checklist_df[self.checklist_df['Requirement Number'].astype(str).str.strip() == norm_chapter]['Requirement text (English)']
|
| 214 |
+
req_text = req_text_series.iloc[0] if not req_text_series.empty else "Texte de l'exigence non trouvé."
|
| 215 |
+
# if len(req_text) > 250: req_text = req_text[:247] + "..." # Tronquer pour affichage résumé
|
| 216 |
+
recommendations.append({'chapter': chapter, 'count': count, 'requirement_text': req_text})
|
| 217 |
+
return recommendations
|
| 218 |
+
|
| 219 |
+
    def cross_analysis_scope_themes(self):
        """Build a product-scope x theme contingency table.

        For every row, each detected scope is paired with every theme whose
        keywords match the cleaned reason text; the pairs are pivoted into a
        count matrix (rows 'Scope N', columns short theme labels).

        Returns a pivoted DataFrame, or None when data/columns are missing or
        no pair was produced.
        """
        if self.locked_df is None or 'Product scopes' not in self.locked_df.columns or 'lock_reason_clean' not in self.locked_df.columns: return None
        # Reduced theme set with short labels so heatmap axes stay readable.
        themes_for_cross = {
            'HYGIENE': self.themes_keywords_definition['HYGIENE_PERSONNEL'], 'HACCP': self.themes_keywords_definition['HACCP_CCP_OPRP'],
            'TRACE': self.themes_keywords_definition['TRACEABILITY'], 'ALLERGEN': self.themes_keywords_definition['ALLERGEN_MANAGEMENT'],
            'CLEAN': self.themes_keywords_definition['CLEANING_SANITATION'], 'MAINT': self.themes_keywords_definition['MAINTENANCE_EQUIPMENT'],
            'LABEL': self.themes_keywords_definition['LABELLING'], 'PEST': self.themes_keywords_definition['PEST_CONTROL']
        }
        scope_theme_data = []
        for idx, row in self.locked_df.iterrows():
            scopes_text, reason_text = row['Product scopes'], row['lock_reason_clean']
            if pd.notna(scopes_text) and pd.notna(reason_text) and reason_text:
                for scope in self.clean_product_scopes(scopes_text):
                    for theme, keywords in themes_for_cross.items():
                        if any(kw in reason_text for kw in keywords):
                            scope_theme_data.append({'scope': f"Scope {scope}", 'theme': theme})
        if not scope_theme_data: return None
        # aggfunc='size' counts (scope, theme) co-occurrences.
        return pd.DataFrame(scope_theme_data).pivot_table(index='scope', columns='theme', aggfunc='size', fill_value=0)
|
| 237 |
+
|
| 238 |
+
def _create_plotly_bar_chart(self, data_dict, title, orientation='v', xaxis_title="", yaxis_title="", color='royalblue', height=400):
|
| 239 |
+
if not data_dict : return go.Figure()
|
| 240 |
+
if orientation == 'h':
|
| 241 |
+
y_data = list(data_dict.keys())
|
| 242 |
+
x_data = list(data_dict.values())
|
| 243 |
+
else:
|
| 244 |
+
x_data = list(data_dict.keys())
|
| 245 |
+
y_data = list(data_dict.values())
|
| 246 |
+
|
| 247 |
+
fig = go.Figure(go.Bar(x=x_data, y=y_data, orientation=orientation, marker_color=color, text=x_data if orientation=='h' else y_data, textposition='auto'))
|
| 248 |
+
fig.update_layout(
|
| 249 |
+
title={'text': title, 'x':0.5, 'font': {'size': 16}},
|
| 250 |
+
xaxis_title=xaxis_title,
|
| 251 |
+
yaxis_title=yaxis_title,
|
| 252 |
+
height=height,
|
| 253 |
+
margin=dict(l=10, r=10, t=50, b=10),
|
| 254 |
+
yaxis=dict(autorange="reversed") if orientation == 'h' else {},
|
| 255 |
+
plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)'
|
| 256 |
+
)
|
| 257 |
+
return fig
|
| 258 |
+
|
| 259 |
+
def _create_plotly_choropleth_map(self, geo_data_df, title, height=500):
|
| 260 |
+
if geo_data_df is None or geo_data_df.empty: return go.Figure()
|
| 261 |
+
fig = px.choropleth(geo_data_df, locations="Country/Region",
|
| 262 |
+
locationmode='country names', # S'assurer que les noms de pays sont compatibles
|
| 263 |
+
color="total_suspensions",
|
| 264 |
+
hover_name="Country/Region",
|
| 265 |
+
color_continuous_scale=px.colors.sequential.Plasma,
|
| 266 |
+
title=title,
|
| 267 |
+
height=height)
|
| 268 |
+
fig.update_layout(
|
| 269 |
+
title={'x':0.5, 'font': {'size': 16}},
|
| 270 |
+
geo=dict(showframe=False, showcoastlines=False, projection_type='equirectangular'),
|
| 271 |
+
margin=dict(l=10, r=10, t=50, b=10),
|
| 272 |
+
plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)'
|
| 273 |
+
)
|
| 274 |
+
return fig
|
| 275 |
+
|
| 276 |
+
    def _create_plotly_heatmap(self, pivot_matrix, title, height=500):
        """Annotated heatmap of a pivot table (scope rows x theme columns).

        Returns an empty Figure when the matrix is missing or empty.
        """
        if pivot_matrix is None or pivot_matrix.empty: return go.Figure()
        # text_auto=True writes the cell counts inside the heatmap.
        fig = px.imshow(pivot_matrix, text_auto=True, aspect="auto", color_continuous_scale='YlGnBu', title=title, height=height)
        fig.update_layout(
            title={'x':0.5, 'font': {'size': 16}},
            margin=dict(l=10, r=10, t=50, b=10),
            xaxis=dict(tickangle=45),  # slant theme labels so they do not overlap
            plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)'
        )
        return fig
|
| 286 |
+
|
| 287 |
+
# --- Méthodes pour le rapport PDF (utilisant Matplotlib) ---
|
| 288 |
+
    def _add_text_to_pdf_page(self, fig, text_lines, start_y=0.95, line_height=0.035, font_size=9, title="", title_font_size=14, max_chars_per_line=100):
        """Lay plain text lines onto a matplotlib figure used as a PDF page.

        Hard-wraps each line at max_chars_per_line characters, renders lines
        that start with a known section emoji in bold, and stops when the
        page runs out of vertical space.

        Returns False when the page filled up before all lines were placed,
        True otherwise.
        """
        ax = fig.gca()
        ax.clear()
        ax.axis('off')  # the page is pure text; hide the axes frame
        if title:
            ax.text(0.5, start_y, title, ha='center', va='top', fontsize=title_font_size, fontweight='bold')
            start_y -= (line_height * 2.5)  # leave a gap below the title
        current_y = start_y
        for line in text_lines:
            # Naive fixed-width wrap; positions are axes coordinates, not
            # font metrics, so max_chars_per_line must suit the font size.
            wrapped_lines = [line[i:i+max_chars_per_line] for i in range(0, len(line), max_chars_per_line)]
            for wrapped_line in wrapped_lines:
                if current_y < 0.05: return False  # page full
                # Section headers (emoji-prefixed) are rendered bold/larger.
                fw = 'bold' if line.startswith(tuple(["🎯","📊","🌍","🏭","📋","🔍"])) else 'normal'
                fs = font_size + 1 if fw == 'bold' else font_size
                ax.text(0.03, current_y, wrapped_line, ha='left', va='top', fontsize=fs, fontweight=fw)
                current_y -= line_height
            # NOTE(review): indentation reconstructed — placing this at the
            # outer-loop level so empty lines (whose wrap list is empty)
            # still add half a line of spacing; confirm against original.
            if not line.strip(): current_y -= (line_height * 0.5)
        return True
|
| 306 |
+
|
| 307 |
+
def _create_matplotlib_figure_for_pdf(self, data_dict_or_df, title, x_label="", y_label="", chart_type='barh', top_n=10, color='skyblue', xtick_rotation=0, ytick_fontsize=8):
|
| 308 |
+
if not data_dict_or_df and not isinstance(data_dict_or_df, pd.DataFrame) : return None
|
| 309 |
+
fig, ax = plt.subplots(figsize=(10, 6.5))
|
| 310 |
+
items, values = [], []
|
| 311 |
+
|
| 312 |
+
if isinstance(data_dict_or_df, (Counter, dict)):
|
| 313 |
+
sorted_data = dict(sorted(data_dict_or_df.items(), key=lambda item: item[1], reverse=True)[:top_n])
|
| 314 |
+
items = [str(k).replace('_',' ').replace('MANAGEMENT','MGMT').replace('RESPONSIBILITY','RESP.')[:30] for k in sorted_data.keys()] # Tronquer labels longs
|
| 315 |
+
values = list(sorted_data.values())
|
| 316 |
+
elif isinstance(data_dict_or_df, pd.DataFrame):
|
| 317 |
+
df_top = data_dict_or_df.head(top_n)
|
| 318 |
+
if 'Country/Region' in df_top.columns and 'total_suspensions' in df_top.columns:
|
| 319 |
+
items = df_top['Country/Region'].tolist()
|
| 320 |
+
values = df_top['total_suspensions'].tolist()
|
| 321 |
+
chart_type = 'bar'
|
| 322 |
+
elif 'chapter' in df_top.columns and 'count' in df_top.columns and 'requirement_text' in df_top.columns:
|
| 323 |
+
items = [f"{row['chapter']}\n({row['requirement_text'][:35]}...)" if row['requirement_text'] != "Texte de l'exigence non trouvé." else row['chapter'] for index, row in df_top.iterrows()]
|
| 324 |
+
values = df_top['count'].tolist()
|
| 325 |
+
chart_type = 'bar'
|
| 326 |
+
else: # Cas générique
|
| 327 |
+
if not df_top.empty:
|
| 328 |
+
items = df_top.index.astype(str).tolist() if len(df_top.columns) == 1 else df_top.iloc[:,0].astype(str).tolist()
|
| 329 |
+
values = df_top.iloc[:,0].tolist() if len(df_top.columns) == 1 else df_top.iloc[:,1].tolist()
|
| 330 |
+
|
| 331 |
+
if not items or not values or all(v == 0 for v in values): return None
|
| 332 |
+
|
| 333 |
+
if chart_type == 'barh':
|
| 334 |
+
ax.barh(items, values, color=color, edgecolor='grey')
|
| 335 |
+
ax.set_yticklabels(items, fontsize=ytick_fontsize)
|
| 336 |
+
ax.invert_yaxis()
|
| 337 |
+
ax.set_xlabel(x_label if x_label else 'Nombre de cas', fontsize=10)
|
| 338 |
+
for i, v_ in enumerate(values): ax.text(v_ + (max(values)*0.01), i, str(v_), va='center', fontsize=8)
|
| 339 |
+
ax.set_xlim(0, max(values) * 1.12 if values else 1)
|
| 340 |
+
elif chart_type == 'bar':
|
| 341 |
+
bars = ax.bar(items, values, color=color, edgecolor='grey')
|
| 342 |
+
ax.set_xticklabels(items, rotation=xtick_rotation, ha='right' if xtick_rotation > 0 else 'center', fontsize=ytick_fontsize)
|
| 343 |
+
ax.set_ylabel(y_label if y_label else 'Nombre de cas', fontsize=10)
|
| 344 |
+
for bar in bars:
|
| 345 |
+
yval = bar.get_height()
|
| 346 |
+
ax.text(bar.get_x() + bar.get_width()/2.0, yval + (max(values)*0.01), int(yval), ha='center', va='bottom', fontsize=8)
|
| 347 |
+
ax.set_ylim(0, max(values) * 1.12 if values else 1)
|
| 348 |
+
|
| 349 |
+
ax.set_title(title, fontsize=13, fontweight='bold', pad=15)
|
| 350 |
+
ax.grid(axis='x' if chart_type == 'barh' else 'y', linestyle='--', alpha=0.7)
|
| 351 |
+
sns.despine(left=True, bottom=True)
|
| 352 |
+
plt.tight_layout(pad=1.5)
|
| 353 |
+
return fig
|
| 354 |
+
|
| 355 |
+
    def export_report_to_pdf(self, filename='IFS_Analysis_Report.pdf'):
        """Render the full analysis as a multi-page PDF.

        Pages: cover summary, theme/country/scope/chapter bar charts, a
        scope-vs-theme heatmap, two detailed text sections, and (when the
        checklist is available) a requirement-text appendix.

        Returns the written filename, or None on failure (the error is shown
        via st.error instead of being raised).

        NOTE(review): relies on self.generate_detailed_theme_analysis and
        self.generate_audit_analysis_report, which are not defined in the
        visible portion of this file — confirm they exist with a
        stream= keyword parameter.
        """
        if self.locked_df is None: return None
        try:
            with PdfPages(filename) as pdf:
                total_suspensions = len(self.locked_df)
                if total_suspensions == 0:
                    # Degenerate report: a single "no data" page.
                    fig = plt.figure(figsize=(8.5, 11)); self._add_text_to_pdf_page(fig, ["Aucune donnée."], title="Rapport"); pdf.savefig(fig); plt.close(fig); return filename

                # Page 1: cover / overview figures.
                fig = plt.figure(figsize=(8.5, 11))
                title_page_text = [ f"Date: {datetime.now().strftime('%d/%m/%Y %H:%M')}", "",
                                    f"Source Suspensions: {st.session_state.get('locked_file_name_original', 'N/A')}",
                                    f"Source Checklist: {st.session_state.get('checklist_file_name_original', 'Non fournie')}", "",
                                    "📊 VUE D'ENSEMBLE",
                                    f" • Total suspensions IFS Food: {total_suspensions}",
                                    f" • Avec motifs: {self.locked_df['Lock reason'].notna().sum()} ({self.locked_df['Lock reason'].notna().sum()/total_suspensions*100:.1f}%)" ]
                audit_s, _ = self.analyze_audit_types(); total_as = sum(audit_s.values())
                title_page_text.append(f" • Liées à audits spécifiques: {total_as} ({total_as/total_suspensions*100:.1f}%)")
                self._add_text_to_pdf_page(fig, title_page_text, title="Rapport d'Analyse IFS Food Safety"); pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)

                # Chart pages (each helper may return None when empty).
                tc, _ = self.analyze_themes(); fig_t = self._create_matplotlib_figure_for_pdf(tc, 'Top 10 Thèmes NC', color='indianred', ytick_fontsize=7);
                if fig_t: pdf.savefig(fig_t, bbox_inches='tight'); plt.close(fig_t)

                gs = self.geographic_analysis(); fig_g = self._create_matplotlib_figure_for_pdf(gs, 'Top 10 Pays', chart_type='bar', color='lightseagreen', xtick_rotation=30, ytick_fontsize=7);
                if fig_g: pdf.savefig(fig_g, bbox_inches='tight'); plt.close(fig_g)

                # NOTE(review): product_scope_analysis() can return None when
                # the 'Product scopes' column is absent — sc.items() would
                # then raise (caught by the outer except); verify inputs.
                sc = self.product_scope_analysis(); sc_plot = {f"Sc {k}": v for k,v in sc.items()}; fig_s = self._create_matplotlib_figure_for_pdf(sc_plot, 'Top 10 Product Scopes', color='cornflowerblue', ytick_fontsize=7);
                if fig_s: pdf.savefig(fig_s, bbox_inches='tight'); plt.close(fig_s)

                # Chapter chart: prefer checklist-enriched recommendations,
                # otherwise fall back to raw chapter numbers.
                reco = self.generate_ifs_recommendations_analysis()
                if reco:
                    df_reco = pd.DataFrame(reco)
                    fig_c = self._create_matplotlib_figure_for_pdf(df_reco, 'Top 10 Exigences IFS', chart_type='bar', color='gold', xtick_rotation=30, ytick_fontsize=6);
                else:
                    cc_direct = self.chapter_frequency_analysis()
                    fig_c = self._create_matplotlib_figure_for_pdf(cc_direct, 'Top 10 Chapitres IFS (Numéros)', chart_type='bar', color='gold', xtick_rotation=30, ytick_fontsize=7);
                if fig_c: pdf.savefig(fig_c, bbox_inches='tight'); plt.close(fig_c)

                # Heatmap page: restrict to the 8 busiest scopes.
                cpm = self.cross_analysis_scope_themes()
                if cpm is not None and not cpm.empty:
                    top_n = min(8, len(cpm.index)); scope_tots = cpm.sum(axis=1).sort_values(ascending=False)
                    cpm_f = cpm.loc[scope_tots.head(top_n).index] if len(cpm.index) > top_n else cpm
                    if not cpm_f.empty:
                        fig_h, ax_h = plt.subplots(figsize=(10, max(5, len(cpm_f.index)*0.6)))
                        sns.heatmap(cpm_f, annot=True, cmap="YlGnBu", fmt='d', ax=ax_h, annot_kws={"size":7});
                        ax_h.set_title('Corrélations: Thèmes vs Scopes (Top)', fontsize=13, pad=15)
                        # NOTE(review): Axes.tick_params does not accept an
                        # 'ha' keyword — this call likely raises TypeError at
                        # runtime (swallowed by the outer except); confirm.
                        ax_h.tick_params(axis='x', labelsize=8, rotation=30, ha='right'); ax_h.tick_params(axis='y', labelsize=8)
                        plt.tight_layout(pad=1.5); pdf.savefig(fig_h, bbox_inches='tight'); plt.close(fig_h)

                # Detailed text pages: each generator writes into a StringIO
                # which is then laid out line by line.
                for gen_func, title_str, lh, fs, mcpl in [
                    (self.generate_detailed_theme_analysis, "Analyse Thématique Détaillée", 0.03, 8, 110),
                    (self.generate_audit_analysis_report, "Analyse des Types d'Audits", 0.03, 8, 110)
                ]:
                    fig = plt.figure(figsize=(8.5, 11)); s_io = io.StringIO(); gen_func(stream=s_io)
                    self._add_text_to_pdf_page(fig, s_io.getvalue().splitlines(), title=title_str, line_height=lh, font_size=fs, max_chars_per_line=mcpl)
                    pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)

                # Appendix: top-20 cited requirements with their texts.
                if reco:
                    fig = plt.figure(figsize=(8.5, 11))
                    req_tl = ["Note: Texte de l'exigence de la checklist IFS Food v8 (si fournie).\n"]
                    for r_ in sorted(reco, key=lambda x: x['count'], reverse=True)[:20]:
                        req_tl.extend([f"📋 Chap {r_['chapter']} ({r_['count']} mentions)", f" Txt: {r_['requirement_text']}", ""])
                    self._add_text_to_pdf_page(fig, req_tl, title="Détail Exigences IFS", line_height=0.028, font_size=7, max_chars_per_line=120)
                    pdf.savefig(fig, bbox_inches='tight'); plt.close(fig)

                return filename
        except Exception as e:
            st.error(f"❌ Erreur majeure PDF: {e}")
            return None  # error surfaced in the UI; no text fallback
|
| 429 |
+
|
| 430 |
+
# --- Fonctions Utilitaires pour Streamlit ---
|
| 431 |
+
@st.cache_data  # cache parsed uploads so Streamlit reruns don't re-read the file
def load_csv_data(uploaded_file):
    """Parse an uploaded CSV file into a pandas DataFrame.

    Args:
        uploaded_file: A Streamlit UploadedFile, or None when nothing was uploaded.

    Returns:
        The parsed DataFrame, or None when no file was given or parsing failed
        (a parse failure is reported to the user via st.error).
    """
    if uploaded_file is None:
        return None
    try:
        frame = pd.read_csv(uploaded_file)
    except Exception as exc:
        st.error(f"Erreur lors de la lecture du fichier CSV : {exc}")
        return None
    return frame
|
| 440 |
+
|
| 441 |
+
# NOTE: st.cache_resource cannot hash raw IO objects and would raise
# UnhashableParamError on io.BytesIO / io.StringIO arguments. hash_funcs
# keys the cache on the buffers' byte content instead, so a new upload
# invalidates the cache while identical reruns reuse the same instance.
@st.cache_resource(
    hash_funcs={
        io.BytesIO: lambda buf: buf.getvalue(),
        io.StringIO: lambda buf: buf.getvalue(),
    }
)
def get_analyzer(locked_data_io, checklist_data_io):
    """Build (or reuse a cached) IFSAnalyzer for the given data streams.

    Args:
        locked_data_io: BytesIO holding the suspensions CSV content.
        checklist_data_io: BytesIO/StringIO holding the checklist CSV, or None.

    Returns:
        An IFSAnalyzer instance (cached per distinct buffer content).
    """
    return IFSAnalyzer(locked_data_io, checklist_data_io)
|
| 444 |
+
|
| 445 |
+
def download_checklist_from_github_st(url="https://raw.githubusercontent.com/M00N69/Action-plan/main/Guide%20Checklist_IFS%20Food%20V%208%20-%20CHECKLIST.csv"):
    """Fetch the IFS Food V8 checklist CSV from GitHub.

    Args:
        url: Raw-content URL of the checklist CSV.

    Returns:
        An io.StringIO wrapping the downloaded CSV text, or None when the
        download fails (a st.warning is shown and the app proceeds with a
        degraded requirements analysis).
    """
    try:
        # Timeout keeps the UI responsive if GitHub is unreachable.
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.exceptions.RequestException as exc:
        st.warning(f"Impossible de télécharger la checklist depuis GitHub ({exc}). L'analyse des exigences sera limitée.")
        return None
    return io.StringIO(resp.text)
|
| 453 |
+
|
| 454 |
+
# --- Streamlit interface ---
|
| 455 |
+
def main():
    """Entry point: render the IFS suspension-analysis Streamlit app.

    Flow: collect a suspensions CSV (and optionally a checklist) from the
    sidebar, build a cached IFSAnalyzer, then render the dashboard tabs and
    an on-demand PDF export button.
    """
    st.title("🛡️ Analyseur de Sécurité Alimentaire IFS")
    st.markdown("Téléversez votre fichier de suspensions IFS pour générer une analyse détaillée et des visualisations.")

    # --- Sidebar: input files and checklist source selection ---
    with st.sidebar:
        st.header("⚙️ Options d'Analyse")
        locked_file_uploaded = st.file_uploader("1. Fichier des suspensions IFS (.csv)", type="csv", key="locked_uploader")

        st.markdown("---")
        checklist_source = st.radio(
            "2. Source de la Checklist IFS Food V8:",
            ("Utiliser celle de GitHub (Recommandé)", "Téléverser ma checklist", "Ne pas utiliser de checklist"),
            key="checklist_source_radio"
        )
        checklist_file_uploaded = None
        if checklist_source == "Téléverser ma checklist":
            checklist_file_uploaded = st.file_uploader("Téléversez votre fichier checklist (.csv)", type="csv", key="checklist_uploader")

    # --- Main logic: only runs once a suspensions file has been uploaded ---
    if locked_file_uploaded is not None:
        # Remember the original file names in session state (presumably read
        # later by the PDF report generator — confirm against IFSAnalyzer).
        st.session_state.locked_file_name_original = locked_file_uploaded.name
        if checklist_file_uploaded:
            st.session_state.checklist_file_name_original = checklist_file_uploaded.name
        elif checklist_source == "Utiliser celle de GitHub (Recommandé)":
            st.session_state.checklist_file_name_original = "Checklist IFS Food V8 (GitHub)"
        else:
            st.session_state.checklist_file_name_original = "Non fournie"

        # Wrap the upload bytes in BytesIO so the analyzer gets a seekable stream.
        locked_data_io = io.BytesIO(locked_file_uploaded.getvalue())
        checklist_data_io = None

        if checklist_source == "Téléverser ma checklist" and checklist_file_uploaded is not None:
            checklist_data_io = io.BytesIO(checklist_file_uploaded.getvalue())
        elif checklist_source == "Utiliser celle de GitHub (Recommandé)":
            # Fetch the reference checklist; returns None (with a warning) on failure.
            checklist_data_io = download_checklist_from_github_st()

        # Cached analyzer construction (see get_analyzer).
        analyzer = get_analyzer(locked_data_io, checklist_data_io)

        if analyzer.locked_df is not None and not analyzer.locked_df.empty:
            st.success(f"Fichier '{locked_file_uploaded.name}' chargé et analysé. {len(analyzer.locked_df)} suspensions IFS Food trouvées.")
            display_dashboard_tabs(analyzer)  # render the six result tabs

            # PDF export: generated on demand from the sidebar.
            st.sidebar.markdown("---")
            if st.sidebar.button("📄 Générer le Rapport PDF Complet", key="pdf_button"):
                with st.spinner("Génération du rapport PDF en cours... Veuillez patienter."):
                    # export_report_to_pdf writes the file and returns its path
                    # (None on failure — the method reports errors itself).
                    pdf_path = analyzer.export_report_to_pdf()
                    if pdf_path:
                        with open(pdf_path, "rb") as pdf_file:
                            st.sidebar.download_button(
                                label="📥 Télécharger le Rapport PDF",
                                data=pdf_file,
                                file_name="Analyse_IFS_Suspensions_Report.pdf",
                                mime="application/pdf"
                            )
                        st.sidebar.success("Rapport PDF généré !")
                    else:
                        st.sidebar.error("Erreur lors de la création du rapport PDF.")
        else:
            st.error("Aucune donnée IFS Food n'a été trouvée dans le fichier ou après filtrage. Veuillez vérifier votre fichier.")
    else:
        st.info("Veuillez téléverser un fichier CSV des suspensions IFS pour commencer l'analyse.")

    st.sidebar.markdown("---")
    st.sidebar.markdown("Développé avec ❤️ par IA")
|
| 527 |
+
|
| 528 |
+
|
| 529 |
+
def display_dashboard_tabs(analyzer):
    """Render the six analysis tabs for an already-built IFSAnalyzer.

    Tabs: overview metrics, geography, detailed themes, IFS requirements,
    audit types, and the themes-vs-scopes cross analysis. All headings and
    user-facing strings are intentionally in French.
    """
    tab_titles = [
        "📊 Vue d'Ensemble", "🌍 Géographie", "🏷️ Thèmes Détaillés",
        "📋 Exigences IFS", "🕵️ Types d'Audits", "🔗 Analyse Croisée"
    ]
    tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(tab_titles)

    with tab1:  # Overview: headline metrics + two top-10 bar charts
        st.header("📊 Vue d'Ensemble des Suspensions")
        col1, col2 = st.columns(2)
        total_suspensions = len(analyzer.locked_df)
        with_reasons_count = analyzer.locked_df['Lock reason'].notna().sum()
        # analyze_audit_types() returns (summary_counts, examples) — only the
        # counts are needed here.
        audit_analysis_summary, _ = analyzer.analyze_audit_types()
        total_audit_special = sum(audit_analysis_summary.values())

        with col1:
            st.metric("Total Suspensions IFS Food", total_suspensions)
            # Guard against division by zero when the filtered dataset is empty.
            st.metric("Avec Motifs Documentés", f"{with_reasons_count} ({with_reasons_count/total_suspensions*100:.1f}%)" if total_suspensions > 0 else "0")
        with col2:
            st.metric("Liées à Audits Spécifiques (IP, etc.)", f"{total_audit_special} ({total_audit_special/total_suspensions*100:.1f}%)" if total_suspensions > 0 else "0")
            # Additional metrics can be added here.

        st.markdown("---")
        st.subheader("Visualisations Clés")
        # Top themes chart (labels shortened for readability).
        theme_counts, _ = analyzer.analyze_themes()
        if theme_counts:
            top_themes = dict(sorted(theme_counts.items(), key=lambda x:x[1], reverse=True)[:10])
            top_themes_clean = {k.replace('_',' ').replace('MANAGEMENT','MGMT').replace('RESPONSIBILITY','RESP.'):v for k,v in top_themes.items() if v > 0}
            if top_themes_clean: st.plotly_chart(analyzer._create_plotly_bar_chart(top_themes_clean, "Top 10 Thèmes de Non-Conformités", orientation='h', color='indianred', height=450), use_container_width=True)

        # Top product scopes chart (product_scope_analysis returns a Counter —
        # shown by the .most_common call).
        scope_counts = analyzer.product_scope_analysis()
        if scope_counts:
            top_scopes = dict(scope_counts.most_common(10))
            top_scopes_clean = {f"Scope {k}": v for k, v in top_scopes.items() if v > 0}
            if top_scopes_clean: st.plotly_chart(analyzer._create_plotly_bar_chart(top_scopes_clean, "Top 10 Product Scopes Impactés", orientation='h', color='cornflowerblue', height=450), use_container_width=True)

    with tab2:  # Geography: choropleth map + per-country table
        st.header("🌍 Analyse Géographique")
        geo_stats_df = analyzer.geographic_analysis()
        if geo_stats_df is not None and not geo_stats_df.empty:
            st.plotly_chart(analyzer._create_plotly_choropleth_map(geo_stats_df, "Suspensions par Pays"), use_container_width=True)
            st.markdown("---")
            st.subheader("Tableau des Suspensions par Pays (Top 15)")
            st.dataframe(geo_stats_df.head(15), use_container_width=True)
        else:
            st.info("Données géographiques non disponibles.")

    with tab3:  # Detailed themes: one expander per theme, with sample reasons
        st.header("🏷️ Analyse Thématique Détaillée")
        theme_counts, theme_details = analyzer.analyze_themes()
        for theme, count in sorted(theme_counts.items(), key=lambda x: x[1], reverse=True):
            if count > 0:
                with st.expander(f"{theme.replace('_', ' ')} ({count} cas)", expanded=False):
                    st.markdown(f"**Exemples de motifs (jusqu'à 3) pour le thème : {theme.replace('_', ' ')}**")
                    for i, detail in enumerate(theme_details[theme][:3]):
                        st.markdown(f"**Cas {i+1} (Fournisseur: {detail['supplier']}, Pays: {detail['country']})**")
                        # Show only the first 500 characters of the reason.
                        st.caption(f"{detail['reason'][:500]}...")
                        st.markdown("---")
                    # Most-affected countries for this theme, re-derived by
                    # matching the theme's keywords against the cleaned reasons.
                    theme_keywords_current_theme = analyzer.themes_keywords_definition.get(theme, [])
                    if theme_keywords_current_theme:
                        # NOTE(review): keywords are joined into a regex
                        # alternation — assumes they contain no regex
                        # metacharacters; confirm against the definitions.
                        theme_mask = analyzer.locked_df['lock_reason_clean'].str.contains('|'.join(theme_keywords_current_theme), case=False, na=False, regex=True)
                        if theme_mask.sum() > 0:
                            theme_countries_df = analyzer.locked_df[theme_mask]['Country/Region'].value_counts().reset_index()
                            theme_countries_df.columns = ['Pays', 'Nombre de cas']
                            if not theme_countries_df.empty:
                                st.markdown("**Pays les plus affectés par ce thème :**")
                                st.dataframe(theme_countries_df.head(5), use_container_width=True)

    with tab4:  # IFS requirements: chart + per-requirement detail
        st.header("📋 Analyse des Exigences IFS")
        recommendations = analyzer.generate_ifs_recommendations_analysis()
        if recommendations and analyzer.checklist_df is not None:
            st.info("Les textes des exigences proviennent de la checklist IFS Food V8. Les numéros sont extraits des motifs de suspension.")
            df_reco = pd.DataFrame(recommendations)
            df_reco_sorted = df_reco.sort_values(by='count', ascending=False)

            # Chart of the most-cited requirements, with labels shortened to
            # chapter + first 25 chars of the requirement text.
            top_reco_chart = df_reco_sorted.head(10).copy()
            top_reco_chart['display_label'] = top_reco_chart.apply(lambda row: f"{row['chapter']} ({row['requirement_text'][:25]}...)", axis=1)
            reco_chart_data = pd.Series(top_reco_chart['count'].values, index=top_reco_chart['display_label']).to_dict()
            st.plotly_chart(analyzer._create_plotly_bar_chart(reco_chart_data, "Top 10 Exigences IFS Citées", orientation='v', color='gold', height=500), use_container_width=True)

            st.markdown("---")
            st.subheader("Détail des Exigences Citées")
            for index, row in df_reco_sorted.iterrows():
                with st.expander(f"Exigence {row['chapter']} ({row['count']} mentions)", expanded=False):
                    st.markdown(f"**Texte de l'exigence :**")
                    st.markdown(f"> {row['requirement_text']}")
        elif recommendations:  # Chapters were extracted but no checklist text is available
            st.warning("Checklist non chargée. Affichage des numéros de chapitres uniquement.")
            df_reco = pd.DataFrame(recommendations)
            df_reco_sorted = df_reco.sort_values(by='count', ascending=False)
            chapter_counts_dict = pd.Series(df_reco_sorted['count'].values, index=df_reco_sorted['chapter']).to_dict()
            st.plotly_chart(analyzer._create_plotly_bar_chart(chapter_counts_dict, "Top Chapitres IFS Cités (Numéros)", orientation='v', color='gold', height=500), use_container_width=True)
            st.dataframe(df_reco_sorted, use_container_width=True)
        else:
            st.info("Aucune exigence IFS spécifique n'a pu être extraite des motifs ou la checklist n'est pas disponible.")

    with tab5:  # Audit types: distribution chart + per-type examples
        st.header("🕵️ Analyse par Types d'Audits")
        audit_analysis, audit_examples = analyzer.analyze_audit_types()
        if audit_analysis:
            audit_analysis_clean = {k.replace('_', ' '):v for k,v in audit_analysis.items() if v > 0}
            if audit_analysis_clean: st.plotly_chart(analyzer._create_plotly_bar_chart(audit_analysis_clean, "Répartition par Type d'Audit Spécifique", color='darkorange', height=400), use_container_width=True)

            st.markdown("---")
            st.subheader("Détails et Exemples par Type d'Audit")
            for audit_type, count in sorted(audit_analysis.items(), key=lambda x: x[1], reverse=True):
                if count > 0:
                    with st.expander(f"{audit_type.replace('_', ' ')} ({count} cas)", expanded=False):
                        st.markdown(f"**Exemples (jusqu'à 3) pour : {audit_type.replace('_', ' ')}**")
                        for i, ex_data in enumerate(audit_examples[audit_type]['examples'][:3]):
                            st.markdown(f"**Cas {i+1} (Fournisseur: {ex_data.get('Supplier', 'N/A')}, Pays: {ex_data.get('Country/Region', 'N/A')})**")
                            st.caption(f"{ex_data.get('Lock reason', 'N/A')[:500]}...")
                            st.markdown("---")
                        countries_data = audit_examples[audit_type]['countries']
                        if countries_data:
                            st.markdown(f"**Répartition géographique (Top 5 pays) pour ce type d'audit :** {', '.join([f'{c} ({n})' for c, n in countries_data.items()])}")
        else:
            st.info("Aucune donnée sur les types d'audits disponible.")

    with tab6:  # Cross analysis: themes vs product scopes heatmap + full table
        st.header("🔗 Analyse Croisée : Thèmes vs Product Scopes")
        cross_pivot_matrix = analyzer.cross_analysis_scope_themes()
        if cross_pivot_matrix is not None and not cross_pivot_matrix.empty:
            # For readability, the heatmap shows at most the 15 scopes with the
            # highest totals; the full matrix is still shown as a table below.
            top_n_scopes_for_heatmap = min(15, len(cross_pivot_matrix.index))
            if len(cross_pivot_matrix.index) > top_n_scopes_for_heatmap:
                scope_totals = cross_pivot_matrix.sum(axis=1).sort_values(ascending=False)
                top_scopes_names = scope_totals.head(top_n_scopes_for_heatmap).index
                cross_pivot_matrix_filtered = cross_pivot_matrix.loc[top_scopes_names]
            else:
                cross_pivot_matrix_filtered = cross_pivot_matrix

            if not cross_pivot_matrix_filtered.empty:
                st.plotly_chart(analyzer._create_plotly_heatmap(cross_pivot_matrix_filtered, "Fréquence des Thèmes de NC par Product Scope (Top Scopes)", height=max(500, len(cross_pivot_matrix_filtered.index) * 40)), use_container_width=True)
                st.markdown("---")
                st.subheader("Tableau de Corrélation Complet (Scopes vs Thèmes)")
                # axis=None colors the gradient over the whole table at once.
                st.dataframe(cross_pivot_matrix.style.background_gradient(cmap='YlGnBu', axis=None), use_container_width=True)
            else:
                st.info("Pas assez de données pour afficher la heatmap après filtrage.")
        else:
            st.info("Données insuffisantes pour l'analyse croisée Thèmes vs Product Scopes.")
|
| 681 |
+
|
| 682 |
|
| 683 |
+
# --- Application entry point ---
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|