Spaces:
Sleeping
Sleeping
Create Neoexctract.py
Browse files- Neoexctract.py +261 -0
Neoexctract.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import streamlit as st
|
| 4 |
+
from io import BytesIO
|
| 5 |
+
import requests
|
| 6 |
+
|
| 7 |
+
# Set Streamlit to wide mode so the extraction/checklist tables use the
# full browser width.
st.set_page_config(layout="wide")
|
| 9 |
+
|
| 10 |
+
# Flatten the nested NEO JSON structure into a single-level dict.
def flatten_json_safe(nested_json, parent_key='', sep='_'):
    """Flatten nested JSON into one dict with `sep`-joined path keys.

    Dicts are recursed into; list elements are recursed with their index
    appended to the path; any other value (string, number, None, ...) is
    stored as-is under the accumulated path. A non-dict top-level input is
    returned as a single-entry dict keyed by `parent_key`.
    """
    if not isinstance(nested_json, dict):
        # Leaf (or a bare list reached via direct recursion): keep as-is.
        return {parent_key: nested_json}

    flat = {}
    for key, value in nested_json.items():
        path = f'{parent_key}{sep}{key}' if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_json_safe(value, path, sep=sep))
        elif isinstance(value, list):
            # Index each element into the path, e.g. "answer_0", "answer_1".
            for idx, element in enumerate(value):
                flat.update(flatten_json_safe(element, f'{path}{sep}{idx}', sep=sep))
        else:
            flat[path] = value
    return flat
|
| 27 |
+
|
| 28 |
+
# Pull the user-selected labels out of the flattened JSON.
def extract_from_flattened(flattened_data, mapping, selected_fields):
    """Return {label: value} for every mapping entry whose label was selected.

    Values come from `flattened_data` via the mapped flat path; missing
    paths yield the placeholder 'N/A'. Result order follows `mapping`.
    """
    return {
        label: flattened_data.get(flat_path, 'N/A')
        for label, flat_path in mapping.items()
        if label in selected_fields
    }
|
| 35 |
+
|
| 36 |
+
# Custom CSS for the read-only HTML tables rendered via st.markdown.
def apply_table_css():
    """Inject CSS so the HTML tables built in this app get a bordered,
    padded, light-grey look (full width, collapsed borders)."""
    css = """
    <style>
    table {
        width: 100%;
        border-collapse: collapse;
        background-color: #f9f9f9;
    }
    th, td {
        border: 1px solid #ddd;
        padding: 10px;
        text-align: left;
    }
    th {
        background-color: #f2f2f2;
    }
    </style>
    """
    st.markdown(css, unsafe_allow_html=True)
|
| 57 |
+
|
| 58 |
+
# Load the CSV mapping of checklist requirement numbers to NEO UUIDs.
def load_uuid_mapping_from_url(url):
    """Download and sanitize the Num→UUID checklist mapping CSV.

    The CSV must contain the columns UUID, Num, Chapitre, Theme, SSTheme.
    Rows with empty UUID/Num are dropped, Chapitre is normalised to a
    stripped string, and duplicate (Chapitre, Num) rows are removed.

    Returns a pandas DataFrame; on ANY failure (network error, bad HTTP
    status, missing column) an empty DataFrame is returned so callers can
    uniformly test `.empty`. (Bug fix: the missing-column path previously
    returned a plain dict `{}`, which broke the caller's `.empty` check.)
    """
    from io import StringIO

    # This runs at module import time, so a network failure must not
    # crash the whole Streamlit app — report and degrade gracefully.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        st.error("Impossible de charger le fichier CSV des UUID depuis l'URL fourni.")
        return pd.DataFrame()
    if response.status_code != 200:
        st.error("Impossible de charger le fichier CSV des UUID depuis l'URL fourni.")
        return pd.DataFrame()

    uuid_mapping_df = pd.read_csv(StringIO(response.text))

    # Validate that every required column is present before touching them.
    required_columns = ['UUID', 'Num', 'Chapitre', 'Theme', 'SSTheme']
    for column in required_columns:
        if column not in uuid_mapping_df.columns:
            st.error(f"Le fichier CSV doit contenir une colonne '{column}' avec des valeurs valides.")
            return pd.DataFrame()  # was `{}` — callers expect a DataFrame

    uuid_mapping_df = uuid_mapping_df.dropna(subset=['UUID', 'Num'])  # drop rows with empty 'UUID' or 'Num'
    uuid_mapping_df['Chapitre'] = uuid_mapping_df['Chapitre'].astype(str).str.strip()
    uuid_mapping_df = uuid_mapping_df.drop_duplicates(subset=['Chapitre', 'Num'])  # dedupe by ('Chapitre', 'Num')
    return uuid_mapping_df
|
| 80 |
+
|
| 81 |
+
# URL for the UUID CSV (requirement Num → NEO UUID mapping, hosted on GitHub).
UUID_MAPPING_URL = "https://raw.githubusercontent.com/M00N69/Gemini-Knowledge/refs/heads/main/IFSV8listUUID.csv"

# Fetched once at import time; empty DataFrame on failure (checked with
# `.empty` in the "Exigences de la checklist" branch below).
UUID_MAPPING_DF = load_uuid_mapping_from_url(UUID_MAPPING_URL)
|
| 85 |
+
|
| 86 |
+
# Mapping from the human-readable (French) field labels shown in the UI to
# the flattened JSON key paths produced by flatten_json_safe() on a NEO
# .ifs export. Used both to build the multiselect and to look up values.
FLATTENED_FIELD_MAPPING = {
    # Audited-site identity and contact details
    "Nom du site à auditer": "data_modules_food_8_questions_companyName_answer",
    "N° COID du portail": "data_modules_food_8_questions_companyCoid_answer",
    "Code GLN": "data_modules_food_8_questions_companyGln_answer_0_rootQuestions_companyGlnNumber_answer",
    "Rue": "data_modules_food_8_questions_companyStreetNo_answer",
    "Code postal": "data_modules_food_8_questions_companyZip_answer",
    "Nom de la ville": "data_modules_food_8_questions_companyCity_answer",
    "Pays": "data_modules_food_8_questions_companyCountry_answer",
    "Téléphone": "data_modules_food_8_questions_companyTelephone_answer",
    "Latitude": "data_modules_food_8_questions_companyGpsLatitude_answer",
    "Longitude": "data_modules_food_8_questions_companyGpsLongitude_answer",
    "Email": "data_modules_food_8_questions_companyEmail_answer",
    # Headquarters details
    "Nom du siège social": "data_modules_food_8_questions_headquartersName_answer",
    "Rue (siège social)": "data_modules_food_8_questions_headquartersStreetNo_answer",
    "Nom de la ville (siège social)": "data_modules_food_8_questions_headquartersCity_answer",
    "Code postal (siège social)": "data_modules_food_8_questions_headquartersZip_answer",
    "Pays (siège social)": "data_modules_food_8_questions_headquartersCountry_answer",
    "Téléphone (siège social)": "data_modules_food_8_questions_headquartersTelephone_answer",
    # Site size and workforce
    "Surface couverte de l'entreprise (m²)": "data_modules_food_8_questions_productionAreaSize_answer",
    "Nombre de bâtiments": "data_modules_food_8_questions_numberOfBuildings_answer",
    "Nombre de lignes de production": "data_modules_food_8_questions_numberOfProductionLines_answer",
    "Nombre d'étages": "data_modules_food_8_questions_numberOfFloors_answer",
    "Nombre maximum d'employés dans l'année, au pic de production": "data_modules_food_8_questions_numberOfEmployeesForTimeCalculation_answer",
    "Langue parlée et écrite sur le site": "data_modules_food_8_questions_workingLanguage_answer",
    # Audit scope, outsourcing and exclusions
    "Périmètre de l'audit": "data_modules_food_8_questions_scopeCertificateScopeDescription_en_answer",
    "Process et activités": "data_modules_food_8_questions_scopeProductGroupsDescription_answer",
    "Activité saisonnière ? (O/N)": "data_modules_food_8_questions_seasonalProduction_answer",
    "Une partie du procédé de fabrication est-elle sous traitée? (OUI/NON)": "data_modules_food_8_questions_partlyOutsourcedProcesses_answer",
    "Si oui lister les procédés sous-traités": "data_modules_food_8_questions_partlyOutsourcedProcessesDescription_answer",
    "Avez-vous des produits totalement sous-traités? (OUI/NON)": "data_modules_food_8_questions_fullyOutsourcedProducts_answer",
    "Si oui, lister les produits totalement sous-traités": "data_modules_food_8_questions_fullyOutsourcedProductsDescription_answer",
    "Avez-vous des produits de négoce? (OUI/NON)": "data_modules_food_8_questions_tradedProductsBrokerActivity_answer",
    "Si oui, lister les produits de négoce": "data_modules_food_8_questions_tradedProductsBrokerActivityDescription_answer",
    "Produits à exclure du champ d'audit (OUI/NON)": "data_modules_food_8_questions_exclusions_answer",
    "Préciser les produits à exclure": "data_modules_food_8_questions_exclusionsDescription_answer"
}
|
| 123 |
+
|
| 124 |
+
# ---------------------------------------------------------------------------
# Streamlit app — top-level page flow.
# ---------------------------------------------------------------------------
st.sidebar.title("Menu de Navigation")
# NOTE(review): only "Extraction des données" and "Exigences de la checklist"
# have handlers below; the other three options currently render nothing —
# confirm whether they are planned or should be removed from the radio.
option = st.sidebar.radio("Choisissez une option:", ["Extraction des données", "Exigences de la checklist", "Modification des données", "Exportation", "Plan d'actions"])

st.title("IFS NEO Form Data Extractor")

# Step 1: Upload the JSON (.ifs) file exported from the IFS NEO portal.
uploaded_json_file = st.file_uploader("Charger le fichier IFS de NEO", type="ifs")

if uploaded_json_file:
    try:
        # Step 2: Parse the uploaded file as JSON (an .ifs file is JSON inside).
        json_data = json.load(uploaded_json_file)

        # Step 3: Flatten the JSON so fields can be addressed by path strings.
        flattened_json_data_safe = flatten_json_safe(json_data)

        if option == "Extraction des données":
            st.subheader("Champs disponibles pour l'extraction")
            select_all = st.checkbox("Sélectionner tous les champs")
            if select_all:
                selected_fields = list(FLATTENED_FIELD_MAPPING.keys())
            else:
                selected_fields = st.multiselect("Sélectionnez les champs que vous souhaitez extraire", list(FLATTENED_FIELD_MAPPING.keys()))
            if selected_fields:
                # Step 4: Extract the required data based on the selected fields.
                extracted_data = extract_from_flattened(flattened_json_data_safe, FLATTENED_FIELD_MAPPING, selected_fields)

                # Step 5: Display the extracted data, editable via widgets when requested.
                st.subheader("Données extraites")
                edit_mode = st.checkbox("Modifier les données")
                updated_data = extracted_data.copy()

                if edit_mode:
                    for field, value in extracted_data.items():
                        # Long free-text fields get a textarea; everything else a one-line input.
                        if field in ["Périmètre de l'audit", "Process et activités", "Si oui lister les procédés sous-traités", "Si oui, lister les produits totalement sous-traités", "Si oui, lister les produits de négoce", "Préciser les produits à exclure"]:
                            updated_data[field] = st.text_area(f"{field}", value=value, height=150)
                        else:
                            updated_data[field] = st.text_input(f"{field}", value=value)
                else:
                    # Read-only display as a styled HTML table.
                    apply_table_css()
                    table_html = "<table><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>"
                    for field, value in extracted_data.items():
                        table_html += f"<tr><td>{field}</td><td>{value}</td></tr>"
                    table_html += "</tbody></table>"
                    st.markdown(table_html, unsafe_allow_html=True)

                # Step 6: Offer the (possibly edited) data as an Excel download,
                # with the COID number embedded in the file name.
                df = pd.DataFrame(list(updated_data.items()), columns=["Field", "Value"])

                # COID used in the download file name; "inconnu" when absent.
                numero_coid = updated_data.get("N° COID du portail", "inconnu")

                # Build the Excel file in memory.
                output = BytesIO()

                # Write the sheet, then widen each column to fit its longest cell.
                with pd.ExcelWriter(output, engine='openpyxl') as writer:
                    df.to_excel(writer, index=False, sheet_name="Données extraites")

                    # Access the worksheet to tweak the formatting.
                    worksheet = writer.sheets["Données extraites"]

                    # Column width = longest entry + small margin.
                    for col in worksheet.columns:
                        max_length = max(len(str(cell.value)) for cell in col)
                        col_letter = col[0].column_letter  # Get the column letter
                        worksheet.column_dimensions[col_letter].width = max_length + 5  # Adjust column width

                # Rewind the buffer so the download serves from the start.
                output.seek(0)

                st.download_button(
                    label="Télécharger le fichier Excel",
                    data=output,
                    file_name=f'extraction_{numero_coid}.xlsx',
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )

        elif option == "Exigences de la checklist":
            st.subheader("Exigences de la checklist")
            if not UUID_MAPPING_DF.empty:
                # Linked filters: each filter narrows the options of the next one.
                chapitre_options = ["Tous"] + sorted(UUID_MAPPING_DF['Chapitre'].dropna().unique())
                chapitre_filter = st.selectbox("Filtrer par Chapitre", options=chapitre_options)
                filtered_df = UUID_MAPPING_DF

                if chapitre_filter != "Tous":
                    filtered_df = filtered_df[filtered_df['Chapitre'] == chapitre_filter]
                    theme_options = ["Tous"] + sorted(filtered_df['Theme'].dropna().unique())
                else:
                    theme_options = ["Tous"] + sorted(UUID_MAPPING_DF['Theme'].dropna().unique())
                theme_filter = st.selectbox("Filtrer par Thème", options=theme_options)

                if theme_filter != "Tous":
                    filtered_df = filtered_df[filtered_df['Theme'] == theme_filter]
                    sstheme_options = ["Tous"] + sorted(filtered_df['SSTheme'].dropna().unique())
                else:
                    sstheme_options = ["Tous"] + sorted(UUID_MAPPING_DF['SSTheme'].dropna().unique())
                sstheme_filter = st.selectbox("Filtrer par Sous-Thème", options=sstheme_options)

                if sstheme_filter != "Tous":
                    filtered_df = filtered_df[filtered_df['SSTheme'] == sstheme_filter]

                # Look up each remaining requirement's scoring data in the flattened JSON.
                checklist_requirements = []
                for _, row in filtered_df.iterrows():
                    key = row['Num']
                    uuid = row['UUID']
                    # Flattened key prefix of this requirement's scoring entry.
                    prefix = f"data_modules_food_8_checklists_checklistFood8_resultScorings_{uuid}"
                    explanation_text = flattened_json_data_safe.get(f"{prefix}_answers_englishExplanationText", "N/A")
                    detailed_explanation = flattened_json_data_safe.get(f"{prefix}_answers_explanationText", "N/A")
                    score_label = flattened_json_data_safe.get(f"{prefix}_score_label", "N/A")
                    response = flattened_json_data_safe.get(f"{prefix}_answers_fieldAnswers", "N/A")
                    checklist_requirements.append({
                        "Num": key,
                        "Explanation": explanation_text,
                        "Detailed Explanation": detailed_explanation,
                        "Score": score_label,
                        "Response": response
                    })

                # Render the filtered requirements as a styled HTML table.
                apply_table_css()
                table_html = "<table><thead><tr><th>Numéro d'exigence</th><th>Explication</th><th>Explication Détaillée</th><th>Note</th><th>Réponse</th></tr></thead><tbody>"
                for req in checklist_requirements:
                    table_html += f"<tr><td>{req['Num']}</td><td>{req['Explanation']}</td><td>{req['Detailed Explanation']}</td><td>{req['Score']}</td><td>{req['Response']}</td></tr>"
                table_html += "</tbody></table>"
                st.markdown(table_html, unsafe_allow_html=True)
            else:
                st.error("Impossible de charger les données des UUID. Veuillez vérifier l'URL.")

    except json.JSONDecodeError:
        st.error("Erreur lors du décodage du fichier JSON. Veuillez vous assurer qu'il est au format correct.")
else:
    st.write("Le fichier de NEO doit être un (.ifs)")
|