MMOON committed on
Commit
328f421
·
verified ·
1 Parent(s): 30e05d0

Create Neoexctract.py

Browse files
Files changed (1) hide show
  1. Neoexctract.py +261 -0
Neoexctract.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+ import streamlit as st
4
+ from io import BytesIO
5
+ import requests
6
+
# Set Streamlit to wide mode.
# NOTE: st.set_page_config must be the first Streamlit command executed in the script.
st.set_page_config(layout="wide")
# Function to flatten the nested JSON structure
def flatten_json_safe(nested_json, parent_key='', sep='_'):
    """Flatten a nested JSON dictionary, safely handling strings and primitives.

    Dict keys and list indices are joined with `sep` into a single compound
    key, e.g. {"a": [{"b": 1}]} -> {"a_0_b": 1}. A non-dict value at the top
    level is returned under `parent_key` as-is.
    """
    flat = {}
    # Primitive (or list passed directly): nothing to descend into.
    if not isinstance(nested_json, dict):
        flat[parent_key] = nested_json
        return flat
    for key, value in nested_json.items():
        compound_key = f'{parent_key}{sep}{key}' if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_json_safe(value, compound_key, sep=sep))
        elif isinstance(value, list):
            # Lists are flattened element-by-element with the index in the key.
            for index, element in enumerate(value):
                flat.update(flatten_json_safe(element, f'{compound_key}{sep}{index}', sep=sep))
        else:
            flat[compound_key] = value
    return flat
# Function to extract data from the flattened JSON
def extract_from_flattened(flattened_data, mapping, selected_fields):
    """Pick the selected labels out of a flattened JSON dict.

    For each label in `mapping` that is also in `selected_fields`, look up its
    flattened path in `flattened_data`; missing paths yield 'N/A'.
    """
    return {
        label: flattened_data.get(flat_path, 'N/A')
        for label, flat_path in mapping.items()
        if label in selected_fields
    }
# Custom CSS for the table display
def apply_table_css():
    """Inject CSS so HTML tables rendered via st.markdown get borders,
    padding and a light background."""
    css = """
    <style>
    table {
        width: 100%;
        border-collapse: collapse;
        background-color: #f9f9f9;
    }
    th, td {
        border: 1px solid #ddd;
        padding: 10px;
        text-align: left;
    }
    th {
        background-color: #f2f2f2;
    }
    </style>
    """
    st.markdown(css, unsafe_allow_html=True)
# Load the CSV mapping for UUIDs corresponding to NUM from a URL
def load_uuid_mapping_from_url(url):
    """Download and clean the requirement-number -> UUID mapping CSV.

    Parameters
    ----------
    url : str
        Raw URL of a CSV that must contain the columns
        'UUID', 'Num', 'Chapitre', 'Theme' and 'SSTheme'.

    Returns
    -------
    pandas.DataFrame
        The cleaned mapping, or an EMPTY DataFrame on any failure so that
        callers can always test the result with `.empty`.
    """
    try:
        # Timeout keeps a dead host from hanging the app: this function is
        # called at module import time, before any widget is rendered.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        st.error("Impossible de charger le fichier CSV des UUID depuis l'URL fourni.")
        return pd.DataFrame()

    if response.status_code == 200:
        from io import StringIO
        csv_data = StringIO(response.text)
        uuid_mapping_df = pd.read_csv(csv_data)

        # Check that the required columns exist before cleaning.
        required_columns = ['UUID', 'Num', 'Chapitre', 'Theme', 'SSTheme']
        for column in required_columns:
            if column not in uuid_mapping_df.columns:
                st.error(f"Le fichier CSV doit contenir une colonne '{column}' avec des valeurs valides.")
                # BUG FIX: this path used to return {} (a plain dict), which
                # crashed the caller's `UUID_MAPPING_DF.empty` check with
                # AttributeError. Return an empty DataFrame like the other
                # failure path so the contract is uniform.
                return pd.DataFrame()

        uuid_mapping_df = uuid_mapping_df.dropna(subset=['UUID', 'Num'])  # Drop rows with empty 'UUID' or 'Num' values
        uuid_mapping_df['Chapitre'] = uuid_mapping_df['Chapitre'].astype(str).str.strip()
        uuid_mapping_df = uuid_mapping_df.drop_duplicates(subset=['Chapitre', 'Num'])  # Remove duplicate rows based on 'Chapitre' and 'Num'
        return uuid_mapping_df
    else:
        st.error("Impossible de charger le fichier CSV des UUID depuis l'URL fourni.")
        return pd.DataFrame()
# URL for the UUID CSV (raw GitHub content listing the IFS v8 requirements).
UUID_MAPPING_URL = "https://raw.githubusercontent.com/M00N69/Gemini-Knowledge/refs/heads/main/IFSV8listUUID.csv"

# Fetched once at module import time; the checklist view later checks
# `UUID_MAPPING_DF.empty` to decide whether the mapping is usable.
UUID_MAPPING_DF = load_uuid_mapping_from_url(UUID_MAPPING_URL)
# Complete mapping based on your provided field names and JSON structure.
# Keys are the French UI labels shown to the user; values are the
# underscore-joined paths produced by flatten_json_safe() on the .ifs JSON.
# Do not rename keys: several are matched literally elsewhere in the script
# (e.g. "N° COID du portail" for the export filename, and the long-text
# labels routed to st.text_area in edit mode).
FLATTENED_FIELD_MAPPING = {
    "Nom du site à auditer": "data_modules_food_8_questions_companyName_answer",
    "N° COID du portail": "data_modules_food_8_questions_companyCoid_answer",
    "Code GLN": "data_modules_food_8_questions_companyGln_answer_0_rootQuestions_companyGlnNumber_answer",
    "Rue": "data_modules_food_8_questions_companyStreetNo_answer",
    "Code postal": "data_modules_food_8_questions_companyZip_answer",
    "Nom de la ville": "data_modules_food_8_questions_companyCity_answer",
    "Pays": "data_modules_food_8_questions_companyCountry_answer",
    "Téléphone": "data_modules_food_8_questions_companyTelephone_answer",
    "Latitude": "data_modules_food_8_questions_companyGpsLatitude_answer",
    "Longitude": "data_modules_food_8_questions_companyGpsLongitude_answer",
    "Email": "data_modules_food_8_questions_companyEmail_answer",
    "Nom du siège social": "data_modules_food_8_questions_headquartersName_answer",
    "Rue (siège social)": "data_modules_food_8_questions_headquartersStreetNo_answer",
    "Nom de la ville (siège social)": "data_modules_food_8_questions_headquartersCity_answer",
    "Code postal (siège social)": "data_modules_food_8_questions_headquartersZip_answer",
    "Pays (siège social)": "data_modules_food_8_questions_headquartersCountry_answer",
    "Téléphone (siège social)": "data_modules_food_8_questions_headquartersTelephone_answer",
    "Surface couverte de l'entreprise (m²)": "data_modules_food_8_questions_productionAreaSize_answer",
    "Nombre de bâtiments": "data_modules_food_8_questions_numberOfBuildings_answer",
    "Nombre de lignes de production": "data_modules_food_8_questions_numberOfProductionLines_answer",
    "Nombre d'étages": "data_modules_food_8_questions_numberOfFloors_answer",
    "Nombre maximum d'employés dans l'année, au pic de production": "data_modules_food_8_questions_numberOfEmployeesForTimeCalculation_answer",
    "Langue parlée et écrite sur le site": "data_modules_food_8_questions_workingLanguage_answer",
    "Périmètre de l'audit": "data_modules_food_8_questions_scopeCertificateScopeDescription_en_answer",
    "Process et activités": "data_modules_food_8_questions_scopeProductGroupsDescription_answer",
    "Activité saisonnière ? (O/N)": "data_modules_food_8_questions_seasonalProduction_answer",
    "Une partie du procédé de fabrication est-elle sous traitée? (OUI/NON)": "data_modules_food_8_questions_partlyOutsourcedProcesses_answer",
    "Si oui lister les procédés sous-traités": "data_modules_food_8_questions_partlyOutsourcedProcessesDescription_answer",
    "Avez-vous des produits totalement sous-traités? (OUI/NON)": "data_modules_food_8_questions_fullyOutsourcedProducts_answer",
    "Si oui, lister les produits totalement sous-traités": "data_modules_food_8_questions_fullyOutsourcedProductsDescription_answer",
    "Avez-vous des produits de négoce? (OUI/NON)": "data_modules_food_8_questions_tradedProductsBrokerActivity_answer",
    "Si oui, lister les produits de négoce": "data_modules_food_8_questions_tradedProductsBrokerActivityDescription_answer",
    "Produits à exclure du champ d'audit (OUI/NON)": "data_modules_food_8_questions_exclusions_answer",
    "Préciser les produits à exclure": "data_modules_food_8_questions_exclusionsDescription_answer"
}
124
+ # Streamlit app
125
+ st.sidebar.title("Menu de Navigation")
126
+ option = st.sidebar.radio("Choisissez une option:", ["Extraction des données", "Exigences de la checklist", "Modification des données", "Exportation", "Plan d'actions"])
127
+
128
+ st.title("IFS NEO Form Data Extractor")
129
+
130
+ # Step 1: Upload the JSON (.ifs) file
131
+ uploaded_json_file = st.file_uploader("Charger le fichier IFS de NEO", type="ifs")
132
+
133
+ if uploaded_json_file:
134
+ try:
135
+ # Step 2: Load the uploaded JSON file
136
+ json_data = json.load(uploaded_json_file)
137
+
138
+ # Step 3: Flatten the JSON data
139
+ flattened_json_data_safe = flatten_json_safe(json_data)
140
+
141
+ if option == "Extraction des données":
142
+ st.subheader("Champs disponibles pour l'extraction")
143
+ select_all = st.checkbox("Sélectionner tous les champs")
144
+ if select_all:
145
+ selected_fields = list(FLATTENED_FIELD_MAPPING.keys())
146
+ else:
147
+ selected_fields = st.multiselect("Sélectionnez les champs que vous souhaitez extraire", list(FLATTENED_FIELD_MAPPING.keys()))
148
+ if selected_fields:
149
+ # Step 4: Extract the required data based on the selected fields
150
+ extracted_data = extract_from_flattened(flattened_json_data_safe, FLATTENED_FIELD_MAPPING, selected_fields)
151
+
152
+ # Step 5: Display the extracted data using Streamlit widgets for real editing
153
+ st.subheader("Données extraites")
154
+ edit_mode = st.checkbox("Modifier les données")
155
+ updated_data = extracted_data.copy()
156
+
157
+ if edit_mode:
158
+ for field, value in extracted_data.items():
159
+ if field in ["Périmètre de l'audit", "Process et activités", "Si oui lister les procédés sous-traités", "Si oui, lister les produits totalement sous-traités", "Si oui, lister les produits de négoce", "Préciser les produits à exclure"]:
160
+ updated_data[field] = st.text_area(f"{field}", value=value, height=150)
161
+ else:
162
+ updated_data[field] = st.text_input(f"{field}", value=value)
163
+ else:
164
+ # Display in read-only table format
165
+ apply_table_css()
166
+ table_html = "<table><thead><tr><th>Field</th><th>Value</th></tr></thead><tbody>"
167
+ for field, value in extracted_data.items():
168
+ table_html += f"<tr><td>{field}</td><td>{value}</td></tr>"
169
+ table_html += "</tbody></table>"
170
+ st.markdown(table_html, unsafe_allow_html=True)
171
+
172
+ # Step 6: Option to download the extracted data as an Excel file with formatting and COID in the name
173
+ df = pd.DataFrame(list(updated_data.items()), columns=["Field", "Value"])
174
+
175
+ # Extract the COID number to use in the file name
176
+ numero_coid = updated_data.get("N° COID du portail", "inconnu")
177
+
178
+ # Create the Excel file with column formatting
179
+ output = BytesIO()
180
+
181
+ # Create Excel writer and adjust column widths
182
+ with pd.ExcelWriter(output, engine='openpyxl') as writer:
183
+ df.to_excel(writer, index=False, sheet_name="Données extraites")
184
+
185
+ # Access the worksheet to modify the formatting
186
+ worksheet = writer.sheets["Données extraites"]
187
+
188
+ # Adjust the width of each column based on the longest entry
189
+ for col in worksheet.columns:
190
+ max_length = max(len(str(cell.value)) for cell in col)
191
+ col_letter = col[0].column_letter # Get the column letter
192
+ worksheet.column_dimensions[col_letter].width = max_length + 5 # Adjust column width
193
+
194
+ # Reset the position of the output to the start
195
+ output.seek(0)
196
+
197
+ # Provide the download button with the COID number in the filename
198
+ st.download_button(
199
+ label="Télécharger le fichier Excel",
200
+ data=output,
201
+ file_name=f'extraction_{numero_coid}.xlsx',
202
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
203
+ )
204
+
205
+ elif option == "Exigences de la checklist":
206
+ st.subheader("Exigences de la checklist")
207
+ if not UUID_MAPPING_DF.empty:
208
+ # Filtering options with linked filtering
209
+ chapitre_options = ["Tous"] + sorted(UUID_MAPPING_DF['Chapitre'].dropna().unique())
210
+ chapitre_filter = st.selectbox("Filtrer par Chapitre", options=chapitre_options)
211
+ filtered_df = UUID_MAPPING_DF
212
+
213
+ if chapitre_filter != "Tous":
214
+ filtered_df = filtered_df[filtered_df['Chapitre'] == chapitre_filter]
215
+ theme_options = ["Tous"] + sorted(filtered_df['Theme'].dropna().unique())
216
+ else:
217
+ theme_options = ["Tous"] + sorted(UUID_MAPPING_DF['Theme'].dropna().unique())
218
+ theme_filter = st.selectbox("Filtrer par Thème", options=theme_options)
219
+
220
+ if theme_filter != "Tous":
221
+ filtered_df = filtered_df[filtered_df['Theme'] == theme_filter]
222
+ sstheme_options = ["Tous"] + sorted(filtered_df['SSTheme'].dropna().unique())
223
+ else:
224
+ sstheme_options = ["Tous"] + sorted(UUID_MAPPING_DF['SSTheme'].dropna().unique())
225
+ sstheme_filter = st.selectbox("Filtrer par Sous-Thème", options=sstheme_options)
226
+
227
+ if sstheme_filter != "Tous":
228
+ filtered_df = filtered_df[filtered_df['SSTheme'] == sstheme_filter]
229
+
230
+ # Extracting checklist requirements from flattened JSON data
231
+ checklist_requirements = []
232
+ for _, row in filtered_df.iterrows():
233
+ key = row['Num']
234
+ uuid = row['UUID']
235
+ prefix = f"data_modules_food_8_checklists_checklistFood8_resultScorings_{uuid}"
236
+ explanation_text = flattened_json_data_safe.get(f"{prefix}_answers_englishExplanationText", "N/A")
237
+ detailed_explanation = flattened_json_data_safe.get(f"{prefix}_answers_explanationText", "N/A")
238
+ score_label = flattened_json_data_safe.get(f"{prefix}_score_label", "N/A")
239
+ response = flattened_json_data_safe.get(f"{prefix}_answers_fieldAnswers", "N/A")
240
+ checklist_requirements.append({
241
+ "Num": key,
242
+ "Explanation": explanation_text,
243
+ "Detailed Explanation": detailed_explanation,
244
+ "Score": score_label,
245
+ "Response": response
246
+ })
247
+
248
+ # Convert to filtered table display
249
+ apply_table_css()
250
+ table_html = "<table><thead><tr><th>Numéro d'exigence</th><th>Explication</th><th>Explication Détaillée</th><th>Note</th><th>Réponse</th></tr></thead><tbody>"
251
+ for req in checklist_requirements:
252
+ table_html += f"<tr><td>{req['Num']}</td><td>{req['Explanation']}</td><td>{req['Detailed Explanation']}</td><td>{req['Score']}</td><td>{req['Response']}</td></tr>"
253
+ table_html += "</tbody></table>"
254
+ st.markdown(table_html, unsafe_allow_html=True)
255
+ else:
256
+ st.error("Impossible de charger les données des UUID. Veuillez vérifier l'URL.")
257
+
258
+ except json.JSONDecodeError:
259
+ st.error("Erreur lors du décodage du fichier JSON. Veuillez vous assurer qu'il est au format correct.")
260
+ else:
261
+ st.write("Le fichier de NEO doit être un (.ifs)")