MMOON committed on
Commit
c2fbcdb
·
verified ·
1 Parent(s): 2a80c18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +228 -113
app.py CHANGED
@@ -1,8 +1,11 @@
1
  import logging
2
- from datetime import datetime
 
 
3
  import gradio as gr
4
  import pandas as pd
5
  import requests
 
6
  from tenacity import retry, stop_after_attempt, wait_fixed
7
  import plotly.express as px
8
 
@@ -13,148 +16,260 @@ logging.basicConfig(
13
  )
14
  logger = logging.getLogger(__name__)
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  class PesticideDataFetcher:
17
  BASE_URL = "https://api.datalake.sante.service.ec.europa.eu/sante/pesticides"
18
  HEADERS = {
19
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
 
 
20
  }
21
 
22
  def __init__(self):
23
- self.session = requests.Session()
24
- self.session.headers.update(self.HEADERS)
25
- self.substance_cache = {}
26
- self.product_cache = {}
27
- self.preload_substances()
 
 
 
 
 
28
 
29
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
30
- def fetch_data(self, url: str) -> dict:
31
  try:
32
  response = self.session.get(url, timeout=10)
33
  response.raise_for_status()
34
- return response.json()
 
 
35
  except requests.RequestException as e:
36
- logger.error(f"Échec de la requête {url}: {e}")
37
  return {"error": str(e)}
38
 
39
- def preload_substances(self):
40
  url = f"{self.BASE_URL}/active_substances?format=json&api-version=v2.0"
41
  while url:
42
- data = self.fetch_data(url)
43
- for item in data.get("value", []):
44
- self.substance_cache[item["substance_id"]] = {
45
- "name": item["substance_name"],
46
- "status": item.get("substance_status"),
47
- "approval_date": item.get("approval_date"),
48
- "expiry_date": item.get("expiry_date")
49
- }
50
- url = data.get("nextLink")
51
- logger.info(f"Cache substances prérempli avec {len(self.substance_cache)} entrées")
52
-
53
- def get_substance_name(self, substance_id: int) -> str:
54
- return self.substance_cache.get(substance_id, {}).get("name", f"Substance {substance_id}")
55
-
56
- def get_product_mrls(self, product_id: int) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  url = f"{self.BASE_URL}/pesticide_residues_mrls?format=json&product_id={product_id}&api-version=v2.0"
58
- return self.fetch_data(url).get("value", [])
 
59
 
60
- class PesticideApp:
61
  def __init__(self):
62
  self.fetcher = PesticideDataFetcher()
63
- self.product_list = self._get_products()
64
- self.substance_list = self._get_substances()
 
 
 
65
 
66
- def _get_products(self) -> dict:
67
- if not self.fetcher.product_cache:
68
- url = f"{self.fetcher.BASE_URL}/pesticide_residues_products?format=json&language=FR&api-version=v2.0"
69
- data = self.fetcher.fetch_data(url)
70
- self.fetcher.product_cache = {p['product_name']: p['product_id'] for p in data['value']}
71
- return self.fetcher.product_cache
 
 
 
72
 
73
- def _get_substances(self) -> list:
74
- return [s['name'] for s in self.fetcher.substance_cache.values()]
75
-
76
- def format_date(self, date_str: str) -> str:
77
- try:
78
- return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S").strftime("%d/%m/%Y")
79
- except (ValueError, TypeError):
80
- return "Date non disponible"
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- def search_products(self, query: str) -> pd.DataFrame:
83
- filtered = {k: v for k, v in self.product_list.items() if query.lower() in k.lower()}
84
- return pd.DataFrame(list(filtered.items()), columns=["Produit", "ID"])
85
 
86
- def get_product_details(self, product_name: str) -> pd.DataFrame:
87
- product_id = self.product_list.get(product_name)
 
 
88
  if not product_id:
89
- return pd.DataFrame([{"erreur": "Produit non trouvé"}])
90
 
91
- mrls = self.fetcher.get_product_mrls(product_id)
92
- data = []
93
- for mrl in mrls:
94
- substance_id = mrl["pesticide_residue_id"]
95
- substance_info = self.fetcher.substance_cache.get(substance_id, {})
96
-
97
- data.append({
98
- "Substance": self.fetcher.get_substance_name(substance_id),
99
- "Valeur LMR": mrl.get("mrl_value", "N/C"),
100
- "Date d'effet": self.format_date(mrl.get("entry_into_force_date")),
101
- "Statut": substance_info.get("status", "N/C"),
102
- "Date d'approbation": self.format_date(substance_info.get("approval_date"))
103
- })
104
- return pd.DataFrame(data)
105
-
106
- def create_ui(self) -> gr.Blocks:
107
- with gr.Blocks(theme=gr.themes.Default(), title="Base de Données Pesticides UE") as ui:
108
- gr.HTML("""
109
- <div style="text-align: center; padding: 20px; background: #006633; color: white;">
110
- <h1>;base de Données des Pesticides</h1>
111
- <p>Consultez les LMR et informations sur les substances actives</p>
112
- </div>
113
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- with gr.Tab("Recherche par Produit"):
116
- product_search = gr.Dropdown(
117
- list(self.product_list.keys()),
118
- label="Sélectionnez un produit",
119
- info="Commencez à taper pour filtrer"
120
- )
121
- product_results = gr.Dataframe(max_rows=20, interactive=False)
122
- product_search.change(
123
- self.get_product_details,
124
- inputs=product_search,
125
- outputs=product_results
126
- )
 
 
 
 
 
 
 
127
 
128
- with gr.Tab("Recherche par Substance"):
129
- substance_search = gr.Textbox(
130
- placeholder="Rechercher une substance...",
131
- label="Nom de la substance"
132
- )
133
- substance_results = gr.Dataframe(max_rows=10, interactive=False)
134
- substance_search.submit(
135
- self.search_substances,
136
- inputs=substance_search,
137
- outputs=substance_results
138
- )
139
-
140
- gr.Markdown("""
141
- **Légende**
142
- - LMR : Limite Maximale de Résidus
143
- - Date d'effet : Date d'entrée en vigueur
144
- - Statut : Approbation/Expiration
145
- """)
146
-
147
- gr.Button("Actualiser les données", variant="secondary").click(
148
- lambda: self.fetcher.preload_substances()
149
  )
150
-
151
- return ui
152
 
153
  def main():
154
- app = PesticideApp()
155
- ui = app.create_ui()
156
- ui.launch(share=True)
157
 
158
  if __name__ == "__main__":
159
- main()
160
-
 
1
  import logging
2
+ from concurrent.futures import ThreadPoolExecutor
3
+ from datetime import datetime, timedelta
4
+ from typing import Dict, List, Optional
5
  import gradio as gr
6
  import pandas as pd
7
  import requests
8
+ from dataclasses import dataclass
9
  from tenacity import retry, stop_after_attempt, wait_fixed
10
  import plotly.express as px
11
 
 
16
  )
17
  logger = logging.getLogger(__name__)
18
 
19
@dataclass
class SubstanceDetails:
    """Cached description of one active substance.

    Only ``name`` is required; the remaining fields default to ``None``
    when the value is not known.
    """

    # Display name of the substance.
    name: str
    # Status text as stored by the code that fills the cache.
    status: Optional[str] = None
    # Approval date, kept as the raw (unparsed) string.
    approval_date: Optional[str] = None
    # Expiry date, kept as the raw (unparsed) string.
    expiry_date: Optional[str] = None
25
+
26
@dataclass
class PesticideRecord:
    """One maximum-residue-limit (MRL) entry together with substance metadata.

    The first five fields are required and positional; the trailing four
    are optional and default to ``None``.
    """

    # Name of the substance the limit applies to.
    substance_name: str
    # The residue limit itself.
    mrl_value: float
    # Date the limit enters into force (raw string).
    entry_into_force_date: str
    # Identifier of the regulation that sets the limit.
    regulation_number: str
    # Link to the regulation text.
    regulation_url: str
    # Date of the last modification, when there is one (raw string).
    modification_date: Optional[str] = None
    # Status text of the substance, when known.
    substance_status: Optional[str] = None
    # Approval date of the substance, when known (raw string).
    approval_date: Optional[str] = None
    # Expiry date of the substance approval, when known (raw string).
    expiry_date: Optional[str] = None
37
+
38
class PesticideDataFetcher:
    """HTTP client for the EU pesticides API with in-memory caches.

    Creating an instance opens a ``requests`` session and immediately
    downloads the active-substance list, so construction performs network
    I/O. Substances are cached by id; the product list is cached after the
    first successful download.
    """

    BASE_URL = "https://api.datalake.sante.service.ec.europa.eu/sante/pesticides"
    HEADERS = {
        'Content-Type': 'application/json',
        'Cache-Control': 'no-cache',
        'User-Agent': 'Mozilla/5.0'
    }

    def __init__(self):
        self.session = self._create_session()
        self._substance_cache: Dict[int, SubstanceDetails] = {}
        # A list, to match what get_products() stores and returns.
        self._product_cache: List[Dict] = []
        self.preload_substance_names()

    def _create_session(self):
        """Return a ``requests.Session`` carrying the class-level HEADERS."""
        session = requests.Session()
        session.headers.update(self.HEADERS)
        return session

    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2), reraise=True)
    def _request_json(self, url: str):
        """GET ``url`` and return the decoded JSON body.

        Raises on any transport, HTTP-status or decoding error so that the
        ``retry`` decorator actually sees the failure. ``reraise=True``
        makes the last underlying exception propagate once the attempts
        are exhausted.
        """
        response = self.session.get(url, timeout=10)
        response.raise_for_status()
        return response.json()

    def fetch_data(self, url: str) -> Dict:
        """Fetch ``url`` and return the decoded JSON object.

        The request is attempted up to three times, two seconds apart.
        This method never raises for request failures: when every attempt
        fails, or the body is not a JSON object, it returns
        ``{"error": <message>}`` so callers can keep using ``.get``.
        """
        try:
            data = self._request_json(url)
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decoding errors that are not
            # RequestException subclasses.
            logger.error(f"Failed to fetch data from {url}: {str(e)}", exc_info=True)
            return {"error": str(e)}
        if not isinstance(data, dict):
            message = f"Unexpected JSON payload of type {type(data).__name__}"
            logger.error(f"Failed to fetch data from {url}: {message}")
            return {"error": message}
        logger.info(f"Fetched data from {url}: {str(data)[:200]}...")
        return data

    def _iter_pages(self, url: Optional[str]):
        """Yield each response page, following ``nextLink`` until it ends.

        A link that has already been visited stops the iteration, so a
        self-referencing ``nextLink`` cannot loop forever.
        """
        visited = set()
        while url and url not in visited:
            visited.add(url)
            page = self.fetch_data(url)
            yield page
            url = page.get("nextLink")

    def preload_substance_names(self):
        """Fill the substance cache from the ``active_substances`` endpoint.

        Stops at the first page without items (which includes error
        pages). Existing cache entries are overwritten, never removed.
        """
        url = f"{self.BASE_URL}/active_substances?format=json&api-version=v2.0"
        for page in self._iter_pages(url):
            items = page.get("value")
            if not items:
                break
            for item in items:
                substance_id = item.get("substanceId")
                # Compare against None so that an id of 0 is not dropped.
                if substance_id is None:
                    continue
                self._substance_cache[substance_id] = SubstanceDetails(
                    # ``or`` also covers an explicit null name, which would
                    # otherwise break sorting and case-insensitive lookups.
                    name=item.get("substanceName") or "Nom non trouvé",
                    status=item.get("substanceStatus"),
                    approval_date=item.get("approvalDate"),
                    expiry_date=item.get("expiryDate")
                )
        logger.info(f"Préchargé {len(self._substance_cache)} substances.")

    def get_substance_name_by_id(self, substance_id: int) -> str:
        """Return the cached name for ``substance_id`` or a placeholder."""
        return self.get_substance_details(substance_id).name

    def get_active_substance_details(self, substance_name: str) -> Optional[SubstanceDetails]:
        """Return the cached details whose name equals ``substance_name``.

        The comparison ignores case. Returns ``None`` when the name is
        empty or no cached substance matches.
        """
        if not substance_name:
            return None
        wanted = substance_name.lower()
        for details in self._substance_cache.values():
            if details.name.lower() == wanted:
                return details
        return None

    def get_substance_details(self, pesticide_residue_id: int) -> SubstanceDetails:
        """Return cached details for the id, or a name-only placeholder."""
        cached = self._substance_cache.get(pesticide_residue_id)
        if cached is not None:
            return cached
        return SubstanceDetails(name=f"Substance {pesticide_residue_id}")

    def get_all_substances(self) -> List[str]:
        """Return the distinct cached substance names, sorted."""
        return sorted({details.name for details in self._substance_cache.values()})

    def get_products(self) -> List[Dict]:
        """Return every product record, downloading the list on first use.

        A non-empty result is cached and returned as-is by later calls; an
        empty result is not kept, so the next call tries again.
        """
        if self._product_cache:
            return self._product_cache
        all_products: List[Dict] = []
        url = f"{self.BASE_URL}/pesticide_residues_products?format=json&language=FR&api-version=v2.0"
        for page in self._iter_pages(url):
            # ``or []`` guards against an explicit null "value".
            all_products.extend(page.get("value") or [])
        self._product_cache = all_products
        logger.info(f"Récupéré {len(all_products)} produits au total")
        return all_products

    def get_mrls(self, product_id: int) -> List[Dict]:
        """Return the MRL records for ``product_id``.

        Follows ``nextLink`` like the other endpoints so that records
        beyond the first page are not lost. Returns an empty list when the
        request fails.
        """
        url = f"{self.BASE_URL}/pesticide_residues_mrls?format=json&product_id={product_id}&api-version=v2.0"
        records: List[Dict] = []
        for page in self._iter_pages(url):
            records.extend(page.get("value") or [])
        return records
125
 
126
class PesticideInterface:
    """Gradio front end for browsing EU pesticide MRLs by product or substance.

    Creating an instance builds a ``PesticideDataFetcher`` and downloads the
    product and substance lists, so construction performs network I/O.
    """

    # Input layouts accepted by parse_date, tried in this order.
    _DATE_FORMATS = ("%Y-%m-%d", "%d/%m/%Y", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%SZ")
    # Period label -> (days before today, days after today), both inclusive.
    _PERIOD_WINDOWS = {
        "Dernière semaine": (7, 0),
        "Dernier mois": (30, 0),
        "Prochains 6 mois": (0, 180),
    }
    _ALL_DATES = "Toutes les dates"
    # Columns create_graph needs before it can draw anything.
    _GRAPH_COLUMNS = ("Date d'application", "Valeur LMR", "Substance")

    def __init__(self):
        self.fetcher = PesticideDataFetcher()
        self.products = self.fetcher.get_products()
        # Skip records without a usable name or id so that sorting the
        # names and looking up ids cannot fail later.
        self.product_choices = {
            p['productName']: p['productId']
            for p in self.products
            if p.get('productName') and p.get('productId') is not None
        }
        self.substances = self.fetcher.get_all_substances()
        self._cache = {}
        logger.info(f"Initialisé avec {len(self.product_choices)} produits et {len(self.substances)} substances.")

    def parse_date(self, date_str: str) -> Optional[str]:
        """Normalise a date string to ``YYYY-MM-DD``.

        Accepts the layouts in ``_DATE_FORMATS`` plus ISO 8601 timestamps
        with fractional seconds or a UTC offset. Returns ``None`` for empty,
        non-string or unrecognised input instead of raising.
        """
        if not date_str or not isinstance(date_str, str):
            return None
        text = date_str.strip()
        for fmt in self._DATE_FORMATS:
            try:
                return datetime.strptime(text, fmt).strftime("%Y-%m-%d")
            except ValueError:
                continue
        try:
            # fromisoformat only understands a trailing "Z" from Python 3.11.
            return datetime.fromisoformat(text.replace("Z", "+00:00")).strftime("%Y-%m-%d")
        except ValueError:
            return None

    def filter_by_period(self, data: List[Dict], period: str) -> List[Dict]:
        """Keep the records whose date falls inside the named period.

        The date is ``entryIntoForceDate``, falling back to
        ``modificationDate``; records without a parseable date are dropped.
        Past periods cover ``[today - N days, today]`` and
        "Prochains 6 mois" covers ``[today, today + 180 days]``.
        "Toutes les dates", ``None`` and any unknown label return ``data``
        unchanged.

        Kept records are shallow copies with an added ``"parsed_date"`` key
        (``YYYY-MM-DD``); the input dictionaries are not modified.
        """
        window = self._PERIOD_WINDOWS.get(period)
        if window is None:
            return data
        days_back, days_ahead = window
        # Compare calendar dates: comparing against a timestamp would
        # exclude records dated today.
        today = datetime.now().date()
        earliest = today - timedelta(days=days_back)
        latest = today + timedelta(days=days_ahead)

        filtered = []
        for item in data:
            parsed = self.parse_date(item.get("entryIntoForceDate") or item.get("modificationDate"))
            if not parsed:
                continue
            item_date = datetime.strptime(parsed, "%Y-%m-%d").date()
            if earliest <= item_date <= latest:
                filtered.append({**item, "parsed_date": parsed})
        return filtered

    def format_regulation_link(self, regulation_url: str, regulation_number: str) -> str:
        """Return an HTML link to the regulation, or plain text without a URL.

        Both values are HTML-escaped inside the anchor. A missing regulation
        number yields the URL as link text, or an empty string when there is
        no URL either.
        """
        import html

        label = "" if regulation_number is None else str(regulation_number)
        if not regulation_url:
            return label
        href = html.escape(str(regulation_url), quote=True)
        text = html.escape(label) if label else href
        return f'<a href="{href}" target="_blank">{text}</a>'

    def _build_row(self, mrl: Dict) -> Dict:
        """Convert one raw MRL record into a row of the result table."""
        details = self.fetcher.get_substance_details(mrl.get("pesticideResidueId"))
        mrl_value = mrl.get("mrlValue")
        return {
            "Substance": details.name,
            "Valeur LMR": "" if mrl_value is None else str(mrl_value),
            "Date d'application": self.parse_date(mrl.get("entryIntoForceDate")),
            "Date de modification": self.parse_date(mrl.get("modificationDate")),
            "Règlement": self.format_regulation_link(
                mrl.get("regulationUrl", ""),
                mrl.get("regulationNumber") or mrl.get("regulationReference")
            ),
            # The status is a label, not a date, so it is shown as stored.
            "Statut": details.status,
            # Prefer dates carried by the MRL record, then the cached
            # substance details.
            "Date d'approbation": self.parse_date(mrl.get("approvalDate") or details.approval_date),
            "Date d'expiration": self.parse_date(mrl.get("expiryDate") or details.expiry_date)
        }

    def get_product_details(self, product_name: str, period: str, show_only_changes: bool) -> pd.DataFrame:
        """Return the MRL table for a product, filtered by period.

        Args:
            product_name: A key of ``product_choices``.
            period: One of the period labels offered in the interface.
            show_only_changes: Keep only rows that have a modification date.

        Returns:
            A DataFrame with one row per MRL, newest application date first,
            or a one-cell ``"Message"`` DataFrame when nothing can be shown.
        """
        if not product_name:
            return pd.DataFrame({"Message": ["Sélectionnez un produit"]})
        product_id = self.product_choices.get(product_name)
        if product_id is None:
            return pd.DataFrame({"Message": ["Produit non trouvé"]})

        # The result depends on today's date, so the day is part of the key.
        today = datetime.now().date()
        cache_key = (today, product_id, period, bool(show_only_changes))
        if cache_key in self._cache:
            return self._cache[cache_key]

        mrls = self.filter_by_period(self.fetcher.get_mrls(product_id), period)
        if not mrls:
            return pd.DataFrame({"Message": ["Aucune donnée"]})

        df = pd.DataFrame([self._build_row(mrl) for mrl in mrls])
        if show_only_changes:
            df = df[df["Date de modification"].notna()]
        if df.empty:
            return pd.DataFrame({"Message": ["Aucune donnée"]})
        df = df.sort_values("Date d'application", ascending=False)

        # Drop entries computed on earlier days before storing the new one.
        self._cache = {key: value for key, value in self._cache.items() if key[0] == today}
        self._cache[cache_key] = df
        return df

    def create_graph(self, df: pd.DataFrame):
        """Return a scatter plot of MRL value against application date.

        Returns ``None`` when ``df`` is missing, empty, lacks the result
        columns (for example a "Message" table) or holds no numeric MRL
        value, so the plot is cleared instead of raising.
        """
        if df is None or df.empty or not set(self._GRAPH_COLUMNS).issubset(df.columns):
            return None
        plot_df = df.copy()
        # MRL values are stored as text and may carry a trailing "*";
        # convert them so the y axis is numeric.
        plot_df["Valeur LMR"] = pd.to_numeric(
            plot_df["Valeur LMR"].astype(str).str.rstrip("*"), errors="coerce"
        )
        plot_df = plot_df.dropna(subset=["Valeur LMR"])
        if plot_df.empty:
            return None
        return px.scatter(plot_df, x='Date d\'application', y='Valeur LMR', color='Substance', title='LMR par date')

    def export_data(self, df: pd.DataFrame) -> str:
        """Write ``df`` to a new CSV file and return its path.

        Each call creates its own file in the system temporary directory,
        so simultaneous exports do not overwrite one another. The files are
        not deleted by this method.
        """
        import tempfile

        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", prefix="export_", delete=False,
            newline="", encoding="utf-8"
        ) as handle:
            df.to_csv(handle, index=False)
            return handle.name

    def search_substances(self, query: str) -> pd.DataFrame:
        """Return the substances whose name contains ``query``, ignoring case.

        Returns a one-cell ``"Message"`` DataFrame when the query is blank
        or nothing matches.
        """
        needle = (query or "").strip().lower()
        if not needle:
            return pd.DataFrame({"Message": ["Entrez une requête"]})
        matches = [s for s in self.substances if needle in s.lower()]
        if not matches:
            return pd.DataFrame({"Message": ["Aucun résultat"]})
        return pd.DataFrame(matches, columns=["Substance"])

    def create_interface(self) -> "gr.Blocks":
        """Build the Gradio Blocks layout and wire its event handlers."""
        period_labels = list(self._PERIOD_WINDOWS) + [self._ALL_DATES]

        with gr.Blocks() as interface:
            gr.Markdown("# 🌿 Base de données des pesticides UE")

            with gr.Tabs():
                with gr.Tab("Recherche par Produit"):
                    product = gr.Dropdown(sorted(self.product_choices.keys()), label="Produit")
                    # Preselect a period so the handler never receives None.
                    period = gr.Radio(period_labels, value=self._ALL_DATES, label="Période")
                    # The text is the label; passed positionally it would
                    # become the checkbox value.
                    show_changes = gr.Checkbox(label="Afficher les modifications")

                    btn = gr.Button("Analyser")
                    table = gr.Dataframe(max_height=500)
                    graph = gr.Plot()
                    export = gr.Button("Exporter")
                    file = gr.File()

                    btn.click(
                        self.get_product_details,
                        inputs=[product, period, show_changes],
                        outputs=table
                    )
                    table.change(self.create_graph, inputs=table, outputs=graph)
                    export.click(self.export_data, inputs=table, outputs=file)

                with gr.Tab("Recherche par Substance"):
                    search = gr.Textbox()
                    btn_search = gr.Button("Rechercher")
                    df_substances = gr.Dataframe(max_height=300)
                    btn_search.click(self.search_substances, inputs=search, outputs=df_substances)

            gr.Markdown(
                "**Légende**\n"
                "- LMR : Limite Maximale de Résidus\n"
                "- Date d'application : Date d'entrée en vigueur\n"
                "- Règlement : Cliquez pour ouvrir la source"
            )
        return interface
 
269
 
270
def main():
    """Build the Gradio interface and launch it with sharing enabled."""
    interface = PesticideInterface().create_interface()
    interface.launch(share=True)


# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()