MMOON committed on
Commit
e92be3f
·
verified ·
1 Parent(s): c2fbcdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -198
app.py CHANGED
@@ -9,7 +9,6 @@ from dataclasses import dataclass
9
  from tenacity import retry, stop_after_attempt, wait_fixed
10
  import plotly.express as px
11
 
12
- # Configuration du logging
13
  logging.basicConfig(
14
  level=logging.INFO,
15
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
@@ -23,57 +22,35 @@ class SubstanceDetails:
23
  approval_date: Optional[str] = None
24
  expiry_date: Optional[str] = None
25
 
26
- @dataclass
27
- class PesticideRecord:
28
- substance_name: str
29
- mrl_value: float
30
- entry_into_force_date: str
31
- regulation_number: str
32
- regulation_url: str
33
- modification_date: Optional[str] = None
34
- substance_status: Optional[str] = None
35
- approval_date: Optional[str] = None
36
- expiry_date: Optional[str] = None
37
-
38
  class PesticideDataFetcher:
39
  BASE_URL = "https://api.datalake.sante.service.ec.europa.eu/sante/pesticides"
40
  HEADERS = {
41
- 'Content-Type': 'application/json',
42
- 'Cache-Control': 'no-cache',
43
- 'User-Agent': 'Mozilla/5.0'
44
  }
45
 
46
  def __init__(self):
47
- self.session = self._create_session()
 
48
  self._substance_cache: Dict[int, SubstanceDetails] = {}
49
- self._product_cache = {}
50
  self.preload_substance_names()
51
 
52
- def _create_session(self):
53
- session = requests.Session()
54
- for header, value in self.HEADERS.items():
55
- session.headers[header] = value
56
- return session
57
-
58
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
59
  def fetch_data(self, url: str) -> Dict:
60
  try:
61
  response = self.session.get(url, timeout=10)
62
  response.raise_for_status()
63
- data = response.json()
64
- logger.info(f"Fetched data from {url}: {str(data)[:200]}...")
65
- return data
66
  except requests.RequestException as e:
67
- logger.error(f"Failed to fetch data from {url}: {str(e)}", exc_info=True)
68
  return {"error": str(e)}
69
 
70
  def preload_substance_names(self):
71
  url = f"{self.BASE_URL}/active_substances?format=json&api-version=v2.0"
72
  while url:
73
- response = self.fetch_data(url)
74
- if not response.get("value"):
75
- break
76
- for item in response["value"]:
77
  substance_id = item.get("substanceId")
78
  if substance_id:
79
  self._substance_cache[substance_id] = SubstanceDetails(
@@ -82,194 +59,99 @@ class PesticideDataFetcher:
82
  approval_date=item.get("approvalDate"),
83
  expiry_date=item.get("expiryDate")
84
  )
85
- url = response.get("nextLink")
86
- logger.info(f"Préchargé {len(self._substance_cache)} substances.")
87
-
88
- def get_substance_name_by_id(self, substance_id: int) -> str:
89
- if substance_id in self._substance_cache:
90
- return self._substance_cache[substance_id].name
91
- return f"Substance {substance_id}"
92
-
93
- def get_active_substance_details(self, substance_name: str) -> Optional[SubstanceDetails]:
94
- for details in self._substance_cache.values():
95
- if details.name.lower() == substance_name.lower():
96
- return details
97
- return None
98
-
99
- def get_substance_details(self, pesticide_residue_id: int) -> SubstanceDetails:
100
- if pesticide_residue_id in self._substance_cache:
101
- return self._substance_cache[pesticide_residue_id]
102
- return SubstanceDetails(name=f"Substance {pesticide_residue_id}")
103
-
104
- def get_all_substances(self) -> List[str]:
105
- return sorted({details.name for details in self._substance_cache.values()})
106
 
107
- def get_products(self) -> List[Dict]:
108
  if self._product_cache:
109
  return self._product_cache
110
- all_products = []
111
- base_url = f"{self.BASE_URL}/pesticide_residues_products?format=json&language=FR&api-version=v2.0"
112
- url = base_url
113
- while url:
114
- response = self.fetch_data(url)
115
- all_products.extend(response.get("value", []))
116
- url = response.get("nextLink")
117
- self._product_cache = all_products
118
- logger.info(f"Récupéré {len(all_products)} produits au total")
119
- return all_products
120
 
121
  def get_mrls(self, product_id: int) -> List[Dict]:
122
  url = f"{self.BASE_URL}/pesticide_residues_mrls?format=json&product_id={product_id}&api-version=v2.0"
123
- response = self.fetch_data(url)
124
- return response.get("value", [])
125
 
126
- class PesticideInterface:
 
 
 
 
 
127
  def __init__(self):
128
  self.fetcher = PesticideDataFetcher()
129
- self.products = self.fetcher.get_products()
130
- self.product_choices = {p['productName']: p['productId'] for p in self.products}
131
- self.substances = self.fetcher.get_all_substances()
132
- self._cache = {}
133
- logger.info(f"Initialisé avec {len(self.product_choices)} produits et {len(self.substances)} substances.")
134
-
135
- def parse_date(self, date_str: str) -> Optional[str]:
136
- if not date_str:
137
- return None
138
- for fmt in ("%Y-%m-%d", "%d/%m/%Y", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"):
139
- try:
140
- return datetime.strptime(date_str, fmt).strftime("%Y-%m-%d")
141
- except ValueError:
142
- continue
143
- return None
144
-
145
- def filter_by_period(self, data: List[Dict], period: str) -> List[Dict]:
146
- today = datetime.now()
147
- start_date = {
148
- "Dernière semaine": today - timedelta(days=7),
149
- "Dernier mois": today - timedelta(days=30),
150
- "Prochains 6 mois": today + timedelta(days=180)
151
- }.get(period)
152
-
153
- if period == "Toutes les dates":
154
- return data
155
- filtered = []
156
- for item in data:
157
- date_str = item.get("entryIntoForceDate") or item.get("modificationDate")
158
- parsed = self.parse_date(date_str)
159
- if parsed:
160
- item_date = datetime.strptime(parsed, "%Y-%m-%d")
161
- if (period == "Prochains 6 mois" and item_date >= today) or (period != "Prochains 6 mois" and item_date >= start_date):
162
- item["parsed_date"] = parsed
163
- filtered.append(item)
164
- return filtered
165
 
166
- def format_regulation_link(self, regulation_url: str, regulation_number: str) -> str:
167
- return f'<a href="{regulation_url}" target="_blank">{regulation_number}</a>' if regulation_url else regulation_number
 
 
 
168
 
169
- def get_product_details(self, product_name: str, period: str, show_only_changes: bool) -> pd.DataFrame:
170
- if not product_name:
171
- return pd.DataFrame({"Message": ["Sélectionnez un produit"]})
172
- product_id = self.product_choices.get(product_name)
173
  if not product_id:
174
- return pd.DataFrame({"Message": ["Produit non trouvé"]})
175
-
176
- cache_key = f"{product_id}_{period}_{show_only_changes}"
177
- if cache_key in self._cache:
178
- return self._cache[cache_key]
179
 
180
  mrls = self.fetcher.get_mrls(product_id)
181
- mrls = self.filter_by_period(mrls, period)
182
-
183
- if not mrls:
184
- return pd.DataFrame({"Message": ["Aucune donnée"]})
185
-
186
- processed = []
187
- with ThreadPoolExecutor(max_workers=10) as executor:
188
- futures = {executor.submit(self.fetcher.get_substance_name_by_id, mrl["pesticideResidueId"]): mrl for mrl in mrls}
189
- for future in futures:
190
- mrl = futures[future]
191
- try:
192
- substance_name = future.result()
193
- mrl_value = mrl.get("mrlValue", "")
194
- formatted_mrl = f"{mrl_value}*" if isinstance(mrl_value, (int, float)) and str(mrl_value).endswith('*') else str(mrl_value)
195
-
196
- processed.append({
197
- "Substance": substance_name,
198
- "Valeur LMR": formatted_mrl,
199
- "Date d'application": self.parse_date(mrl.get("entryIntoForceDate")),
200
- "Date de modification": self.parse_date(mrl.get("modificationDate")),
201
- "Règlement": self.format_regulation_link(
202
- mrl.get("regulationUrl", ""),
203
- mrl.get("regulationNumber") or mrl.get("regulationReference")
204
- ),
205
- "Statut": self.parse_date(self.fetcher.get_active_substance_details(substance_name).status),
206
- "Date d'approbation": self.parse_date(mrl.get("approvalDate")),
207
- "Date d'expiration": self.parse_date(mrl.get("expiryDate"))
208
- })
209
- except Exception as e:
210
- logger.error(f"Erreur: {str(e)}")
211
-
212
- df = pd.DataFrame(processed)
213
- if show_only_changes:
214
- df = df[df["Date de modification"].notna()]
215
- df = df.sort_values("Date d'application", ascending=False)
216
- self._cache[cache_key] = df
217
- return df
218
-
219
- def create_graph(self, df: pd.DataFrame) -> gr.Plot:
220
- return px.scatter(df, x='Date d\'application', y='Valeur LMR', color='Substance', title='LMR par date')
221
-
222
- def export_data(self, df: pd.DataFrame) -> str:
223
- df.to_csv("export.csv", index=False)
224
- return "export.csv"
225
 
226
  def search_substances(self, query: str) -> pd.DataFrame:
227
- if not query:
228
- return pd.DataFrame({"Message": ["Entrez une requête"]})
229
  matches = [s for s in self.substances if query.lower() in s.lower()]
230
- return pd.DataFrame(matches, columns=["Substance"]) if matches else pd.DataFrame({"Message": ["Aucun résultat"]})
231
-
232
- def create_interface(self) -> gr.Blocks:
233
- with gr.Blocks() as interface:
234
- gr.Markdown("# 🌿 Base de données des pesticides UE")
 
 
 
 
235
 
236
- with gr.Tabs():
237
- with gr.Tab("Recherche par Produit"):
238
- product = gr.Dropdown(sorted(self.product_choices.keys()), label="Produit")
239
- period = gr.Radio(["Dernière semaine", "Dernier mois", "Prochains 6 mois", "Toutes les dates"], label="Période")
240
- show_changes = gr.Checkbox("Afficher les modifications")
241
-
242
- btn = gr.Button("Analyser")
243
- table = gr.Dataframe(max_height=500)
244
- graph = gr.Plot()
245
- export = gr.Button("Exporter")
246
- file = gr.File()
247
-
248
- btn.click(
249
- self.get_product_details,
250
- inputs=[product, period, show_changes],
251
- outputs=table
252
- )
253
- table.change(self.create_graph, inputs=table, outputs=graph)
254
- export.click(self.export_data, inputs=table, outputs=file)
 
255
 
256
- with gr.Tab("Recherche par Substance"):
257
- search = gr.Textbox()
258
- btn_search = gr.Button("Rechercher")
259
- df_substances = gr.Dataframe(max_height=300)
260
- btn_search.click(self.search_substances, inputs=search, outputs=df_substances)
261
-
262
- gr.Markdown(
263
- "**Légende**\n"
264
- "- LMR : Limite Maximale de Résidus\n"
265
- "- Date d'application : Date d'entrée en vigueur\n"
266
- "- Règlement : Cliquez pour ouvrir la source"
267
  )
268
- return interface
269
 
270
  def main():
271
- app = PesticideInterface().create_interface()
272
- app.launch(share=True)
273
 
274
  if __name__ == "__main__":
275
  main()
 
9
  from tenacity import retry, stop_after_attempt, wait_fixed
10
  import plotly.express as px
11
 
 
12
  logging.basicConfig(
13
  level=logging.INFO,
14
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 
22
  approval_date: Optional[str] = None
23
  expiry_date: Optional[str] = None
24
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  class PesticideDataFetcher:
26
  BASE_URL = "https://api.datalake.sante.service.ec.europa.eu/sante/pesticides"
27
  HEADERS = {
28
+ 'User-Agent': 'Mozilla/5.0',
29
+ 'Accept': 'application/json'
 
30
  }
31
 
32
  def __init__(self):
33
+ self.session = requests.Session()
34
+ self.session.headers.update(self.HEADERS)
35
  self._substance_cache: Dict[int, SubstanceDetails] = {}
36
+ self._product_cache = []
37
  self.preload_substance_names()
38
 
 
 
 
 
 
 
39
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
40
  def fetch_data(self, url: str) -> Dict:
41
  try:
42
  response = self.session.get(url, timeout=10)
43
  response.raise_for_status()
44
+ return response.json()
 
 
45
  except requests.RequestException as e:
46
+ logger.error(f"Échec de la requête {url}: {e}")
47
  return {"error": str(e)}
48
 
49
  def preload_substance_names(self):
50
  url = f"{self.BASE_URL}/active_substances?format=json&api-version=v2.0"
51
  while url:
52
+ data = self.fetch_data(url)
53
+ for item in data.get("value", []):
 
 
54
  substance_id = item.get("substanceId")
55
  if substance_id:
56
  self._substance_cache[substance_id] = SubstanceDetails(
 
59
  approval_date=item.get("approvalDate"),
60
  expiry_date=item.get("expiryDate")
61
  )
62
+ url = data.get("nextLink")
63
+ logger.info(f"Cache prérempli avec {len(self._substance_cache)} substances")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
def get_product_list(self) -> List[Dict]:
    """Return every product record, fetching and caching them on first use.

    The endpoint is read page by page: each response's ``nextLink`` is
    followed until it is absent, the same way the substance preload walks
    its endpoint. Reading only the first response would silently truncate
    the list.

    Returns:
        The list of product dicts. A non-empty result is cached and reused.
        An empty result (e.g. after a request error) is not, so the next
        call tries again.
    """
    if self._product_cache:
        return self._product_cache

    products: List[Dict] = []
    visited = set()  # guards against a nextLink that loops back on itself
    url = f"{self.BASE_URL}/pesticide_residues_products?format=json&language=FR&api-version=v2.0"
    while url and url not in visited:
        visited.add(url)
        data = self.fetch_data(url)
        # ``or []`` also covers an explicit null "value" in the payload.
        products.extend(data.get("value") or [])
        url = data.get("nextLink")

    self._product_cache = products
    logger.info(f"Produits récupérés: {len(self._product_cache)}")
    return self._product_cache
 
 
 
 
 
73
 
74
  def get_mrls(self, product_id: int) -> List[Dict]:
75
  url = f"{self.BASE_URL}/pesticide_residues_mrls?format=json&product_id={product_id}&api-version=v2.0"
76
+ return self.fetch_data(url).get("value", [])
 
77
 
78
def get_substance_name(self, substance_id: int) -> str:
    """Return the display name for a substance id.

    Args:
        substance_id: Key into the preloaded substance cache.

    Returns:
        The cached name when one exists and is non-empty; otherwise the
        placeholder ``"Substance <id>"``. The placeholder is also used when
        the cached entry has no name, so the result is always a string.
    """
    details = self._substance_cache.get(substance_id)
    name = getattr(details, "name", None)
    return name if name else f"Substance {substance_id}"
82
+
83
class PesticideApp:
    """Gradio front-end over ``PesticideDataFetcher``.

    Offers a per-product table of MRL values and a substring search over
    the preloaded substance names.
    """

    # Date layouts tried in order when normalising API date strings.
    _DATE_FORMATS = (
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f",
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%d",
        "%d/%m/%Y",
    )
    # Column order of the per-product table.
    _COLUMNS = ["Substance", "Valeur LMR", "Date d'effet", "Statut"]

    def __init__(self):
        """Create the fetcher and build the product and substance lookups."""
        self.fetcher = PesticideDataFetcher()
        # Map display name -> product id; records missing either field are
        # skipped instead of raising KeyError during start-up.
        self.product_list = {
            p["productName"]: p["productId"]
            for p in self.fetcher.get_product_list()
            if p.get("productName") is not None and p.get("productId") is not None
        }
        self.substances = self._substance_names()

    def _substance_names(self) -> list:
        """Return the non-empty substance names currently in the fetcher cache."""
        return [sd.name for sd in self.fetcher._substance_cache.values() if sd.name]

    def _refresh_data(self) -> None:
        """Reload substances from the API and rebuild the search list."""
        self.fetcher.preload_substance_names()
        # Without this the search tab keeps using the names captured at start-up.
        self.substances = self._substance_names()

    def format_date(self, date_str: Optional[str]) -> str:
        """Convert an API date string to ``DD/MM/YYYY``.

        Args:
            date_str: Date text in one of ``_DATE_FORMATS``, or ``None``.

        Returns:
            The reformatted date, or ``"Date non disponible"`` when the value
            is missing, not a string, or matches none of the known layouts.
        """
        if isinstance(date_str, str):
            text = date_str.strip()
            for fmt in self._DATE_FORMATS:
                try:
                    return datetime.strptime(text, fmt).strftime("%d/%m/%Y")
                except ValueError:
                    continue
        return "Date non disponible"

    def get_product_details(self, product_name: str) -> pd.DataFrame:
        """Build the MRL table for the selected product.

        Args:
            product_name: A key of ``self.product_list``.

        Returns:
            A DataFrame with the columns in ``_COLUMNS`` (possibly without
            rows), or a one-row ``erreur`` frame when the product is unknown.
        """
        product_id = self.product_list.get(product_name)
        # Compare against None explicitly: an id of 0 is a valid lookup result.
        if product_id is None:
            return pd.DataFrame([{"erreur": "Produit non trouvé"}])

        cache = self.fetcher._substance_cache
        rows = []
        for mrl in self.fetcher.get_mrls(product_id) or []:
            substance_id = mrl.get("pesticideResidueId")
            # Cache values are SubstanceDetails objects, not dicts, so the
            # status is read as an attribute (dict-style .get would raise).
            details = cache.get(substance_id)
            status = getattr(details, "status", None)
            rows.append({
                "Substance": self.fetcher.get_substance_name(substance_id),
                "Valeur LMR": mrl.get("mrlValue", "N/C"),
                "Date d'effet": self.format_date(mrl.get("entryIntoForceDate")),
                "Statut": status if status else "N/C",
            })
        # Passing the columns keeps the headers even when there are no rows.
        return pd.DataFrame(rows, columns=self._COLUMNS)

    def search_substances(self, query: str) -> pd.DataFrame:
        """Case-insensitive substring search over the substance names.

        Args:
            query: Text to look for; ``None`` or blank matches every name.

        Returns:
            A one-column ``Substance`` frame of matches, or a one-row
            ``Message`` frame when nothing matches.
        """
        needle = (query or "").strip().lower()
        matches = [s for s in self.substances if s and needle in s.lower()]
        if matches:
            return pd.DataFrame(matches, columns=["Substance"])
        return pd.DataFrame([{"Message": "Aucun résultat"}])

    def create_ui(self) -> "gr.Blocks":
        """Assemble and return the Gradio Blocks layout (not yet launched)."""
        with gr.Blocks(theme=gr.themes.Default()) as ui:
            gr.HTML("""
            <div style="text-align: center; padding: 20px; background: #006633; color: white;">
                <h1>Base de Données des Pesticides UE</h1>
            </div>
            """)

            with gr.Tab("Recherche par Produit"):
                product = gr.Dropdown(
                    label="Sélectionnez un produit",
                    choices=sorted(self.product_list.keys()),
                    interactive=True
                )
                # NOTE(review): confirm the installed Gradio version accepts
                # the ``overflow_scroll`` keyword on Dataframe.
                output = gr.Dataframe(
                    headers=list(self._COLUMNS),
                    max_height=500,
                    overflow_scroll=True
                )
                product.change(self.get_product_details, product, output)

            with gr.Tab("Recherche par Substance"):
                substance_search = gr.Textbox(
                    label="Rechercher une substance",
                    placeholder="Entrez un nom de substance..."
                )
                substance_results = gr.Dataframe(max_height=300)
                substance_search.submit(self.search_substances, substance_search, substance_results)

            gr.Button("Actualiser les données", variant="secondary").click(
                self._refresh_data
            )
        return ui
151
 
152
  def main():
153
+ app = PesticideApp()
154
+ app.create_ui().launch(share=True)
155
 
156
  if __name__ == "__main__":
157
  main()