Eric2mangel committed on
Commit
8817849
·
verified ·
1 Parent(s): 44e2991

Update app.py

Browse files

Version fonctionnelle

Files changed (1) hide show
  1. app.py +85 -14
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import pandas as pd
2
  import duckdb
3
  import polars as pl
@@ -6,6 +7,7 @@ import polars as pl
6
  import time
7
  import os
8
  import matplotlib.pyplot as plt
 
9
  import tempfile
10
  from io import BytesIO
11
 
@@ -21,23 +23,56 @@ st.set_page_config(
21
 
22
  )
23
 
24
-
25
-
26
-
27
-
28
-
29
-
30
-
31
-
32
-
33
-
34
-
35
-
36
-
37
-
38
  # --- FONCTION DE CHARGEMENT TECHNIQUE ---
39
  def load_file_and_measure_time(file_path, library, file_ext, read_kwargs):
40
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  # --- FONCTION POUR CHARGER DEPUIS UN BUFFER UPLOADÉ ---
43
  def load_from_buffer(uploaded_file, library, file_ext, read_kwargs):
@@ -59,6 +94,12 @@ def load_from_buffer(uploaded_file, library, file_ext, read_kwargs):
59
  except Exception as e:
60
  return f"Erreur: {e}", 0
61
 
 
 
 
 
 
 
62
  if 'uploaded_buffer' not in st.session_state:
63
  st.session_state.uploaded_buffer = None
64
 
@@ -116,6 +157,15 @@ if uploaded_file is not None:
116
 
117
  # --- ACTIONS ET AFFICHAGE ---
118
  if st.session_state.target_file is not None:
 
 
 
 
 
 
 
 
 
119
  run_comparison = st.sidebar.button("Lancer la comparaison")
120
 
121
  if run_comparison:
@@ -134,9 +184,30 @@ if st.session_state.target_file is not None:
134
  load_time, row_count = load_file_and_measure_time(st.session_state.target_file, lib_key, st.session_state.file_ext, read_kwargs)
135
  results.append({'Librairie': lib_name, 'Temps de chargement (s)': load_time, 'Nombre de lignes': row_count})
136
 
 
137
 
138
  valid_counts = results_df[results_df['Nombre de lignes'] > 0]['Nombre de lignes']
139
  if not valid_counts.empty:
140
  st.markdown(f"**Nombre de lignes détectées :** **{int(valid_counts.iloc[0]):,}**".replace(',', ' '))
141
 
142
  chart_data = results_df[results_df['Temps de chargement (s)'].apply(lambda x: isinstance(x, (int, float)))]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import duckdb
4
  import polars as pl
 
7
  import time
8
  import os
9
  import matplotlib.pyplot as plt
10
+ import numpy as np
11
  import tempfile
12
  from io import BytesIO
13
 
 
23
 
24
  )
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # --- FONCTION DE CHARGEMENT TECHNIQUE ---
27
def _sql_quote(path):
    """Return ``path`` as text that is safe inside a single-quoted SQL literal."""
    # SQL escapes a single quote inside a string literal by doubling it.
    return str(path).replace("'", "''")


def _duckdb_fetch_df(sql):
    """Run ``sql`` on a throwaway in-memory DuckDB connection and return a DataFrame."""
    con = duckdb.connect()
    try:
        return con.execute(sql).fetchdf()
    finally:
        # Always release the connection, even when the query raises.
        con.close()


def load_file_and_measure_time(file_path, library, file_ext, read_kwargs):
    """Load a file with one backend and measure how long the load takes.

    Args:
        file_path: Path of the file to read.
        library: Backend key: 'pandas', 'pyarrow', 'duckdb' or 'polars'.
        file_ext: File extension including the dot. '.parquet' and
            '.xlsx'/'.xls' get dedicated readers; anything else is read as CSV.
        read_kwargs: Mapping of reader options. 'header' (truthy when the
            first row holds column names) and 'sheet_idx' (0-based Excel
            sheet index, default 0) are the keys read here.

    Returns:
        ``(elapsed_seconds, row_count)`` on success, or
        ``("Erreur: <message>", 0)`` when anything goes wrong, including an
        unsupported ``library`` value.
    """
    try:
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        df = None

        # --- PARQUET ---
        if file_ext == '.parquet':
            if library == 'pandas':
                df = pd.read_parquet(file_path)
            elif library == 'pyarrow':
                df = pd.read_parquet(file_path, engine='pyarrow')
            elif library == 'duckdb':
                df = _duckdb_fetch_df(
                    f"SELECT * FROM read_parquet('{_sql_quote(file_path)}')"
                )
            elif library == 'polars':
                df = pl.read_parquet(file_path)

        # --- EXCEL ---
        elif file_ext in ['.xlsx', '.xls']:
            sheet_idx = read_kwargs.get('sheet_idx', 0)
            header = 0 if read_kwargs.get('header') else None
            # NOTE(review): the 'duckdb' entry is read through pandas here, so
            # its Excel timing does not measure DuckDB itself.
            if library in ['pandas', 'pyarrow', 'duckdb']:
                df = pd.read_excel(file_path, sheet_name=sheet_idx, header=header)
            elif library == 'polars':
                # polars sheet ids are 1-based while sheet_idx is 0-based.
                # NOTE(review): the header option is not forwarded to polars.
                df = pl.read_excel(file_path, sheet_id=sheet_idx + 1)

        # --- CSV ---
        else:
            has_header = bool(read_kwargs.get('header'))
            header_val = 0 if has_header else None
            if library == 'pandas':
                df = pd.read_csv(file_path, sep=None, engine='python', header=header_val)
            elif library == 'pyarrow':
                df = pd.read_csv(file_path, sep=None, engine='python', header=header_val, dtype_backend='pyarrow')
            elif library == 'duckdb':
                header_flag = "TRUE" if has_header else "FALSE"
                df = _duckdb_fetch_df(
                    f"SELECT * FROM read_csv_auto('{_sql_quote(file_path)}', HEADER={header_flag})"
                )
            elif library == 'polars':
                # Pass a real bool: a missing 'header' key would otherwise give None.
                df = pl.read_csv(file_path, has_header=has_header)

        if df is None:
            # No branch matched: report it clearly instead of failing on len(None).
            raise ValueError(f"librairie non prise en charge : {library!r}")

        elapsed = time.perf_counter() - start_time
        return elapsed, len(df)
    except Exception as e:
        # Errors are returned as text so the caller can list them per backend.
        return f"Erreur: {e}", 0
75
+
76
 
77
  # --- FONCTION POUR CHARGER DEPUIS UN BUFFER UPLOADÉ ---
78
  def load_from_buffer(uploaded_file, library, file_ext, read_kwargs):
 
94
  except Exception as e:
95
  return f"Erreur: {e}", 0
96
 
97
+
98
# --- TARGET FILE HANDLING IN THE SESSION STATE ---
# Give every key the app relies on a None default on first run, so later
# reads of st.session_state never hit a missing attribute.
for _state_key in ('target_file', 'file_ext', 'uploaded_buffer'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None
105
 
 
157
 
158
  # --- ACTIONS ET AFFICHAGE ---
159
  if st.session_state.target_file is not None:
160
+ st.sidebar.success(f"Actif : **{st.session_state.target_file}**")
161
+
162
+ has_header = st.sidebar.radio("Ligne de titres en première ligne ?", ["Oui", "Non"], index=0) == "Oui"
163
+ read_kwargs = {'header': has_header}
164
+
165
+ if st.session_state.file_ext in ['.xlsx', '.xls']:
166
+ sheet_num = st.sidebar.number_input("Numéro de l'onglet (1 = premier)", min_value=1, value=1)
167
+ read_kwargs['sheet_idx'] = sheet_num - 1
168
+
169
  run_comparison = st.sidebar.button("Lancer la comparaison")
170
 
171
  if run_comparison:
 
184
  load_time, row_count = load_file_and_measure_time(st.session_state.target_file, lib_key, st.session_state.file_ext, read_kwargs)
185
  results.append({'Librairie': lib_name, 'Temps de chargement (s)': load_time, 'Nombre de lignes': row_count})
186
 
187
+ results_df = pd.DataFrame(results)
188
 
189
  valid_counts = results_df[results_df['Nombre de lignes'] > 0]['Nombre de lignes']
190
  if not valid_counts.empty:
191
  st.markdown(f"**Nombre de lignes détectées :** **{int(valid_counts.iloc[0]):,}**".replace(',', ' '))
192
 
193
  chart_data = results_df[results_df['Temps de chargement (s)'].apply(lambda x: isinstance(x, (int, float)))]
194
+
195
+ if not chart_data.empty:
196
+ chart_data = chart_data.sort_values(by='Temps de chargement (s)', ascending=True)
197
+ fig, ax = plt.subplots(figsize=(8, 2.5))
198
+ bars = ax.barh(chart_data['Librairie'], chart_data['Temps de chargement (s)'],
199
+ color=['#4CAF50', '#2196F3', '#FFC107', '#E91E63'])
200
+
201
+ max_time = chart_data['Temps de chargement (s)'].max()
202
+ ax.set_xlim(right=max_time * 1.35)
203
+ for bar in bars:
204
+ ax.text(bar.get_width() + (max_time * 0.03), bar.get_y() + bar.get_height()/2,
205
+ f'{bar.get_width():.4f}s', va='center', fontsize=10, fontweight='bold')
206
+
207
+ ax.set_xlabel('Temps (secondes)')
208
+ ax.set_title('Comparaison de vitesse')
209
+ st.pyplot(fig)
210
+ plt.close(fig)
211
+
212
+ else:
213
+ st.info("Veuillez charger un fichier ou utiliser un bouton de test à gauche.")