Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
Import des fichiers
- Dockerfile +34 -20
- app.py +143 -0
- requirements.txt +5 -3
Dockerfile
CHANGED
|
@@ -1,20 +1,34 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
RUN apt-get update && apt-get install -y \
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax = docker/dockerfile:1.4
FROM python:3.12-slim

# Install the system packages (compilers and image-library headers) used when
# Python dependencies have to be built from source.
# libtiff-dev replaces libtiff5-dev: the latter is only a transitional package
# name and is not guaranteed to exist on every Debian release that the
# floating python:3.12-slim tag may be based on.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    g++ \
    libjpeg62-turbo-dev \
    zlib1g-dev \
    libpng-dev \
    libfreetype6-dev \
    libopenjp2-7-dev \
    libtiff-dev \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements first so the dependency layer stays cached when only the
# application code changes.
COPY requirements.txt .

# Install everything (without the pip cache, to keep the final image smaller).
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code.
COPY app.py .

# Port and start command for Hugging Face Spaces.
# NOTE(review): the Space configuration presumably has to declare the same
# port (8501) -- confirm against the Space's README metadata.
EXPOSE 8501

# curl (installed above) probes Streamlit's health endpoint.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# NOTE(review): CORS and XSRF protection are both disabled here; confirm this
# is required by the hosting setup rather than an accidental relaxation.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableCORS=false", "--server.enableXsrfProtection=false"]
|
app.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import plotly.graph_objects as go
|
| 4 |
+
from datasketch import HyperLogLog
|
| 5 |
+
import random
|
| 6 |
+
import time
|
| 7 |
+
import psutil
|
| 8 |
+
import os
|
| 9 |
+
import gc
|
| 10 |
+
|
| 11 |
+
# 1. Page configuration, plus CSS that removes scrolling and adjusts spacing.
# The stylesheet is injected verbatim through st.markdown below.
_PAGE_CSS = """
<style>
.block-container {padding-top: 2.5rem; padding-bottom: 0rem;}
div[data-testid="stMetricValue"] {font-size: 1.8rem;}
.stTable {font-size: 0.8rem; margin-bottom: 0rem;}
/* Alignement à droite pour toutes les cellules sauf la première colonne */
.stTable td:not(:first-child), .stTable th:not(:first-child) {
text-align: right !important;
}
h3 {margin-top: -1rem; margin-bottom: 1rem;}
/* Supprimer l'espace sous le texte 'Détails des résultats' */
.compact-text {margin-bottom: -15px; font-weight: bold;}
</style>
"""

st.set_page_config(page_title="HLL POC", layout="wide")

# unsafe_allow_html is needed so the <style> tag is emitted as markup.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
| 28 |
+
|
| 29 |
+
def get_memory():
    """Return the resident set size (RSS) of the current process in MiB.

    A garbage collection is forced first so that collectable objects are
    released before the reading is taken.
    """
    gc.collect()
    current_process = psutil.Process(os.getpid())
    rss_bytes = current_process.memory_info().rss
    return rss_bytes / 1024 / 1024
|
| 32 |
+
|
| 33 |
+
def run_simulation(n_visits, conversion_rate, p, unique_ratio, seed=None):
    """Benchmark exact set-based distinct counting against HyperLogLog.

    Simulates ``n_visits`` visits twice -- once counted with Python sets and
    once with HyperLogLog sketches -- and records counts, elapsed time and
    process-memory growth for each approach. Both passes replay the *same*
    pseudo-random stream, so any difference between the two counts is
    attributable to the HyperLogLog estimate rather than to sampling noise.

    Args:
        n_visits: Number of simulated visits.
        conversion_rate: Probability (0..1) that a visit is also a purchase.
        p: HyperLogLog precision, passed to ``HyperLogLog(p=p)``.
        unique_ratio: Fraction (0..1) of ``n_visits`` used as the size of the
            visitor-ID pool; smaller values produce more repeat visitors.
        seed: Optional seed for the visit stream. When ``None`` a fresh random
            seed is drawn, so successive calls still differ.

    Returns:
        ``{"e": [...], "h": [...]}`` where each list holds
        ``[visitors, buyers, elapsed_seconds, memory_growth_mib]`` for the
        exact ("e") and HyperLogLog ("h") pass. Memory growth is floored at
        0.1 (exact) and 0.01 (HLL) so it can safely be used as a divisor.
    """
    # The ID range follows the requested uniqueness ratio:
    # ratio=100% -> pool = n_visits; ratio=10% -> pool = n_visits * 0.1.
    pool_size = max(1, int(n_visits * unique_ratio))

    if seed is None:
        seed = random.getrandbits(64)

    # NOTE(review): memory is measured as an RSS delta around each pass, which
    # is a coarse signal -- confirm it is precise enough for the "Gain RAM" KPI.

    # EXACT
    rng = random.Random(seed)
    mem_start_e = get_memory()
    t_start_e = time.perf_counter()
    v_set, b_set = set(), set()
    for _ in range(n_visits):
        v_id = rng.randint(1, pool_size)
        v_set.add(v_id)
        if rng.random() < conversion_rate:
            b_set.add(v_id)
    res_e = [
        len(v_set),
        len(b_set),
        time.perf_counter() - t_start_e,
        max(get_memory() - mem_start_e, 0.1),
    ]
    # Release the sets before the second pass so they do not inflate its
    # memory baseline.
    del v_set, b_set

    # HLL
    # Re-seeding replays exactly the same (visitor, purchase) sequence: both
    # loops call randint() and random() in the same order.
    rng = random.Random(seed)
    mem_start_h = get_memory()
    t_start_h = time.perf_counter()
    h_v, h_b = HyperLogLog(p=p), HyperLogLog(p=p)
    for _ in range(n_visits):
        # The sketch is fed the UTF-8 bytes of the decimal ID.
        v_id = str(rng.randint(1, pool_size)).encode('utf8')
        h_v.update(v_id)
        if rng.random() < conversion_rate:
            h_b.update(v_id)
    res_h = [
        h_v.count(),
        h_b.count(),
        time.perf_counter() - t_start_h,
        max(get_memory() - mem_start_h, 0.01),
    ]
    return {"e": res_e, "h": res_h}
|
| 59 |
+
|
| 60 |
+
# --- Sidebar ---
# Collects the simulation parameters; the names n_visits, unique_ratio,
# conv_rate, p and run are read by the main page further down.
with st.sidebar:
    st.header("⚙️ Paramètres")

    visit_choices = [100, 1000, 10000, 100000, 1000000, 5_000_000, 10_000_000]
    n_visits = st.select_slider(
        "Nombre de visites",
        options=visit_choices,
        value=100000,
    )

    # Slider controlling duplicates: entered as a percentage, used as a fraction.
    unique_pct = st.slider(
        "Part de visiteurs uniques (%)",
        min_value=1,
        max_value=100,
        value=90,
    )
    unique_ratio = unique_pct / 100

    # Conversion rate: also entered as a percentage, used as a fraction.
    conv_pct = st.slider(
        "Taux de conversion (%)",
        min_value=0.1,
        max_value=10.0,
        value=5.0,
    )
    conv_rate = conv_pct / 100

    p = st.slider("Précision HyperLogLog (p)", min_value=4, max_value=16, value=14)
    # Show the value 2**p that corresponds to the chosen precision.
    st.caption(f"Registres : {2**p:,}")

    run = st.button(
        "Lancer la simulation",
        type="primary",
        use_container_width=True,
    )
|
| 72 |
+
|
| 73 |
+
# --- Main page ---
st.subheader("🚀 Benchmark : Comptages exacts vs HyperLogLog")


def _relative_error_label(exact, estimate):
    """Format |exact - estimate| / exact as a percentage, or "n/a" if exact is 0.

    A run with few visits and a low conversion rate can yield zero exact
    buyers; the relative error is undefined in that case and must not raise
    ZeroDivisionError.
    """
    if not exact:
        return "n/a"
    return f"{(abs(exact - estimate) / exact * 100):.2f}%"


if run:
    with st.spinner("Calculs..."):
        data = run_simulation(n_visits, conv_rate, p, unique_ratio)

    # Each result list is [visitors, buyers, elapsed seconds, memory in MB].
    e, h = data["e"], data["h"]
    rate_e = e[1] / e[0] if e[0] else 0.0
    rate_h = h[1] / h[0] if h[0] else 0.0
    rate_error = _relative_error_label(rate_e, rate_h)

    # KPIs
    c1, c2, c3 = st.columns(3)
    c1.metric("Gain RAM", f"{e[3]/h[3]:.0f}x")
    if e[2] > 0:
        slowdown = h[2] / e[2]
        hll_is_slower = slowdown >= 1
        # Only label HLL as slower when it actually was.
        c2.metric(
            "Temps",
            f"{slowdown:.1f}x",
            delta="Plus lent" if hll_is_slower else "Plus rapide",
            delta_color="inverse" if hll_is_slower else "normal",
        )
    else:
        # The exact pass finished below the clock resolution: no ratio to show.
        c2.metric("Temps", "n/a")
    c3.metric("Erreur Taux", rate_error)

    st.markdown('<p class="compact-text">Détails des résultats</p>', unsafe_allow_html=True)

    col_left, col_right = st.columns([1, 1], vertical_alignment="bottom")

    with col_left:
        # Side-by-side comparison of exact values and HLL estimates.
        df_res = pd.DataFrame({
            "Métrique": ["Visiteurs", "Acheteurs", "Taux %"],
            "Exact": [f"{e[0]:,}", f"{e[1]:,}", f"{rate_e:.3%}"],
            "HLL": [f"{h[0]:,.0f}", f"{h[1]:,.0f}", f"{rate_h:.2%}"],
            "Erreur %": [
                _relative_error_label(e[0], h[0]),
                _relative_error_label(e[1], h[1]),
                rate_error,
            ],
        })
        st.table(df_res)

    with col_right:
        fig = go.Figure()

        # Sets trace (blue)
        fig.add_trace(go.Bar(
            name='Sets',
            x=['RAM (MB)', 'Temps (s)'],
            y=[e[3], e[2]],
            text=[f"{e[3]:.1f}MB", f"{e[2]:.2f}s"],
            textposition='auto',
            marker_color='#1f77b4',
        ))

        # HLL trace (green)
        fig.add_trace(go.Bar(
            name='HLL',
            x=['RAM (MB)', 'Temps (s)'],
            y=[h[3], h[2]],
            text=[f"{h[3]:.2f}MB", f"{h[2]:.2f}s"],
            textposition='auto',
            marker_color='#7fcdbb',
        ))

        # Compact, non-zoomable chart; the y axis is hidden because the bar
        # labels already carry the values.
        fig.update_layout(
            height=250,
            margin=dict(l=0, r=0, t=30, b=0),
            barmode='group',
            legend=dict(orientation="h", yanchor="bottom", y=1.1, xanchor="right", x=1),
            xaxis=dict(
                side='top',
                fixedrange=True,
            ),
            yaxis=dict(fixedrange=True, visible=False),
        )
        st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})

else:
    st.info("👈 Réglez les paramètres et lancez la simulation.")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
-
|
| 2 |
-
pandas
|
| 3 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py calls st.columns(vertical_alignment=...), which needs Streamlit >= 1.36.
streamlit>=1.36
pandas
plotly
datasketch
psutil
|