Upload 6 files
Browse files- databases.py +145 -1
- page_modules/analyze_transcriptions.py +3 -3
- page_modules/statistics.py +58 -21
databases.py
CHANGED
|
@@ -1,13 +1,18 @@
|
|
| 1 |
import os
|
| 2 |
import sqlite3
|
| 3 |
from contextlib import contextmanager
|
| 4 |
-
from typing import Optional, Dict, Any, List, Tuple
|
| 5 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Reutilizamos la misma lógica que antes, pero centralizada en este módulo
|
| 8 |
|
| 9 |
DEFAULT_DB_PATH = None # set by set_db_path at runtime
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
def set_db_path(db_path: str):
|
| 13 |
global DEFAULT_DB_PATH
|
|
@@ -134,6 +139,145 @@ def get_feedback_ad_for_video(video_name: str):
|
|
| 134 |
return cur.fetchall()
|
| 135 |
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
def get_feedback_ad_stats():
|
| 138 |
# medias por vídeo y ranking
|
| 139 |
with get_conn() as conn:
|
|
|
|
| 1 |
import os
|
| 2 |
import sqlite3
|
| 3 |
from contextlib import contextmanager
|
|
|
|
| 4 |
from datetime import datetime
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from statistics import median
|
| 7 |
+
from typing import Optional, Dict, Any, List, Tuple
|
| 8 |
|
| 9 |
# Reutilizamos la misma lógica que antes, pero centralizada en este módulo
|
| 10 |
|
| 11 |
DEFAULT_DB_PATH = None # set by set_db_path at runtime
|
| 12 |
|
| 13 |
+
# Ruta a la base de dades de feedback agregat (separa de login.db)
|
| 14 |
+
FEEDBACK_DB_PATH = Path(__file__).resolve().parent / "data" / "feedback.db"
|
| 15 |
+
|
| 16 |
|
| 17 |
def set_db_path(db_path: str):
|
| 18 |
global DEFAULT_DB_PATH
|
|
|
|
| 139 |
return cur.fetchall()
|
| 140 |
|
| 141 |
|
| 142 |
+
def _connect_feedback_db() -> sqlite3.Connection:
    """Open a connection to the feedback database at ``FEEDBACK_DB_PATH``.

    This database is opened independently of ``DEFAULT_DB_PATH``; per the
    original author's note it holds aggregated feedback imported from the
    engine.

    The parent directory of the database file is created when missing.

    Returns:
        A ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``, so
        columns can be read by name. The caller owns the connection and is
        responsible for closing it.
    """
    db_file = FEEDBACK_DB_PATH
    # sqlite3.connect() creates the file but not the directory that holds it.
    db_file.parent.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(str(db_file))
    connection.row_factory = sqlite3.Row
    return connection
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def _parse_feedback_timestamp(ts: Any) -> datetime:
    """Parse a ``"YYYY-MM-DD HH:MM:SS"`` string; return ``datetime.min`` on failure."""
    try:
        return datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # TypeError: value is not a string (e.g. NULL); ValueError: wrong format.
        # Unparseable timestamps therefore sort before every valid one.
        return datetime.min


def _aggregate_feedback_score(rows: List[Any], key: str, agg: str) -> Optional[float]:
    """Reduce column ``key`` of one video's rows to a single value.

    For ``"inicial"`` / ``"actual"`` the rows must already be ordered oldest
    first; the first / last row's value is returned (``None`` if it is NULL).
    ``"mediana"`` returns the median of the non-NULL values. Any other mode,
    including ``"mitjana"`` and unrecognised ones, returns their mean.
    """
    if agg in ("inicial", "actual"):
        picked = rows[0 if agg == "inicial" else -1][key]
        return None if picked is None else float(picked)

    values = [row[key] for row in rows if row[key] is not None]
    if not values:
        return None
    if agg == "mediana":
        return float(median(values))
    # "mitjana", and the fallback for unknown modes.
    return float(sum(values) / len(values))


def get_feedback_video_stats(agg: str = "mitjana") -> List[Dict[str, Any]]:
    """Return per-video aggregated scores read from the ``feedback`` table.

    Args:
        agg: Aggregation mode, case-insensitive. A falsy value means
            ``"mitjana"``.

            - ``"mitjana"``: mean of each score over the video's rows.
            - ``"mediana"``: median of each score over the video's rows.
            - ``"inicial"``: scores of the earliest row (by timestamp).
            - ``"actual"``: scores of the latest row (by timestamp).

            Unrecognised modes fall back to the mean.

    Returns:
        One dict per video, sorted by ``video_name``, with the keys
        ``video_name``, ``n`` (number of rows for that video) and
        ``score_1`` .. ``score_6`` (``float``, or ``None`` when no value is
        available). Any further ordering is left to the UI.

    Raises:
        sqlite3.Error: propagated unchanged, e.g. when the ``feedback`` table
            does not exist.
    """
    # Local import: the module-level imports only bring in `contextmanager`.
    from contextlib import closing

    agg = (agg or "mitjana").lower()
    score_keys = [f"score_{i}" for i in range(1, 7)]

    # A sqlite3 connection used directly as a context manager only commits or
    # rolls back; it is never closed. closing() releases the handle.
    with closing(_connect_feedback_db()) as conn:
        rows = conn.execute(
            """
            SELECT
                video_name,
                timestamp,
                score_1,
                score_2,
                score_3,
                score_4,
                score_5,
                score_6
            FROM feedback
            """
        ).fetchall()

    # Group rows by video, keeping the order in which they were fetched.
    by_video: Dict[str, List[sqlite3.Row]] = {}
    for row in rows:
        by_video.setdefault(row["video_name"], []).append(row)

    needs_time_order = agg in ("inicial", "actual")

    result: List[Dict[str, Any]] = []
    for video_name, video_rows in by_video.items():
        if needs_time_order:
            # Only the positional modes depend on chronological order.
            # sorted() is stable, so rows with equal timestamps keep fetch order.
            video_rows = sorted(
                video_rows,
                key=lambda r: _parse_feedback_timestamp(r["timestamp"]),
            )

        row_out: Dict[str, Any] = {"video_name": video_name, "n": len(video_rows)}
        for key in score_keys:
            row_out[key] = _aggregate_feedback_score(video_rows, key, agg)
        result.append(row_out)

    # Default ordering is alphabetical by name; the UI decides the final order.
    result.sort(key=lambda r: r["video_name"])
    return result
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def get_feedback_score_labels() -> List[str]:
    """Return the names of the six scored items, read from one stored ``scores`` value.

    The ``scores`` column is expected (per the original author's note) to hold
    the text of an ``eval.csv`` file:

    - first non-blank line: header (ignored);
    - following lines: up to six rows of ``item_name,value,justification``.

    The first row of ``feedback`` with a non-empty ``scores`` value is used.

    Returns:
        Exactly six labels. Positions that cannot be determined (no usable
        row, fewer than six data rows, empty item name) are filled with the
        defaults ``"score_1"`` .. ``"score_6"``.

    Raises:
        sqlite3.Error: propagated unchanged, e.g. when the ``feedback`` table
            does not exist.
    """
    # Local imports: neither name is among the module-level imports.
    import csv
    from contextlib import closing

    default_labels = [f"score_{i}" for i in range(1, 7)]

    # A sqlite3 connection used directly as a context manager only commits or
    # rolls back; it is never closed. closing() releases the handle.
    with closing(_connect_feedback_db()) as conn:
        row = conn.execute(
            "SELECT scores FROM feedback WHERE scores IS NOT NULL AND scores != '' LIMIT 1"
        ).fetchone()

    raw = row["scores"] if row is not None else None
    if not raw:
        return default_labels

    lines = [line.strip() for line in raw.splitlines() if line.strip()]
    if len(lines) <= 1:
        # Header only (or nothing): there are no data rows to read names from.
        return default_labels

    labels: List[str] = []
    try:
        # csv.reader honours quoting, so an item name such as "Ritme, pauses"
        # is kept whole instead of being cut at its embedded comma.
        for fields in csv.reader(lines[1:]):
            if len(labels) >= 6:
                break
            label = fields[0].strip().strip('"') if fields else ""
            labels.append(label or default_labels[len(labels)])
    except csv.Error:
        # Malformed CSV: keep what was parsed so far; defaults fill the rest.
        pass

    # Always return exactly six labels.
    labels.extend(default_labels[len(labels):])
    return labels[:6]
|
| 279 |
+
|
| 280 |
+
|
| 281 |
def get_feedback_ad_stats():
|
| 282 |
# medias por vídeo y ranking
|
| 283 |
with get_conn() as conn:
|
page_modules/analyze_transcriptions.py
CHANGED
|
@@ -332,7 +332,7 @@ def render_analyze_transcriptions_page(api, permissions: Dict[str, bool]) -> Non
|
|
| 332 |
else:
|
| 333 |
if st.button("Enviar valoraci贸", type="primary", use_container_width=True):
|
| 334 |
try:
|
| 335 |
-
from
|
| 336 |
|
| 337 |
# Guardar en la base de datos
|
| 338 |
add_feedback_ad(
|
|
@@ -347,8 +347,8 @@ def render_analyze_transcriptions_page(api, permissions: Dict[str, bool]) -> Non
|
|
| 347 |
comments=comments or None,
|
| 348 |
)
|
| 349 |
|
| 350 |
-
# También guardar en CSV
|
| 351 |
-
video_dir = Path("demo/videos") / seleccio
|
| 352 |
version = st.session_state.get("selected_version", "MoE")
|
| 353 |
csv_path = video_dir / version / "eval.csv"
|
| 354 |
|
|
|
|
| 332 |
else:
|
| 333 |
if st.button("Enviar valoraci贸", type="primary", use_container_width=True):
|
| 334 |
try:
|
| 335 |
+
from databases import add_feedback_ad
|
| 336 |
|
| 337 |
# Guardar en la base de datos
|
| 338 |
add_feedback_ad(
|
|
|
|
| 347 |
comments=comments or None,
|
| 348 |
)
|
| 349 |
|
| 350 |
+
# También guardar en CSV (reubicado en demo/data/videos)
|
| 351 |
+
video_dir = Path("demo/data/videos") / seleccio
|
| 352 |
version = st.session_state.get("selected_version", "MoE")
|
| 353 |
csv_path = video_dir / version / "eval.csv"
|
| 354 |
|
page_modules/statistics.py
CHANGED
|
@@ -5,41 +5,78 @@ from __future__ import annotations
|
|
| 5 |
import pandas as pd
|
| 6 |
import streamlit as st
|
| 7 |
|
| 8 |
-
from
|
| 9 |
|
| 10 |
|
| 11 |
def render_statistics_page() -> None:
|
| 12 |
st.header("Estad铆stiques")
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
if not stats:
|
| 16 |
-
st.caption("Encara no hi ha valoracions.")
|
| 17 |
st.stop()
|
| 18 |
|
| 19 |
-
df = pd.DataFrame(stats
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
)
|
| 25 |
-
if ordre.startswith("Asc"):
|
| 26 |
-
df = df.sort_values("avg_global", ascending=True)
|
| 27 |
-
else:
|
| 28 |
-
df = df.sort_values("avg_global", ascending=False)
|
| 29 |
|
| 30 |
-
st.subheader("
|
| 31 |
st.dataframe(
|
| 32 |
df[
|
| 33 |
[
|
| 34 |
"video_name",
|
| 35 |
"n",
|
| 36 |
-
"
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
-
"
|
| 41 |
-
"
|
| 42 |
-
"avg_expressivitat",
|
| 43 |
]
|
| 44 |
],
|
| 45 |
use_container_width=True,
|
|
|
|
| 5 |
import pandas as pd
|
| 6 |
import streamlit as st
|
| 7 |
|
| 8 |
+
from databases import get_feedback_video_stats
|
| 9 |
|
| 10 |
|
| 11 |
def render_statistics_page() -> None:
|
| 12 |
st.header("Estad铆stiques")
|
| 13 |
|
| 14 |
+
st.markdown(
|
| 15 |
+
"""
|
| 16 |
+
Aquest panell mostra **estad铆stiques agregades per v铆deo** a partir de la taula
|
| 17 |
+
`feedback` de `demo/data/feedback.db`. Per a cada v铆deo es calcula, segons el
|
| 18 |
+
mode triat, una puntuaci贸 per a cadascun dels `score_1` ... `score_6`.
|
| 19 |
+
"""
|
| 20 |
+
)
|
| 21 |
+
|
| 22 |
+
col1, col2 = st.columns(2)
|
| 23 |
+
|
| 24 |
+
with col1:
|
| 25 |
+
mode_label = st.selectbox(
|
| 26 |
+
"Mode d'agregaci贸",
|
| 27 |
+
["mitjana", "mediana", "inicial", "actual"],
|
| 28 |
+
help=(
|
| 29 |
+
"mitjana: mitjana de totes les valoracions; "
|
| 30 |
+
"mediana: valor central; "
|
| 31 |
+
"inicial: primer registre en el temps; "
|
| 32 |
+
"actual: darrer registre en el temps."
|
| 33 |
+
),
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
order_options = {
|
| 37 |
+
"nom": "video_name",
|
| 38 |
+
"score_1": "score_1",
|
| 39 |
+
"score_2": "score_2",
|
| 40 |
+
"score_3": "score_3",
|
| 41 |
+
"score_4": "score_4",
|
| 42 |
+
"score_5": "score_5",
|
| 43 |
+
"score_6": "score_6",
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
with col2:
|
| 47 |
+
order_label = st.selectbox(
|
| 48 |
+
"Ordenar per",
|
| 49 |
+
list(order_options.keys()),
|
| 50 |
+
help=(
|
| 51 |
+
"Indica el camp pel qual s'ordenen els v铆deos a la taula: "
|
| 52 |
+
"nom del v铆deo o algun dels score_1 .. score_6."
|
| 53 |
+
),
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
stats = get_feedback_video_stats(agg=mode_label)
|
| 57 |
if not stats:
|
| 58 |
+
st.caption("Encara no hi ha valoracions a demo/data/feedback.db.")
|
| 59 |
st.stop()
|
| 60 |
|
| 61 |
+
df = pd.DataFrame(stats)
|
| 62 |
+
|
| 63 |
+
# Ordenació segons el selector
|
| 64 |
+
order_key = order_options[order_label]
|
| 65 |
+
ascending = order_key in {"video_name", "nom"}
|
| 66 |
+
df = df.sort_values(order_key, ascending=ascending, na_position="last")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
+
st.subheader("Taula agregada per v铆deo")
|
| 69 |
st.dataframe(
|
| 70 |
df[
|
| 71 |
[
|
| 72 |
"video_name",
|
| 73 |
"n",
|
| 74 |
+
"score_1",
|
| 75 |
+
"score_2",
|
| 76 |
+
"score_3",
|
| 77 |
+
"score_4",
|
| 78 |
+
"score_5",
|
| 79 |
+
"score_6",
|
|
|
|
| 80 |
]
|
| 81 |
],
|
| 82 |
use_container_width=True,
|