Spaces:
Sleeping
Sleeping
Upload 21 files
Browse files- app/__init__.py +0 -0
- app/__pycache__/__init__.cpython-311.pyc +0 -0
- app/__pycache__/analyzer.cpython-311.pyc +0 -0
- app/__pycache__/charts.cpython-311.pyc +0 -0
- app/__pycache__/client_manager.cpython-311.pyc +0 -0
- app/__pycache__/fetcher.cpython-311.pyc +0 -0
- app/__pycache__/reporting.cpython-311.pyc +0 -0
- app/analyzer.py +248 -0
- app/charts.py +334 -0
- app/client_manager.py +50 -0
- app/fetcher.py +139 -0
- app/reporting.py +63 -0
- app/ui/__init__.py +0 -0
- app/ui/__pycache__/__init__.cpython-311.pyc +0 -0
- app/ui/__pycache__/components.cpython-311.pyc +0 -0
- app/ui/__pycache__/main_app.cpython-311.pyc +0 -0
- app/ui/__pycache__/panel.cpython-311.pyc +0 -0
- app/ui/components.py +42 -0
- app/ui/main_app.py +294 -0
- app/ui/panel.py +83 -0
- app/utils.py +0 -0
app/__init__.py
ADDED
|
File without changes
|
app/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (153 Bytes). View file
|
|
|
app/__pycache__/analyzer.cpython-311.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
app/__pycache__/charts.cpython-311.pyc
ADDED
|
Binary file (18.8 kB). View file
|
|
|
app/__pycache__/client_manager.cpython-311.pyc
ADDED
|
Binary file (2.66 kB). View file
|
|
|
app/__pycache__/fetcher.cpython-311.pyc
ADDED
|
Binary file (6.17 kB). View file
|
|
|
app/__pycache__/reporting.cpython-311.pyc
ADDED
|
Binary file (3.66 kB). View file
|
|
|
app/analyzer.py
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/analyzer.py
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import re
|
| 5 |
+
from typing import List, Tuple
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn.functional as F
|
| 10 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 11 |
+
import streamlit as st
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# ======================================================================
# Config
# ======================================================================
# Model published with safetensors weights (avoids the torch.load pickle
# vulnerability of legacy .bin checkpoints).
MODEL_ID = "nlptown/bert-base-multilingual-uncased-sentiment"
# Alternative choice: "cardiffnlp/twitter-xlm-roberta-base-sentiment"

# Recommended batch sizes (tunable)
BATCH_SIZE_CPU = 32
BATCH_SIZE_GPU = 64

# Mapping from the model's raw star labels to Spanish labels (UI-compatible).
# The Spanish values are runtime strings consumed by the UI — do not translate.
LABEL_MAP_ES = {
    "1 star": "muy negativo",
    "2 stars": "negativo",
    "3 stars": "neutral",
    "4 stars": "positivo",
    "5 stars": "muy positivo",
    # defensive variants in case the model returns a different singular/plural form
    "1 stars": "muy negativo",
    "2 star": "negativo",
    "3 star": "neutral",
    "4 star": "positivo",
    "5 star": "muy positivo",
}

# Label groupings used by the sarcasm-adjustment heuristic below
NEG_SET = {"muy negativo", "negativo"}
POS_SET = {"positivo", "muy positivo"}
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# ======================================================================
|
| 45 |
+
# Carga del modelo/tokenizer (cacheada) + device
|
| 46 |
+
# ======================================================================
|
| 47 |
+
@st.cache_resource(show_spinner=False)
def load_sentiment_components():
    """Load and cache the sentiment tokenizer, model and inference device.

    Returns:
        tuple: ``(tokenizer, model, device)`` — the model is in eval mode
        and already moved onto the selected device (CUDA when available,
        otherwise CPU). Cached once per process via ``st.cache_resource``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    mdl = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    mdl.eval()
    mdl.to(device)
    return tok, mdl, device
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# ======================================================================
|
| 58 |
+
# Limpieza básica de texto (rápida)
|
| 59 |
+
# ======================================================================
|
| 60 |
+
# Precompiled patterns for the light text-cleanup pass
_url = re.compile(r"https?://\S+")
_mention = re.compile(r"@\w+")
_ws = re.compile(r"\s+")

def clean_text_basic(t: str) -> str:
    """Light cleanup: remove URLs and @mentions, collapse whitespace.

    Hashtags and punctuation are kept on purpose — they carry useful
    signal in social-media text. Falsy input yields an empty string.
    """
    if not t:
        return ""
    no_mentions = _mention.sub(" ", _url.sub(" ", t))
    return _ws.sub(" ", no_mentions).strip()
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# ======================================================================
|
| 78 |
+
# Heurística de sarcasmo/ironía (rápida y transparente)
|
| 79 |
+
# ======================================================================
|
| 80 |
+
SARC_HASHTAGS = {
|
| 81 |
+
"#sarcasmo", "#sarcasm", "#ironia", "#irony", "#sarc", "#irónica", "#irónico"
|
| 82 |
+
}
|
| 83 |
+
SARC_MARKERS = {
|
| 84 |
+
"/s", # convención Reddit/foros
|
| 85 |
+
"sí claro", "claro que sí", "yeah right", "ajá",
|
| 86 |
+
"gracias por nada", "qué podría salir mal", "buenísimo...", "genial...", "perfecto...",
|
| 87 |
+
}
|
| 88 |
+
SARC_EMOJIS = {"🙃", "😒", "🙄"}
|
| 89 |
+
|
| 90 |
+
def sarcasm_score(t: str) -> int:
|
| 91 |
+
"""Devuelve 0/1/2 según señales de sarcasmo encontradas."""
|
| 92 |
+
if not t:
|
| 93 |
+
return 0
|
| 94 |
+
tl = t.lower()
|
| 95 |
+
score = 0
|
| 96 |
+
# hashtags
|
| 97 |
+
for tag in SARC_HASHTAGS:
|
| 98 |
+
if tag in tl:
|
| 99 |
+
score += 2
|
| 100 |
+
# marcadores
|
| 101 |
+
for m in SARC_MARKERS:
|
| 102 |
+
if m in tl:
|
| 103 |
+
score += 1
|
| 104 |
+
# emojis
|
| 105 |
+
if any(e in t for e in SARC_EMOJIS):
|
| 106 |
+
score += 1
|
| 107 |
+
# exceso de comillas + adjetivo positivo (muy simplificado)
|
| 108 |
+
if ('"' in t or "“" in t or "”" in t) and any(p in tl for p in ("genial", "perfecto", "maravilloso")):
|
| 109 |
+
score += 1
|
| 110 |
+
return min(score, 3)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def adjust_with_sarcasm(label_es: str, score: int) -> str:
    """Adjust a Spanish sentiment label when sarcasm signals are present.

    Conservative heuristic: sarcasm drags positive labels toward
    neutral/negative, and very strong sarcasm emphasises negativity.
    A score of 0 (or less) leaves the label untouched.
    """
    if score <= 0:
        return label_es
    strong = score >= 2
    if label_es in POS_SET:
        # model says positive but the text looks sarcastic → degrade
        return "negativo" if strong else "neutral"
    if label_es == "neutral":
        # only strong signals flip neutral to negative
        return "negativo" if strong else "neutral"
    if label_es in NEG_SET and score >= 3:
        # already negative + heavy sarcasm → emphasise
        return "muy negativo"
    return label_es
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
# ======================================================================
|
| 131 |
+
# Inferencia PyTorch pura (sin numpy)
|
| 132 |
+
# ======================================================================
|
| 133 |
+
def _predict_batch(texts: List[str], max_length: int = 256) -> Tuple[List[str], List[Tuple[float, float, float]], List[int]]:
    """
    Run batched sentiment inference over *texts* with the cached model.

    Args:
        texts: cleaned input strings, one per post.
        max_length: tokenizer truncation length.

    Returns:
        - labels_raw: raw model labels ("1 star" ... "5 stars")
        - probs_agg: per-text (p_neg, p_neu, p_pos) aggregated probabilities
        - sarc_scores: heuristic sarcasm score per text (for traceability)
    """
    tokenizer, model, device = load_sentiment_components()

    # Larger batches on GPU; smaller on CPU to keep latency reasonable.
    if torch.cuda.is_available():
        bs = BATCH_SIZE_GPU
    else:
        bs = BATCH_SIZE_CPU

    labels_raw: List[str] = []
    probs_agg: List[Tuple[float, float, float]] = []
    sarc_scores: List[int] = []

    # inference_mode disables autograd bookkeeping for speed/memory.
    with torch.inference_mode():
        for i in range(0, len(texts), bs):
            chunk = texts[i:i+bs]
            enc = tokenizer(
                chunk,
                padding=True,
                truncation=True,
                max_length=max_length,
                return_tensors="pt",
            )
            enc = {k: v.to(device) for k, v in enc.items()}
            out = model(**enc)  # logits shape: [B, 5] for this model
            probs = F.softmax(out.logits, dim=-1)  # [B,5]

            # Aggregate stars into 3 buckets (neg=1+2 stars, neu=3, pos=4+5).
            # Index order follows the nlptown model: 0..4 = '1 star'..'5 stars'.
            p_neg = probs[:, 0] + probs[:, 1]
            p_neu = probs[:, 2]
            p_pos = probs[:, 3] + probs[:, 4]

            top_idx = torch.argmax(probs, dim=-1).tolist()
            labels_raw.extend([model.config.id2label[int(j)] for j in top_idx])

            probs_agg.extend([(float(a), float(b), float(c)) for a, b, c in zip(p_neg, p_neu, p_pos)])

            # sarcasm score for each text of the batch (heuristic, not model-based)
            sarc_scores.extend([sarcasm_score(t) for t in chunk])

    return labels_raw, probs_agg, sarc_scores
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
# ======================================================================
|
| 183 |
+
# API principal para la app
|
| 184 |
+
# ======================================================================
|
| 185 |
+
@st.cache_data(show_spinner=False)
def clean_and_analyze(
    df: pd.DataFrame,
    min_chars: int = 20,
    dedup_cols: List[str] | None = None,
    use_clean_text: bool = True,
) -> pd.DataFrame:
    """
    Clean, deduplicate and enrich the DataFrame, adding sentiment columns.

    Requires a 'texto' column; other columns are preserved.
    - min_chars=0 and dedup_cols=['uri'] → "maximum volume" mode.
    - use_clean_text=True → run the model on cleaned text (more stable output).

    Returns the input unchanged when it is None or empty.
    """
    if df is None or df.empty:
        return df

    d = df.copy()

    # --- Deduplication ---
    if dedup_cols:
        d = d.drop_duplicates(subset=dedup_cols)

    # --- Basic text handling + minimum-length filter ---
    d["texto"] = d["texto"].fillna("")
    if min_chars and min_chars > 0:
        d = d[d["texto"].str.len() >= min_chars]
    if d.empty:
        return d

    # Keep the original text and derive a cleaned version for the model
    d["texto_raw"] = d["texto"]
    d["texto_clean"] = d["texto_raw"].map(clean_text_basic) if use_clean_text else d["texto_raw"]

    # --- Cheap enrichment columns ---
    d["n_palabras"] = d["texto_raw"].str.split().str.len()
    d["has_url"] = d["texto_raw"].str.contains(r"https?://", na=False)
    d["hashtags"] = d["texto_raw"].str.findall(r"#\w+")
    d["mentions"] = d["texto_raw"].str.findall(r"@\w+")

    # --- Model inference ---
    texts_for_model = d["texto_clean"].astype(str).tolist()
    labels_raw, probs_agg, sarc_scores = _predict_batch(texts_for_model, max_length=256)
    d["sentiment"] = labels_raw
    d["p_neg"], d["p_neu"], d["p_pos"] = zip(*probs_agg)
    d["sarcasm_score"] = sarc_scores

    # --- Spanish labels + sarcasm adjustment ---
    # Unknown raw labels fall back to "neutral".
    d["sent_desc"] = d["sentiment"].map(LABEL_MAP_ES).fillna("neutral")
    d["sent_desc_adj"] = [adjust_with_sarcasm(lbl, sc) for lbl, sc in zip(d["sent_desc"], d["sarcasm_score"])]

    # For compatibility with the rest of the app, expose 'sent_desc' as the final label:
    d["sent_desc"] = d["sent_desc_adj"]
    d = d.drop(columns=["sent_desc_adj"], errors="ignore")

    # Suggested column order (any missing column is simply skipped)
    cols_order = [
        "uri", "autor", "fecha", "texto_raw", "texto_clean",
        "sentiment", "sent_desc", "p_neg", "p_neu", "p_pos", "sarcasm_score",
        "n_palabras", "has_url", "hashtags", "mentions",
    ]
    # Keep any original columns not listed above, appended at the end
    cols_final = [c for c in cols_order if c in d.columns] + [c for c in d.columns if c not in cols_order]
    d = d[cols_final]

    return d
|
app/charts.py
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import altair as alt
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
from typing import Optional
|
| 5 |
+
import re
|
| 6 |
+
from collections import Counter
|
| 7 |
+
from io import BytesIO
|
| 8 |
+
from wordcloud import WordCloud
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# =========================
|
| 12 |
+
# Helpers
|
| 13 |
+
# =========================
|
| 14 |
+
def preprocess_dates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize the 'fecha' column:
    - parse to datetime in UTC, then strip the timezone (naive UTC)
    - add 'date_day' (datetime64[ns] at midnight)
    - add 'date_iso' (str convenience column, YYYY-MM-DD)
    Rows whose date cannot be parsed are dropped.
    """
    out = df.copy()
    parsed = pd.to_datetime(out["fecha"], utc=True, errors="coerce")
    out["fecha"] = parsed.dt.tz_convert("UTC").dt.tz_localize(None)
    out = out.dropna(subset=["fecha"])
    out["date_day"] = out["fecha"].dt.normalize()
    out["date_iso"] = out["date_day"].dt.strftime("%Y-%m-%d")
    return out
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def export_chart_png(chart: alt.Chart, scale: int = 2) -> Optional[BytesIO]:
    """
    Export an Altair chart as an in-memory PNG.

    Requires `pip install vl-convert-python`. Returns the PNG buffer
    rewound to position 0, or None on any failure (best-effort export).
    """
    buf = BytesIO()
    try:
        chart.save(buf, format="png", scale=scale)
    except Exception:
        return None
    buf.seek(0)
    return buf
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# =========================
|
| 46 |
+
# Gráficos
|
| 47 |
+
# =========================
|
| 48 |
+
def chart_posts_diario_ma(dfin: pd.DataFrame, window: int = 7) -> alt.Chart:
    """Daily post counts (solid line) overlaid with a rolling mean (dashed).

    Args:
        dfin: DataFrame with a 'date_day' column (see preprocess_dates).
        window: rolling-mean window in days; min_periods=1 so the start of
            the series shows a partial average instead of NaN.
    """
    tmp = dfin[["date_day"]].copy()
    serie = tmp.groupby("date_day").size().reset_index(name="posts")
    serie["MA"] = serie["posts"].rolling(window, min_periods=1).mean()

    # solid line: raw daily counts
    c_posts = (
        alt.Chart(serie)
        .mark_line(point=False)
        .encode(
            x=alt.X("date_day:T", title="Fecha"),
            y=alt.Y("posts:Q", title="Posts"),
            tooltip=[alt.Tooltip("date_day:T", title="Fecha"), "posts:Q"],
        )
    )
    # dashed line: rolling mean
    c_ma = (
        alt.Chart(serie)
        .mark_line(strokeDash=[4, 3])
        .encode(
            x="date_day:T",
            y=alt.Y("MA:Q", title="Media móvil"),
            tooltip=[alt.Tooltip("date_day:T", title="Fecha"), "MA:Q"],
        )
    )
    return (c_posts + c_ma).properties(height=260)
|
| 72 |
+
|
| 73 |
+
def chart_sentimiento_barras(dfin: pd.DataFrame) -> alt.Chart:
    """
    Bar chart of post counts per sentiment category.

    Accepts both capitalised labels ('Neutral', 'Neutral o mixto') and the
    lowercase labels produced by app.analyzer ('muy negativo' ... 'muy positivo').
    Returns an empty bar chart when 'sent_desc' is missing.
    """
    if "sent_desc" not in dfin.columns:
        # Nothing to show
        return alt.Chart(pd.DataFrame({"sentimiento": [], "posts": []})).mark_bar()

    # Preferred display order, worst → best.
    # FIX: the analyzer emits lowercase labels ("muy negativo", ...) — the
    # previous list only contained capitalised variants, so the preferred
    # ordering never matched and the fallback order was always used.
    order_pref = [
        "Muy negativo", "muy negativo",
        "Negativo", "negativo",
        "Neutral o mixto", "Neutral", "neutral",
        "Positivo", "positivo",
        "Muy positivo", "muy positivo",
    ]

    # Count per category (stable order)
    vc = (
        dfin["sent_desc"]
        .fillna("Desconocido")
        .value_counts()
        .rename_axis("sentimiento")
        .reset_index(name="posts")
    )

    # Keep only known labels that actually occur, in preferred order
    present = [c for c in order_pref if c in vc["sentimiento"].values]
    if not present:
        # None of the known labels found: show whatever is there
        present = list(vc["sentimiento"].values)

    vc["sentimiento"] = pd.Categorical(vc["sentimiento"], categories=present, ordered=True)
    vc = vc.sort_values("sentimiento")

    # Chart
    return (
        alt.Chart(vc)
        .mark_bar()
        .encode(
            x=alt.X("sentimiento:N", sort=present, title="Sentimiento"),
            y=alt.Y("posts:Q", title="Posts"),
            tooltip=["sentimiento:N", "posts:Q"],
        )
        .properties(height=260)
    )
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def chart_sentimiento_apilado(dfin: pd.DataFrame) -> alt.Chart:
    """Stacked-area chart of daily post counts split by sentiment label."""
    grouped = (
        dfin[["date_day", "sent_desc"]]
        .groupby(["date_day", "sent_desc"])
        .size()
        .reset_index(name="posts")
    )
    encoding = dict(
        x=alt.X("date_day:T", title="Fecha"),
        y=alt.Y("posts:Q", stack="zero", title="Posts"),
        color=alt.Color("sent_desc:N", title="Sentimiento"),
        tooltip=[alt.Tooltip("date_day:T", title="Fecha"), "sent_desc:N", "posts:Q"],
    )
    return alt.Chart(grouped).mark_area().encode(**encoding).properties(height=260)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def chart_heatmap_dia_hora(dfin: pd.DataFrame) -> alt.Chart:
    """Heatmap of post counts by day-of-week × hour, from the 'fecha' column."""
    tmp = pd.DataFrame(
        {"dow": dfin["fecha"].dt.day_name(), "hour": dfin["fecha"].dt.hour}
    )
    # Fix weekday ordering (dt.day_name() yields English names)
    order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    tmp["dow"] = pd.Categorical(tmp["dow"], categories=order, ordered=True)
    counts = tmp.groupby(["dow", "hour"]).size().reset_index(name="posts")

    return (
        alt.Chart(counts)
        .mark_rect()
        .encode(
            x=alt.X("hour:O", title="Hora"),
            y=alt.Y("dow:N", title="Día de semana", sort=order),
            color=alt.Color("posts:Q", title="Posts"),
            tooltip=["dow:N", "hour:O", "posts:Q"],
        )
        .properties(height=260)
    )
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def chart_top_hashtags(dfin: pd.DataFrame, topn: int = 20):
    """Horizontal bar chart of the most frequent hashtags.

    Returns None when the 'hashtags' column is missing or holds no values.
    """
    if "hashtags" not in dfin.columns:
        return None
    flat = dfin[["hashtags"]].explode("hashtags").dropna(subset=["hashtags"])
    if flat.empty:
        return None
    counts = (
        flat["hashtags"]
        .value_counts()
        .head(topn)
        .rename_axis("hashtag")
        .reset_index(name="conteo")
    )
    chart = alt.Chart(counts).mark_bar().encode(
        x="conteo:Q",
        y=alt.Y("hashtag:N", sort="-x"),
        tooltip=["hashtag:N", "conteo:Q"],
    )
    # grow the chart with the number of bars, but never below 260px
    return chart.properties(height=max(260, 24 * len(counts) + 20))
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def chart_sent_por_hashtag(dfin: pd.DataFrame, topn: int = 15):
    """Stacked bars: sentiment breakdown for the top-N hashtags.

    Returns None when the 'hashtags' column is missing or holds no values.
    """
    if "hashtags" not in dfin.columns:
        return None
    exp = dfin[["hashtags", "sent_desc"]].explode("hashtags").dropna(subset=["hashtags"])
    if exp.empty:
        return None
    # restrict to the most frequent hashtags
    top_tags = exp["hashtags"].value_counts().head(topn).index
    g = (
        exp[exp["hashtags"].isin(top_tags)]
        .groupby(["hashtags", "sent_desc"])
        .size()
        .reset_index(name="posts")
    )
    return (
        alt.Chart(g)
        .mark_bar()
        .encode(
            y=alt.Y("hashtags:N", title="Hashtag", sort="-x"),
            x=alt.X("posts:Q", title="Posts"),
            color=alt.Color("sent_desc:N", title="Sentimiento"),
            tooltip=["hashtags:N", "sent_desc:N", "posts:Q"],
        )
        .properties(height=28 * len(top_tags) + 20)
    )
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def chart_top_menciones(dfin: pd.DataFrame, topn: int = 20):
    """Horizontal bar chart of the most mentioned accounts.

    Returns None when the 'mentions' column is missing or holds no values.
    """
    if "mentions" not in dfin.columns:
        return None
    flat = dfin[["mentions"]].explode("mentions").dropna(subset=["mentions"])
    if flat.empty:
        return None
    counts = (
        flat["mentions"]
        .value_counts()
        .head(topn)
        .rename_axis("mención")
        .reset_index(name="conteo")
    )
    chart = alt.Chart(counts).mark_bar().encode(
        x="conteo:Q",
        y=alt.Y("mención:N", sort="-x"),
        tooltip=["mención:N", "conteo:Q"],
    )
    return chart.properties(height=28 * len(counts) + 20)
|
| 216 |
+
|
| 217 |
+
def chart_hist_longitud(dfin: pd.DataFrame):
    """Histogram of post length in words (None if 'n_palabras' is missing)."""
    if "n_palabras" not in dfin.columns:
        return None
    data = dfin[["n_palabras"]].copy()
    chart = alt.Chart(data).mark_bar().encode(
        x=alt.X("n_palabras:Q", bin=alt.Bin(maxbins=30), title="Número de palabras"),
        y=alt.Y("count():Q", title="Posts"),
        tooltip=[alt.Tooltip("count():Q", title="Posts")],
    )
    return chart.properties(height=260)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def chart_top_dominios(dfin: pd.DataFrame, topn: int = 20):
    """Bar chart of the most linked domains, extracted from URLs in 'texto'.

    Returns None when no URL is found.
    """
    tmp = dfin[["texto"]].copy()
    # capture the host part of every http(s) URL occurrence
    s = tmp["texto"].str.extractall(r"https?://([^/\s]+)")[0]
    if s.empty:
        return None
    vc = s.value_counts().head(topn).rename_axis("dominio").reset_index(name="conteo")
    return (
        alt.Chart(vc)
        .mark_bar()
        .encode(
            x="conteo:Q",
            y=alt.Y("dominio:N", sort="-x"),
            tooltip=["dominio:N", "conteo:Q"],
        )
        .properties(height=28 * len(vc) + 20)
    )
|
| 249 |
+
|
| 250 |
+
def chart_nube_palabras(dfin: pd.DataFrame, max_words: int = 150) -> BytesIO | None:
    """
    Build a word cloud (in-memory PNG) from dfin['texto'].

    - Removes URLs; strips only the '#'/'@' characters so the words remain.
    - Filters tokens shorter than 3 chars and basic ES/EN stopwords.
    - Returns a BytesIO holding the PNG, or None when no usable text remains.
    """
    if "texto" not in dfin.columns or dfin.empty:
        return None

    # --- collect and clean text ---
    texts = dfin["texto"].dropna().astype(str).tolist()
    if not texts:
        return None

    text = " ".join(texts)

    # remove URLs; for mentions/hashtags drop only the '#'/'@', keep the word
    text = re.sub(r"https?://\S+", " ", text)
    text = re.sub(r"[@#]", " ", text)

    # basic tokenisation: latin letters (incl. Spanish accents) and digits
    tokens = re.findall(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ0-9]+", text)

    # simple ES/EN stopword lists (extend if needed)
    stop_es = {
        "de","la","que","el","en","y","a","los","del","se","las","por","un","para","con","no",
        "una","su","al","lo","como","más","pero","sus","le","ya","o","fue","este","ha","sí",
        "porque","esta","son","entre","cuando","muy","sin","sobre","también","me","hasta",
        "hay","donde","quien","desde","todo","nos","durante","todos","uno","les","ni","contra",
        "otros","ese","eso","ante","ellos","e","esto","mí","antes","algunos","qué","unos",
        "yo","otro","otras","otra","él","tanto","esa","estos","mucho","quienes","nada","muchos",
        "cual","poco","ella","estar","estas","algunas","algo","nosotros","mi","mis","tú","te",
        "ti","tu","tus","ellas","nosotras","vosotros","vosotras","os","mío","mía","míos","mías",
        "tuyo","tuya","tuyos","tuyas","suyo","suya","suyos","suyas","nuestro","nuestra","nuestros",
        "nuestras","vuestro","vuestra","vuestros","vuestras","esos","esas","estoy","estás","está",
        "estamos","estáis","están","esté","estés","estemos","estéis","estén","estaré","estarás",
        "estará","estaremos","estaréis","estarán"
    }
    stop_en = {
        "the","a","an","and","or","but","to","of","for","in","on","at","by","with","from","as",
        "is","are","was","were","be","been","being","it","its","this","that","these","those",
        "i","you","he","she","we","they","me","him","her","us","them","my","your","his","their",
        "our","mine","yours","hers","theirs","ours","not","no","so","if","than","then","too",
        "very","can","could","should","would","will","just","also","into","over","under",
    }
    stops = {t.lower() for t in (stop_es | stop_en)}

    # keep tokens that are long enough and not stopwords
    tokens = [t for t in tokens if len(t) >= 3 and t.lower() not in stops]
    if not tokens:
        return None

    # frequency table (case-folded)
    freqs = Counter(t.lower() for t in tokens)

    # render the cloud
    wc = WordCloud(
        width=1400,
        height=800,
        background_color="white",
        prefer_horizontal=0.9,
        collocations=False,  # do not group bi/trigrams; better control over tokens
        max_words=max_words,
    ).generate_from_frequencies(freqs)

    # write the image as a PNG into memory
    png = BytesIO()
    wc.to_image().save(png, format="PNG")
    png.seek(0)
    return png
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
# Registry: display name → chart builder, consumed by the UI layer.
# NOTE(review): every builder returns an Altair chart (or None) except
# chart_nube_palabras, which returns a PNG BytesIO — callers must
# special-case that entry when rendering.
CHARTS = {
    "📈 Posts por día + media móvil": chart_posts_diario_ma,
    "📊 Recuento por sentimiento": chart_sentimiento_barras,
    "🧭 Sentimiento (área apilada)": chart_sentimiento_apilado,
    "☁️ Nube de palabras": chart_nube_palabras,
    "🗓️ Calor Día×Hora": chart_heatmap_dia_hora,
    "🏷️ Top hashtags": chart_top_hashtags,
    "🏷️ Hashtag × Sentimiento": chart_sent_por_hashtag,
    "👤 Top menciones": chart_top_menciones,
    "📏 Longitud del texto": chart_hist_longitud,
    "🔗 Top dominios (URLs)": chart_top_dominios,
}
|
app/client_manager.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from atproto import Client
|
| 3 |
+
|
| 4 |
+
# ========= Gestión de sesión (login per-user) =========
|
| 5 |
+
|
| 6 |
+
def _get_client():
    """Return the current Bluesky client from the session (None if absent)."""
    return st.session_state.get("bsky_client")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _set_client(c, handle):
    """Store the Bluesky client and its handle in the Streamlit session."""
    st.session_state["bsky_client"] = c
    st.session_state["bsky_handle"] = handle
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _is_logged_in():
    """Return True when the session holds an active Bluesky client."""
    return st.session_state.get("bsky_client") is not None
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _logout():
    """Log out by removing the client data from the session (idempotent)."""
    st.session_state.pop("bsky_client", None)
    st.session_state.pop("bsky_handle", None)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def login_bsky(handle: str, app_password: str):
    """
    Log in to Bluesky with the user's handle and App Password.

    Returns the authenticated atproto Client. No error handling here:
    whatever Client.login raises on bad credentials propagates to the caller.
    """
    client = Client()  # uses the default service https://bsky.social
    client.login(handle, app_password)
    return client
|
| 35 |
+
|
| 36 |
+
# ---- Public aliases kept for compatibility with the rest of the code ----
def get_client():
    """Public alias for _get_client()."""
    return _get_client()

def set_client(c, handle):
    """Public alias for _set_client()."""
    return _set_client(c, handle)

def is_logged_in():
    """Public alias for _is_logged_in()."""
    return _is_logged_in()

def logout():
    """Public alias for _logout()."""
    return _logout()

def login(handle: str, app_password: str):
    """Public alias for login_bsky()."""
    return login_bsky(handle, app_password)
|
app/fetcher.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
from datetime import datetime, timedelta, timezone
|
| 3 |
+
from typing import Iterable, Optional, List
|
| 4 |
+
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from atproto import models
|
| 7 |
+
|
| 8 |
+
from app.client_manager import get_client
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _iso_to_dt_utc_naive(iso: str) -> Optional[datetime]:
|
| 12 |
+
"""Convierte ISO (posible 'Z') a datetime naive en UTC."""
|
| 13 |
+
if not iso:
|
| 14 |
+
return None
|
| 15 |
+
try:
|
| 16 |
+
iso = iso.replace("Z", "+00:00")
|
| 17 |
+
aware = datetime.fromisoformat(iso)
|
| 18 |
+
return aware.astimezone(timezone.utc).replace(tzinfo=None)
|
| 19 |
+
except Exception:
|
| 20 |
+
return None
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _search_one_term(
    term: str,
    days_back: int,
    max_posts: Optional[int],
) -> pd.DataFrame:
    """Fetch posts for one term via the official API (app.bsky.feed.search_posts).

    Paginates until *max_posts* rows are collected, a post older than the
    *days_back* cutoff is seen, or the API returns no further cursor.
    Returns a DataFrame with columns: texto, autor, fecha (naive UTC), uri.
    """
    client = get_client()
    if client is None:
        raise RuntimeError("No hay sesión de Bluesky.")

    cutoff_aware = datetime.now(timezone.utc) - timedelta(days=days_back)

    collected: List[dict] = []
    cursor = None
    done = False

    while not done:
        # Page size: up to 100, but never more than what is still needed.
        if max_posts is None:
            page_size = 100
        else:
            left = max(max_posts - len(collected), 0)
            if left == 0:
                break
            page_size = max(1, min(100, left))

        params = models.AppBskyFeedSearchPosts.Params(q=term, limit=page_size, cursor=cursor)
        resp = client.app.bsky.feed.search_posts(params=params)

        page = resp.posts or []
        if not page:
            break

        for post in page:
            created_raw = getattr(post.record, "created_at", "") or ""

            # Age check on timezone-aware datetimes: stop this term as soon
            # as a post predates the cutoff.
            try:
                if datetime.fromisoformat(created_raw.replace("Z", "+00:00")) < cutoff_aware:
                    done = True
                    break
            except Exception:
                pass  # unparsable timestamp: keep the post, cannot age-check it

            collected.append(
                {
                    "texto": getattr(post.record, "text", "") or "",
                    "autor": getattr(post.author, "handle", "") or "",
                    "fecha": _iso_to_dt_utc_naive(created_raw),
                    "uri": getattr(post, "uri", "") or "",
                }
            )

            if max_posts is not None and len(collected) >= max_posts:
                done = True
                break

        if done:
            break

        cursor = resp.cursor
        if not cursor:
            break

    return pd.DataFrame(collected, columns=["texto", "autor", "fecha", "uri"])
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def fetch_posts(topic: str, days_back: int, user_handle: str, max_posts: Optional[int] = None) -> pd.DataFrame:
    """AND-style search: the API's 'q' parameter already matches all words together."""
    # user_handle is accepted for interface symmetry with fetch_posts_or.
    return _search_one_term(topic, days_back, max_posts)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def fetch_posts_or(terms: Iterable[str], days_back: int, user_handle: str, max_posts: Optional[int] = None) -> pd.DataFrame:
    """OR-style search: split the quota across terms, merge, and de-duplicate.

    Per-term post counts are exposed in ``df.attrs["aportes"]``; per-term
    fetch errors (previously swallowed silently) are exposed in
    ``df.attrs["errores"]`` so the UI can actually report them.

    Args:
        terms: search terms; blanks are stripped and empties discarded.
        days_back: age cutoff in days for each term.
        user_handle: kept for interface symmetry with fetch_posts.
        max_posts: overall cap on returned rows (None = unlimited).
    """
    terms = [t.strip() for t in terms if t and t.strip()]
    if not terms:
        return pd.DataFrame(columns=["texto", "autor", "fecha", "uri"])

    remaining = max_posts
    frames: List[pd.DataFrame] = []
    aportes = {}
    errores = {}

    for i, term in enumerate(terms):
        limit_i = None
        if remaining is not None:
            # Spread what is left across the terms not yet fetched (ceil).
            limit_i = math.ceil(remaining / (len(terms) - i))

        try:
            df_i = _search_one_term(term, days_back, limit_i)
        except Exception as exc:
            # Keep going with the other terms; surface the failure via attrs
            # instead of dropping it on the floor.
            errores[term] = str(exc)
            df_i = pd.DataFrame(columns=["texto", "autor", "fecha", "uri"])

        aportes[term] = len(df_i)
        frames.append(df_i)

        if remaining is not None:
            remaining = max(0, remaining - len(df_i))

    df = pd.concat(frames, ignore_index=True)
    df = df.drop_duplicates(subset=["uri", "texto", "autor"])

    if max_posts is not None:
        df = df.head(max_posts)

    # Per-term contribution (and any errors) for the UI to display.
    df.attrs["aportes"] = aportes
    if errores:
        df.attrs["errores"] = errores
    return df
|
app/reporting.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/reporting.py
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
from typing import List, Tuple
|
| 5 |
+
import altair as alt
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from reportlab.lib.pagesizes import A4
|
| 8 |
+
from reportlab.pdfgen import canvas
|
| 9 |
+
from reportlab.lib.utils import ImageReader
|
| 10 |
+
|
| 11 |
+
def altair_to_png_bytes(chart: alt.Chart, scale: int = 2) -> BytesIO:
    """Render an Altair chart to an in-memory PNG (requires vl-convert-python)."""
    out = BytesIO()
    chart.save(out, format="png", scale=scale)
    out.seek(0)  # rewind so callers can read from the start
    return out
|
| 16 |
+
|
| 17 |
+
def build_pdf_with_images(df: pd.DataFrame, images: List[Tuple[str, BytesIO]]) -> bytes:
    """Compose a PDF report: summary header plus one titled image per chart.

    *images* is a list of (title, png-bytes) pairs. Each image is scaled down
    to the usable page width (never up) and flows onto new pages as needed.
    Returns the finished PDF as raw bytes.
    """
    out = BytesIO()
    pdf = canvas.Canvas(out, pagesize=A4)
    page_w, page_h = A4
    margin = 36
    y = page_h - margin

    # Report header.
    pdf.setFont("Helvetica-Bold", 18)
    pdf.drawString(margin, y, "Informe de Análisis - Bluesky Explorer")
    y -= 22
    pdf.setFont("Helvetica", 11)
    pdf.drawString(margin, y, f"Total de publicaciones analizadas: {len(df)}")
    y -= 8
    pdf.drawString(margin, y, f"Gráficos incluidos: {len(images)}")
    y -= 20

    usable_w = page_w - 2 * margin

    for idx, (title, png_bytes) in enumerate(images, start=1):
        # Too close to the bottom for a title plus an image: fresh page.
        if y < 140:
            pdf.showPage()
            y = page_h - margin

        pdf.setFont("Helvetica-Bold", 12)
        pdf.drawString(margin, y, f"{idx}. {title}")
        y -= 12

        reader = ImageReader(png_bytes)
        iw, ih = reader.getSize()
        factor = min(usable_w / iw, 1)
        w = iw * factor
        h = ih * factor

        # Image would overflow below the title: repeat the title on a new page.
        if y - h < margin:
            pdf.showPage()
            y = page_h - margin - 12
            pdf.setFont("Helvetica-Bold", 12)
            pdf.drawString(margin, y, f"{idx}. {title}")
            y -= 12

        pdf.drawImage(reader, margin, y - h, width=w, height=h, preserveAspectRatio=True)
        y -= h + 18

    pdf.showPage()
    pdf.save()
    out.seek(0)
    return out.getvalue()
|
app/ui/__init__.py
ADDED
|
File without changes
|
app/ui/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (156 Bytes). View file
|
|
|
app/ui/__pycache__/components.cpython-311.pyc
ADDED
|
Binary file (4.26 kB). View file
|
|
|
app/ui/__pycache__/main_app.cpython-311.pyc
ADDED
|
Binary file (4.7 kB). View file
|
|
|
app/ui/__pycache__/panel.cpython-311.pyc
ADDED
|
Binary file (4.73 kB). View file
|
|
|
app/ui/components.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import streamlit as st
|
| 3 |
+
from config.settings import MAX_POSTS_WARNING
|
| 4 |
+
from app.client_manager import is_logged_in, login, logout, set_client
|
| 5 |
+
|
| 6 |
+
def render_login_sidebar():
    """Sidebar login/logout widget for the Bluesky session.

    Bug fix: ``st.rerun()`` works by raising an exception (a subclass of
    Exception), so calling it inside the ``try`` meant a *successful* login
    fell into the ``except`` branch, showed "Usuario o contraseña
    incorrectos." and cancelled the rerun. The rerun now happens outside
    the ``try``.
    """
    st.sidebar.header("Acceso a Bluesky")
    if not is_logged_in():
        handle = st.sidebar.text_input("Handle", "", key="login_handle")
        app_password = st.sidebar.text_input("App Password", type="password", key="login_password")
        if st.sidebar.button("Iniciar sesión", key="btn_login"):
            authenticated = False
            try:
                client = login(handle, app_password)
                set_client(client, handle)
                authenticated = True
            except Exception:
                st.sidebar.error("Usuario o contraseña incorrectos.")
            if authenticated:
                st.sidebar.success("Autenticado correctamente.")
                st.rerun()  # must stay outside the try (raises to rerun)
    else:
        st.sidebar.success(f"Sesión iniciada como {st.session_state.get('bsky_handle')}")
        if st.sidebar.button("Cerrar sesión", key="btn_logout"):
            logout()
            st.rerun()
|
| 24 |
+
|
| 25 |
+
def render_search_form():
    """Sidebar search form.

    Returns a tuple: (submitted, topic, days_back, max_posts, operator,
    require_confirm, confirm_heavy).
    """
    st.sidebar.header("Configuración de búsqueda")
    with st.sidebar.form("search_form", clear_on_submit=False):
        topic = st.text_input("Término", value="apagón", key="search_topic")
        days_back = st.slider("Días atrás", 7, 365, 30, key="search_days_back")
        max_posts = st.number_input("Máximo de posts", 1, 30000, 1000, key="search_max_posts")
        operator = st.radio("Operador", ["AND", "OR"], horizontal=True, key="search_operator")

        # Ask for explicit confirmation when the request is unusually large.
        require_confirm = max_posts > MAX_POSTS_WARNING
        confirm_heavy = True
        if require_confirm:
            st.warning(f"🚨 Has solicitado {int(max_posts)} publicaciones. Puede ralentizar el proceso.", icon="⚠️")
            confirm_heavy = st.checkbox("Entiendo el aviso y deseo continuar", key="search_confirm_heavy")

        submitted = st.form_submit_button("Buscar", use_container_width=True, type="primary")

    return submitted, str(topic).strip(), int(days_back), int(max_posts), operator, bool(require_confirm), bool(confirm_heavy)
|
app/ui/main_app.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# --- ensure project root on sys.path ---
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
# Make the project root importable when this file is run directly as a script.
ROOT = Path(__file__).resolve().parents[2]  # .../TFM
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
| 8 |
+
# ---------------------------------------
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import hashlib
|
| 12 |
+
from io import BytesIO
|
| 13 |
+
|
| 14 |
+
import altair as alt
|
| 15 |
+
import pandas as pd
|
| 16 |
+
import streamlit as st
|
| 17 |
+
from reportlab.lib.pagesizes import A4
|
| 18 |
+
from reportlab.pdfgen import canvas
|
| 19 |
+
from reportlab.lib.utils import ImageReader
|
| 20 |
+
|
| 21 |
+
from config.settings import MAX_POSTS_WARNING
|
| 22 |
+
from app.client_manager import login, set_client, logout, is_logged_in
|
| 23 |
+
from app.fetcher import fetch_posts, fetch_posts_or
|
| 24 |
+
from app.analyzer import clean_and_analyze
|
| 25 |
+
from app import charts # <- nuestras funciones de gráficos
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Global page setup; set_page_config must be the first Streamlit call.
st.set_page_config(page_title="Bluesky Explorer", page_icon="🔎", layout="wide")
st.title("🔎 Bluesky Explorer")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# -------------------------------
|
| 33 |
+
# Helpers
|
| 34 |
+
# -------------------------------
|
| 35 |
+
def _result_key(handle, topic, days_back, max_posts, operator):
|
| 36 |
+
payload = json.dumps(
|
| 37 |
+
{"h": handle, "t": topic, "d": int(days_back), "m": int(max_posts), "o": operator},
|
| 38 |
+
ensure_ascii=False,
|
| 39 |
+
sort_keys=True,
|
| 40 |
+
).encode()
|
| 41 |
+
return hashlib.md5(payload).hexdigest()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
# -------------------------------
# Login
# -------------------------------
st.sidebar.header("Acceso a Bluesky")

if not is_logged_in():
    handle = st.sidebar.text_input("Handle", "", key="login_handle")
    app_password = st.sidebar.text_input("App Password", type="password", key="login_pwd")
    if st.sidebar.button("Iniciar sesión", key="btn_login"):
        authenticated = False
        try:
            client = login(handle, app_password)
            set_client(client, handle)
            authenticated = True
        except Exception:
            st.sidebar.error("Usuario o contraseña incorrectos.")
        if authenticated:
            st.sidebar.success("Autenticado correctamente.")
            # Bug fix: st.rerun() raises a control-flow exception; keeping it
            # inside the try made a successful login show the error message
            # and cancel the rerun.
            st.rerun()
else:
    st.sidebar.success(f"Sesión iniciada como {st.session_state.get('bsky_handle')}")
    if st.sidebar.button("Cerrar sesión", key="btn_logout"):
        logout()
        st.rerun()

# Nothing below makes sense without an authenticated session.
if not is_logged_in():
    st.stop()
|
| 68 |
+
|
| 69 |
+
# -------------------------------
# Parámetros
# -------------------------------
st.sidebar.header("Configuración de búsqueda")
with st.sidebar.form("search_form", clear_on_submit=False):
    topic = st.text_input("Término", value="apagón", key="param_topic")
    days_back = st.slider("Días atrás", 7, 365, 30, key="param_days")
    max_posts = st.number_input("Máximo de posts", 1, 30000, 1000, key="param_max")
    operator = st.radio("Operador", ["AND", "OR"], horizontal=True, key="param_op")

    # Large requests need an explicit confirmation checkbox.
    require_confirm = max_posts > MAX_POSTS_WARNING
    confirm_heavy = True
    if require_confirm:
        st.warning(
            f"🚨 Has solicitado {int(max_posts)} publicaciones. Puede ralentizar el proceso.",
            icon="⚠️",
        )
        confirm_heavy = st.checkbox("Entiendo el aviso y deseo continuar", key="param_confirm")

    submitted = st.form_submit_button("Buscar", use_container_width=True)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# -------------------------------
# Buscar + Analizar
# -------------------------------
if submitted:
    topic = topic.strip()
    # Validate inputs before touching the network.
    if not topic:
        st.error("Debes introducir un término de búsqueda.")
        st.stop()
    if days_back <= 0 or max_posts <= 0:
        st.error("Parámetros inválidos.")
        st.stop()
    if require_confirm and not confirm_heavy:
        st.error("Debes marcar la casilla para continuar.")
        st.stop()

    try:
        with st.spinner("🔎 Buscando publicaciones en Bluesky..."):
            current_handle = st.session_state["bsky_handle"]
            if operator == "AND":
                df_raw = fetch_posts(topic, days_back, current_handle, int(max_posts))
            else:
                df_raw = fetch_posts_or(topic.split(), days_back, current_handle, int(max_posts))
    except Exception as e:
        st.error(f"Error al recuperar publicaciones: {e}")
        st.stop()

    # Per-term contribution info, when the fetcher provides it (OR mode).
    aportes = df_raw.attrs.get("aportes")
    if aportes:
        st.caption("📊 Posts por término: " + ", ".join(f"{k}: {v}" for k, v in aportes.items()))

    if df_raw is None or df_raw.empty:
        st.warning("No se encontraron publicaciones.")
        st.stop()

    # Sentiment analysis + cleaning (cached function).
    try:
        with st.spinner("🧠 Analizando sentimiento y limpiando datos..."):
            df = clean_and_analyze(df_raw)
    except Exception as e:
        st.error(f"Error durante el análisis: {e}")
        st.stop()

    if df is None or df.empty:
        st.info("No se encontraron publicaciones válidas tras limpieza.")
        st.stop()

    # Normalise dates so charts/exports never see raw strings.
    df = charts.preprocess_dates(df)

    # Store the result in the session under a key derived from the parameters.
    key = _result_key(st.session_state["bsky_handle"], topic, days_back, int(max_posts), operator)
    st.session_state["current_key"] = key
    st.session_state.setdefault("results", {})[key] = df
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# -------------------------------
# Recuperar último resultado
# -------------------------------
key = st.session_state.get("current_key")
df = st.session_state.get("results", {}).get(key) if key else None

if df is None or df.empty:
    st.info("Realiza una búsqueda para construir el panel de gráficos.")
    st.stop()

# -------------------------------
# Métricas + Datos
# -------------------------------
st.metric("Posts totales", len(df))
st.dataframe(df, use_container_width=True)
|
| 164 |
+
|
| 165 |
+
# -------------------------------
# Panel de gráficos
# -------------------------------
st.sidebar.header("Panel de gráficos")

# Keep button captions on a single line.
st.sidebar.markdown(
    """
    <style>
    section[data-testid="stSidebar"] button { white-space: nowrap; }
    </style>
    """,
    unsafe_allow_html=True,
)

# One chart list per search result, keyed by the result key.
st.session_state.setdefault("panels", {})
st.session_state["panels"].setdefault(key, [])
panels = st.session_state["panels"][key]

choice = st.sidebar.selectbox("Añadir gráfico", list(charts.CHARTS.keys()), key="chart_select")

col_add, col_undo, col_clear = st.sidebar.columns(3, gap="small")
with col_add:
    add = st.button(" ➕ ", use_container_width=True, key="btn_add_chart")
with col_undo:
    undo = st.button(" ↩️ ", use_container_width=True, key="btn_undo_chart")
with col_clear:
    clear = st.button(" 🗑 ", use_container_width=True, key="btn_clear_chart")

if add:
    panels.append(choice)
if undo and panels:
    panels.pop()
if clear:
    panels.clear()
|
| 200 |
+
|
| 201 |
+
if panels:
    st.subheader("📊 Panel de gráficos")

    chart_pngs: list[tuple[str, BytesIO]] = []

    for position, chart_name in enumerate(panels, start=1):
        st.markdown(f"**{position}. {chart_name}**")
        produced = charts.CHARTS[chart_name](df)

        if produced is None:
            # The chart function had no data to work with.
            st.info("No hay datos suficientes para este gráfico.")
        elif hasattr(produced, "to_dict"):
            # Any Altair object (Chart, LayerChart, Facet, Concat, ...) —
            # duck-typed via to_dict.
            st.altair_chart(produced, use_container_width=True)
            # PNG export requires vl-convert-python.
            png = charts.export_chart_png(produced, scale=2)
            if png:
                chart_pngs.append((chart_name, png))
            else:
                st.warning(f"No se pudo exportar '{chart_name}' como imagen (Altair).")
        elif isinstance(produced, BytesIO):
            # Pre-rendered image (e.g. the word cloud).
            st.image(produced, use_container_width=True)
            chart_pngs.append((chart_name, produced))
        else:
            st.warning(f"Tipo de salida no soportado para '{chart_name}'.")
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
# PDF
|
| 236 |
+
def _pdf_from_images(df_data: pd.DataFrame, images: list[tuple[str, BytesIO]]) -> bytes:
|
| 237 |
+
buf = BytesIO()
|
| 238 |
+
c = canvas.Canvas(buf, pagesize=A4)
|
| 239 |
+
page_w, page_h = A4
|
| 240 |
+
margin = 36
|
| 241 |
+
y = page_h - margin
|
| 242 |
+
|
| 243 |
+
c.setFont("Helvetica-Bold", 18)
|
| 244 |
+
c.drawString(margin, y, "Informe de Análisis - Bluesky Explorer")
|
| 245 |
+
y -= 22
|
| 246 |
+
c.setFont("Helvetica", 11)
|
| 247 |
+
c.drawString(margin, y, f"Total de publicaciones analizadas: {len(df_data)}")
|
| 248 |
+
y -= 8
|
| 249 |
+
c.drawString(margin, y, f"Gráficos incluidos: {len(images)}")
|
| 250 |
+
y -= 20
|
| 251 |
+
|
| 252 |
+
max_w = page_w - 2 * margin
|
| 253 |
+
|
| 254 |
+
for idx, (title, png_bytes) in enumerate(images, start=1):
|
| 255 |
+
if y < 140:
|
| 256 |
+
c.showPage()
|
| 257 |
+
y = page_h - margin
|
| 258 |
+
|
| 259 |
+
c.setFont("Helvetica-Bold", 12)
|
| 260 |
+
c.drawString(margin, y, f"{idx}. {title}")
|
| 261 |
+
y -= 12
|
| 262 |
+
|
| 263 |
+
img = ImageReader(png_bytes)
|
| 264 |
+
iw, ih = img.getSize()
|
| 265 |
+
scale = min(max_w / iw, 1.0)
|
| 266 |
+
w = iw * scale
|
| 267 |
+
h = ih * scale
|
| 268 |
+
|
| 269 |
+
if y - h < margin:
|
| 270 |
+
c.showPage()
|
| 271 |
+
y = page_h - margin - 12
|
| 272 |
+
c.setFont("Helvetica-Bold", 12)
|
| 273 |
+
c.drawString(margin, y, f"{idx}. {title}")
|
| 274 |
+
y -= 12
|
| 275 |
+
|
| 276 |
+
c.drawImage(img, margin, y - h, width=w, height=h, preserveAspectRatio=True)
|
| 277 |
+
y -= h + 18
|
| 278 |
+
|
| 279 |
+
c.showPage()
|
| 280 |
+
c.save()
|
| 281 |
+
buf.seek(0)
|
| 282 |
+
return buf.getvalue()
|
| 283 |
+
|
| 284 |
+
if chart_pngs:
|
| 285 |
+
pdf_bytes = _pdf_from_images(df, chart_pngs)
|
| 286 |
+
st.download_button(
|
| 287 |
+
label="📄 Descargar informe en PDF",
|
| 288 |
+
data=pdf_bytes,
|
| 289 |
+
file_name="informe_bluesky.pdf",
|
| 290 |
+
mime="application/pdf",
|
| 291 |
+
key="btn_pdf",
|
| 292 |
+
)
|
| 293 |
+
else:
|
| 294 |
+
st.info("Selecciona un tipo de gráfico en la barra lateral y pulsa **Añadir** para construir tu panel.")
|
app/ui/panel.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from io import BytesIO
|
| 3 |
+
from typing import List, Tuple
|
| 4 |
+
import streamlit as st
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from app.charts import CHARTS
|
| 7 |
+
from app.reporting import altair_to_png_bytes, build_pdf_with_images
|
| 8 |
+
|
| 9 |
+
def render_panel(df: pd.DataFrame, state_key: str):
    """Render the chart panel for one search result and offer a PDF export.

    The panel composition is stored in session state under *state_key*, so
    each search result keeps its own list of charts; widget keys are also
    suffixed with *state_key* to stay unique.
    """
    st.sidebar.header("Panel de gráficos")

    # Keep button captions on a single line.
    st.sidebar.markdown(
        """
        <style>
        section[data-testid="stSidebar"] button { white-space: nowrap; }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # Per-result panel state.
    st.session_state.setdefault("panels", {})
    st.session_state["panels"].setdefault(state_key, [])
    panels: list[str] = st.session_state["panels"][state_key]

    choice = st.sidebar.selectbox(
        "Añadir gráfico",
        list(CHARTS.keys()),
        key=f"chart_select_{state_key}",
    )
    col_add, col_undo, col_clear = st.sidebar.columns(3, gap="small")
    with col_add:
        add = st.button(" ➕ ", use_container_width=True, key=f"btn_add_{state_key}")
    with col_undo:
        undo = st.button(" ↩️ ", use_container_width=True, key=f"btn_undo_{state_key}")
    with col_clear:
        clear = st.button(" 🗑 ", use_container_width=True, key=f"btn_clear_{state_key}")

    if add:
        panels.append(choice)
    if undo and panels:
        panels.pop()
    if clear:
        panels.clear()

    if not panels:
        st.info("Selecciona un tipo de gráfico en la barra lateral y pulsa **Añadir** para construir tu panel.")
        return

    st.subheader("📊 Panel de gráficos")

    exported: List[Tuple[str, BytesIO]] = []

    for position, name in enumerate(panels, start=1):
        st.markdown(f"**{position}. {name}**")
        chart_obj = CHARTS[name](df)
        if chart_obj is None:
            st.info("No hay datos suficientes para este gráfico.")
            continue

        st.altair_chart(chart_obj, use_container_width=True)

        # PNG export needs vl-convert-python; warn but keep rendering on failure.
        try:
            exported.append((name, altair_to_png_bytes(chart_obj, scale=2)))
        except Exception as e:
            st.warning(f"No se pudo exportar '{name}' como imagen: {e}")

    if exported:
        st.download_button(
            label="📄 Descargar informe en PDF",
            data=build_pdf_with_images(df, exported),
            file_name="informe_bluesky.pdf",
            mime="application/pdf",
            key=f"btn_pdf_{state_key}",
        )
    else:
        st.info("No se pudieron exportar imágenes para el PDF.")
|
app/utils.py
ADDED
|
File without changes
|