# NOTE(review): scraped page header, not code — preserved below as comments
# so the file remains valid Python.
# GaetanoParente's picture
# integrata persistenza dei log precedenti  (= "integrated persistence of previous logs")
# commit: a4a884a
import os
import uuid
import csv
import pytz
from datetime import datetime
from pathlib import Path
from huggingface_hub import CommitScheduler, hf_hub_download
# --- CONFIGURATION ---
# Hugging Face dataset repo that persists the access logs across restarts.
DATASET_REPO_ID = "NextGenTech/ngt-ai-platform-logs"
# Local folder / file where rows are appended before being committed upstream.
LOG_DIR = Path("data/logs")
LOG_FILE = LOG_DIR / "access_logs.csv"
# Token read from the environment; when absent, remote restore/upload is skipped.
HF_TOKEN = os.environ.get("HF_TOKEN")
ITALY_TZ = pytz.timezone("Europe/Rome")
TIME = 5 # Minutes between each dataset upload by the CommitScheduler
LOG_DIR.mkdir(parents=True, exist_ok=True)
# Restore previously-committed logs from the dataset repo so history survives
# restarts. Best-effort: any failure (missing remote file, first run, auth
# error) just means we start with a fresh local log file.
if HF_TOKEN:
    try:
        print("📥 Controllo presenza log remoti...")
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename="logs/access_logs.csv",
            repo_type="dataset",
            token=HF_TOKEN,
            # With local_dir="data", the file lands at data/logs/access_logs.csv,
            # i.e. exactly LOG_FILE.
            local_dir="data",
            # NOTE(review): deprecated/ignored in recent huggingface_hub
            # versions (real files are the default there); kept for
            # compatibility with older versions.
            local_dir_use_symlinks=False
        )
        print("✅ Storico log scaricato e ripristinato!")
    except Exception as e:
        # Broad catch is deliberate: restoring history must never block startup.
        print(f"⚠️ Nessun log remoto trovato (o primo avvio): {e}")
        print("🆕 Si partirà con un nuovo file di log.")
# Write the CSV header if the log file is missing or empty (fresh start,
# i.e. nothing was restored from the remote dataset).
if not LOG_FILE.exists() or LOG_FILE.stat().st_size == 0:
    print("🆕 Creazione header file di log...")
    with open(LOG_FILE, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow([
            "timestamp", "session_id", "module", "action",
            "ip_address", "user_agent", "language", "input_size",
            "input_text", "processing_time"
        ])
# Background uploader: every TIME minutes, commit the contents of LOG_DIR
# into the "logs/" folder of the dataset repo. Its `.lock` is also used by
# log_interaction to avoid committing a half-written row.
scheduler = CommitScheduler(
    repo_id=DATASET_REPO_ID,
    repo_type="dataset",
    token=HF_TOKEN,
    folder_path=LOG_DIR,
    path_in_repo="logs",
    every=TIME,
)
def log_interaction(request, module_name, action, input_data=None, execution_time=0.0):
    """Append one interaction record to the CSV access log.

    Args:
        request: Incoming request object exposing `.headers` and
            `.client.host` (FastAPI/Gradio style); pass None in local/dev
            mode to log placeholder client metadata.
        module_name: Name of the platform module that was used.
        action: Short label describing the action performed.
        input_data: Optional payload. A `str` is measured and truncated to
            1000 chars; an object with a `.shape` attribute (array/image)
            logs its shape; any other non-None value logs a generic marker.
        execution_time: Processing time in seconds, logged as "N.NNNNs".

    Never raises: every failure is caught and printed so that logging can
    never break the calling request handler.
    """
    try:
        # Extract client metadata; use fixed dev-mode values when no request.
        if request:
            headers = request.headers
            # NOTE(review): x-forwarded-for can be a comma-separated proxy
            # chain; the full header value is logged as-is — confirm intended.
            ip = headers.get("x-forwarded-for", request.client.host)
            user_agent = headers.get("user-agent", "Unknown")
            language = headers.get("accept-language", "Unknown").split(',')[0]
        else:
            ip, user_agent, language = "LOCAL", "Dev-Mode", "it"

        # Pseudo-anonymous session id: deterministic for a given
        # (ip, user-agent) pair within a single calendar day.
        session_raw = f"{ip}{user_agent}{datetime.now().date()}"
        session_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, session_raw))[:8]

        input_meta = "0"
        input_text_content = ""
        now_italy = datetime.now(ITALY_TZ)

        # Summarize the input for the CSV columns.
        if isinstance(input_data, str):
            input_meta = f"{len(input_data)} chars"
            # Keep the row single-line: strip newlines, cap at 1000 chars.
            clean_text = input_data.replace('\n', ' ').replace('\r', '')
            input_text_content = (clean_text[:1000] + '..') if len(clean_text) > 1000 else clean_text
        elif hasattr(input_data, 'shape'):
            input_meta = f"{input_data.shape}"
            input_text_content = "[IMAGE/BINARY]"
        elif input_data is not None:
            input_meta = "Binary/File"
            input_text_content = "[FILE]"

        # Write the log row (append). Held under the scheduler lock so a
        # concurrent dataset commit never uploads a half-written line.
        with scheduler.lock:
            with open(LOG_FILE, "a", newline="", encoding="utf-8") as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow([
                    now_italy.isoformat(),
                    session_id,
                    module_name,
                    action,
                    ip,
                    user_agent,
                    language,
                    input_meta,
                    input_text_content,
                    f"{execution_time:.4f}s",
                ])
        print(f"📝 LOG SALVATO: {module_name}")
    except Exception as e:
        # Best-effort: a logging failure must never crash the caller.
        print(f"❌ ERRORE LOGGING: {e}")