"""Access logging for the platform, persisted as CSV and synced to a HF dataset.

On import this module:
  * makes sure the local log directory exists,
  * tries to restore the previously uploaded log file from the dataset repo
    (only when ``HF_TOKEN`` is set),
  * writes the CSV header if the log file is missing or empty,
  * creates a ``CommitScheduler`` for ``LOG_DIR`` (pushed under ``logs/``).

``log_interaction`` appends one row per call.
"""

import csv
import os
import uuid
from datetime import datetime
from pathlib import Path

import pytz
from huggingface_hub import CommitScheduler, hf_hub_download

# --- CONFIGURATION ---
DATASET_REPO_ID = "NextGenTech/ngt-ai-platform-logs"
LOG_DIR = Path("data/logs")
LOG_FILE = LOG_DIR / "access_logs.csv"
HF_TOKEN = os.environ.get("HF_TOKEN")
ITALY_TZ = pytz.timezone("Europe/Rome")
TIME = 5  # Interval, in minutes, between dataset updates

LOG_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): several emoji / accented characters in the messages printed by
# this module look mis-encoded (mojibake). The strings are intentionally left
# untouched here -- confirm the file's real encoding before normalising them.

if HF_TOKEN:
    # Best-effort restore of the remote history; any failure (including
    # "file not found" on first start) just means we begin with a fresh file.
    try:
        print("πŸ“₯ Controllo presenza log remoti...")
        hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename="logs/access_logs.csv",
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir="data",
            local_dir_use_symlinks=False,
        )
        print("βœ… Storico log scaricato e ripristinato!")
    except Exception as e:
        print(f"⚠️ Nessun log remoto trovato (o primo avvio): {e}")
        print("πŸ†• Si partirΓ  con un nuovo file di log.")

if not LOG_FILE.exists() or LOG_FILE.stat().st_size == 0:
    # No usable local file: create it with the column header row.
    print("πŸ†• Creazione header file di log...")
    with open(LOG_FILE, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow([
            "timestamp",
            "session_id",
            "module",
            "action",
            "ip_address",
            "user_agent",
            "language",
            "input_size",
            "input_text",
            "processing_time",
        ])

scheduler = CommitScheduler(
    repo_id=DATASET_REPO_ID,
    repo_type="dataset",
    folder_path=LOG_DIR,
    path_in_repo="logs",
    every=TIME,
    token=HF_TOKEN,
)


def _extract_client_info(request):
    """Return ``(ip, user_agent, language)`` read from *request*.

    A falsy *request* yields the local/dev placeholders
    ``("LOCAL", "Dev-Mode", "it")``.
    """
    if not request:
        return "LOCAL", "Dev-Mode", "it"

    headers = request.headers
    # Resolve the fallback host defensively: ``request.client`` may be absent
    # or None, and dereferencing it unconditionally would raise and cause the
    # whole log entry to be dropped by the caller's catch-all handler.
    client_host = getattr(getattr(request, "client", None), "host", "Unknown")
    ip = headers.get("x-forwarded-for", client_host)
    user_agent = headers.get("user-agent", "Unknown")
    # Keep only the first entry of the Accept-Language list.
    language = headers.get("accept-language", "Unknown").split(',')[0]
    return ip, user_agent, language


def _summarize_input(input_data):
    """Return ``(size_description, text_preview)`` for *input_data*.

    * ``str``: character count, plus the text with newlines flattened and
      truncated to 1000 characters (``..`` appended when truncated).
    * object with a ``shape`` attribute: the shape and ``[IMAGE/BINARY]``.
    * any other non-None value: ``Binary/File`` and ``[FILE]``.
    * ``None``: ``"0"`` and an empty preview.
    """
    if isinstance(input_data, str):
        flattened = input_data.replace('\n', ' ').replace('\r', '')
        if len(flattened) > 1000:
            preview = flattened[:1000] + '..'
        else:
            preview = flattened
        return f"{len(input_data)} chars", preview
    if hasattr(input_data, 'shape'):
        return f"{input_data.shape}", "[IMAGE/BINARY]"
    if input_data is not None:
        return "Binary/File", "[FILE]"
    return "0", ""


def log_interaction(request, module_name, action, input_data=None, execution_time=0.0):
    """Append one interaction row to the CSV access log.

    Logging is best-effort: any exception is caught and printed, never
    propagated to the caller.

    Args:
        request: Object exposing ``headers`` (with ``.get``) and, optionally,
            ``client.host``. When falsy, local/dev placeholders are logged.
        module_name: Value written to the ``module`` column.
        action: Value written to the ``action`` column.
        input_data: Optional payload; only a size description and (for
            strings) a truncated preview are stored.
        execution_time: Number formatted with four decimals and an ``s``
            suffix into the ``processing_time`` column.

    Returns:
        None.
    """
    try:
        ip, user_agent, language = _extract_client_info(request)

        # Use a single timezone-aware timestamp for both the logged time and
        # the per-day session key, so the session rollover matches the date
        # that appears in the ``timestamp`` column.
        now_italy = datetime.now(ITALY_TZ)
        session_raw = f"{ip}{user_agent}{now_italy.date()}"
        # Short, deterministic id: same ip + user agent + day -> same session.
        session_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, session_raw))[:8]

        input_meta, input_text_content = _summarize_input(input_data)

        # Write log (append). The scheduler lock is held while writing so the
        # file is not modified concurrently with the scheduler's own use of it.
        with scheduler.lock:
            with open(LOG_FILE, "a", newline="", encoding="utf-8") as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow([
                    now_italy.isoformat(),
                    session_id,
                    module_name,
                    action,
                    ip,
                    user_agent,
                    language,
                    input_meta,
                    input_text_content,
                    f"{execution_time:.4f}s",
                ])

        print(f"πŸ“ LOG SALVATO: {module_name}")

    except Exception as e:
        # Deliberate catch-all: a logging failure must not break the request.
        print(f"❌ ERRORE LOGGING: {e}")