Spaces:

NextGenTech
/

ngt-ai-platform

Sleeping

App Files Files Community

ngt-ai-platform / modules /forecasting.py

GaetanoParente

aggiunto modulo forecasting e update sentiment con modello CNN

4bdde62 about 1 month ago

raw

history blame contribute delete

6.33 kB

	import pandas as pd
	import numpy as np
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.metrics import mean_absolute_error
	import plotly.graph_objects as go
	import io

	def generate_mock_export(seed=None):
	    """Build a synthetic 4-week call-volume export and return it as CSV text.

	    The series starts on Monday 2025-05-05 and has one row per 15-minute
	    slot for 28 days (2688 rows).  The volume of each slot is built up in
	    this order:

	    1. a base load of 10;
	    2. +50 during hours 8 to 21 (inclusive);
	    3. +40 during hours 10-11 and +30 during hours 15-16 (daily peaks);
	    4. -15 during hour 13;
	    5. a 1.2x factor on Mondays and Tuesdays, 0.2x on Saturdays and Sundays;
	    6. hours before 07:00 are overwritten with random integers in [0, 4];
	    7. uniform integer noise in [-5, 9] is added to every slot.

	    The result is clipped at zero and truncated to integers.  No holiday
	    calendar is applied.

	    Args:
	        seed: Optional seed for reproducible output.  With ``None`` (the
	            default) the global NumPy random state is used, so callers that
	            seed ``np.random`` themselves keep getting the same stream.

	    Returns:
	        str: CSV text with the columns ``Timestamp`` and ``Volume``.
	    """
	    rng = np.random if seed is None else np.random.RandomState(seed)

	    slots_per_day = 24 * 4
	    timestamps = pd.date_range(
	        start=pd.Timestamp("2025-05-05"),  # a Monday
	        periods=28 * slots_per_day,
	        freq='15min',
	    )
	    df = pd.DataFrame({'Timestamp': timestamps})
	    hour = df['Timestamp'].dt.hour
	    weekday = df['Timestamp'].dt.dayofweek

	    # Start with a float column: the weekday factors below produce
	    # fractional values, and writing those into an integer column through
	    # .loc is a deprecated implicit upcast in recent pandas releases.
	    df['Volume'] = 10.0

	    df.loc[hour.between(8, 21), 'Volume'] += 50
	    df.loc[hour.between(10, 11), 'Volume'] += 40
	    df.loc[hour.between(15, 16), 'Volume'] += 30
	    df.loc[hour == 13, 'Volume'] -= 15

	    df.loc[weekday <= 1, 'Volume'] *= 1.2
	    df.loc[weekday >= 5, 'Volume'] *= 0.2

	    # Night slots ignore everything computed so far and get a small random load.
	    night = hour < 7
	    df.loc[night, 'Volume'] = rng.randint(0, 5, size=int(night.sum()))

	    df['Volume'] += rng.randint(-5, 10, size=len(df))
	    df['Volume'] = df['Volume'].clip(lower=0).astype(int)

	    # to_csv without a target returns the CSV text directly.
	    return df[['Timestamp', 'Volume']].to_csv(index=False)

	def predict_workload(file_obj):
	    """Backtest a workload forecast on the last week of a CSV export.

	    The CSV must provide a ``Timestamp`` and a ``Volume`` column.  Rows from
	    00:00 on the Monday of the week containing the latest timestamp form the
	    test set; every earlier row forms the training set.  A random forest is
	    fitted on hour, minute and day-of-week features, and its rounded
	    predictions for the test week are compared with the actual volumes.

	    Args:
	        file_obj: ``None``, an object whose ``name`` attribute is passed to
	            ``pandas.read_csv`` (presumably an uploaded-file wrapper whose
	            ``name`` is a path; confirm against the caller), or anything
	            ``pandas.read_csv`` accepts directly.

	    Returns:
	        tuple: ``(figure, message)``.  ``figure`` is a Plotly figure on
	        success, ``None`` when the input is missing or unusable, and an
	        empty figure when an unexpected error occurs.  ``message`` is the
	        user-facing summary or explanation.
	    """
	    if file_obj is None:
	        return None, "⚠️ Seleziona un file CSV dalla sidebar laterale."

	    feature_cols = ['Hour', 'Minute', 'DayOfWeek']

	    try:
	        # Load the CSV either from the wrapper's path or from the object itself.
	        source = file_obj.name if hasattr(file_obj, 'name') else file_obj
	        df = pd.read_csv(source)

	        # Report a malformed export explicitly instead of a raw KeyError.
	        missing = [col for col in ('Timestamp', 'Volume') if col not in df.columns]
	        if missing:
	            return None, f"⚠️ Colonne mancanti nel CSV: {', '.join(missing)}."

	        # An empty export has no latest timestamp to derive the split from.
	        if df.empty:
	            return None, "⚠️ Dati insufficienti per il backtesting."

	        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
	        df = df.sort_values('Timestamp')

	        # --- SPLIT: last (possibly partial) week vs. all earlier weeks ---
	        last_timestamp = df['Timestamp'].max()
	        split_date = (
	            last_timestamp - pd.Timedelta(days=last_timestamp.dayofweek)
	        ).normalize()

	        is_test = df['Timestamp'] >= split_date
	        train_df = _add_time_features(df[~is_test])
	        test_df = _add_time_features(df[is_test])

	        if train_df.empty or test_df.empty:
	            return None, "⚠️ Dati insufficienti per il backtesting."

	        # --- TRAINING ---
	        model = RandomForestRegressor(n_estimators=100, random_state=42)
	        model.fit(train_df[feature_cols], train_df['Volume'])

	        # --- PREDICTION ---
	        # Round to the nearest integer; plain truncation would bias every
	        # slot (and therefore the weekly total) downwards.
	        raw_forecast = model.predict(test_df[feature_cols])
	        test_df['Predicted_Volume'] = np.rint(raw_forecast).astype(int)

	        # --- METRICS ---
	        total_actual = test_df['Volume'].sum()
	        total_pred = test_df['Predicted_Volume'].sum()
	        mae = mean_absolute_error(test_df['Volume'], test_df['Predicted_Volume'])
	        if total_actual == 0:
	            # A relative deviation is undefined when nothing was recorded.
	            diff_label = "n/d"
	        else:
	            diff_perc = ((total_pred - total_actual) / total_actual) * 100
	            diff_label = f"{diff_perc:+.1f}%"

	        fig = _build_forecast_figure(test_df)

	        # Text summary shown next to the chart.
	        msg = (
	            f"✅ Analisi Interattiva Completata\n"
	            f"📊 Risultati Backtesting:\n"
	            f"• Volume Reale: {total_actual}\n"
	            f"• Volume Previsto AI: {total_pred} ({diff_label})\n"
	            f"• Scostamento Medio (MAE): {mae:.1f} chiamate/slot\n\n"
	            f"💡 Usa lo slider in basso per zoomare su un giorno specifico."
	        )

	        # The figure object is returned as-is for the plotting component.
	        return fig, msg

	    except Exception as e:
	        # UI boundary: any unexpected failure becomes an empty chart plus
	        # the error text instead of propagating to the caller.
	        return go.Figure(), f"Errore Tecnico: {str(e)}"


	def _add_time_features(frame):
	    """Return a copy of ``frame`` with Hour, Minute and DayOfWeek columns."""
	    stamps = frame['Timestamp']
	    return frame.assign(
	        Hour=stamps.dt.hour,
	        Minute=stamps.dt.minute,
	        DayOfWeek=stamps.dt.dayofweek,
	    )


	def _build_forecast_figure(test_df):
	    """Plot actual vs. predicted volume of the test week as a Plotly figure."""
	    fig = go.Figure()

	    # 1. Actual values, drawn as a filled area.
	    fig.add_trace(go.Scatter(
	        x=test_df['Timestamp'],
	        y=test_df['Volume'],
	        mode='lines',
	        name='Reale (Consuntivo)',
	        line=dict(color='#3b82f6', width=2),
	        fill='tozeroy',  # fill the area under the line
	        fillcolor='rgba(59, 130, 246, 0.2)',  # translucent blue
	    ))

	    # 2. Forecast, drawn as a line with a marker on every slot.
	    fig.add_trace(go.Scatter(
	        x=test_df['Timestamp'],
	        y=test_df['Predicted_Volume'],
	        mode='lines+markers',
	        name='Forecast AI (Pianificato)',
	        line=dict(color='#f97316', width=3, dash='solid'),
	        marker=dict(size=4),
	    ))

	    # 3. Layout and styling.
	    fig.update_layout(
	        title="<b>Analisi Comparativa:</b> Reale vs AI (Settimana corrente)",
	        xaxis_title="Fascia Oraria (15min)",
	        yaxis_title="Volume Chiamate",
	        template="plotly_white",
	        hovermode="x unified",  # show both series in one hover box
	        legend=dict(
	            orientation="h",  # horizontal legend above the plot
	            yanchor="bottom",
	            y=1.02,
	            xanchor="right",
	            x=1,
	        ),
	        height=500,
	        margin=dict(l=20, r=20, t=60, b=20),
	    )

	    # 4. Range slider plus quick-select buttons to move between days.
	    fig.update_xaxes(
	        rangeslider_visible=True,
	        rangeselector=dict(
	            buttons=[
	                dict(count=1, label="1gg", step="day", stepmode="backward"),
	                dict(count=3, label="3gg", step="day", stepmode="backward"),
	                dict(step="all", label="Settimana"),
	            ]
	        ),
	    )
	    return fig