# Source: LogicGoInfotech Spaces — "Update app.py" (commit 1b441ae, verified)
# app.py
import calendar
import math
import os
from collections import defaultdict
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple
from time import perf_counter
from bson import ObjectId
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from pymongo import MongoClient
from pymongo.collection import Collection
# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()
app = FastAPI(title="Expense Prediction API", version="1.0.0")
# ---------- Configurable constants ----------
MAX_HISTORY_MONTHS = int(os.getenv("MAX_HISTORY_MONTHS", "36"))  # months to fetch for detection/tuning
SEASONALITY_PERIOD = int(os.getenv("SEASONALITY_PERIOD", "12"))  # monthly seasonality (12 months)
# Minimum amplitude ratio (seasonal range / overall mean) required to treat a series as seasonal.
SEASONALITY_AMPLITUDE_THRESHOLD = float(os.getenv("SEASONALITY_AMPLITUDE_THRESHOLD", "0.18"))
# grid-search limits (keeps tuning light)
ALPHA_GRID = [0.3, 0.5, 0.7]
BETA_GRID = [0.1, 0.3, 0.5]
GAMMA_GRID = [0.1, 0.3, 0.5]
MAX_GRID_SEARCH_COMBINATIONS = 30  # safety cap on grid-search combinations
# ------------------------------------------------
class MonthlyExpense(BaseModel):
    """Aggregated expense total for one (year, month) of a single category."""
    year: int
    month: int
    total: float = Field(..., description="Total expenses recorded for the month")
class CategoryPrediction(BaseModel):
    """Per-category result: observed monthly history plus the next-month forecast."""
    headCategoryId: str
    title: str
    history: List[MonthlyExpense]
    predictionMonth: MonthlyExpense
class PredictionResponse(BaseModel):
    """Full prediction payload for one user across all of their categories."""
    userId: str
    categories: List[CategoryPrediction]
class APIResponse(BaseModel):
    """Standard response envelope: status/message plus optional prediction data."""
    status: str
    message: str
    data: Optional[PredictionResponse] = None
class MongoConnection:
    """Owns the MongoDB client and the collections this service uses.

    Requires ``MONGO_URI`` in the environment; the database name must be part
    of the URI, since ``get_default_database()`` relies on it.
    """
    def __init__(self) -> None:
        mongo_uri = os.getenv("MONGO_URI")
        if not mongo_uri:
            raise RuntimeError("MONGO_URI is not configured in the environment")
        # tz_aware=True so datetimes read back from Mongo are timezone-aware.
        self._client = MongoClient(mongo_uri, tz_aware=True)
        self._database = self._client.get_default_database()
        self.transactions: Collection = self._database["transactions"]
        self.headcategories: Collection = self._database["headcategories"]
        self.api_logs: Collection = self._database["api_logs"]
# Module-level singleton shared by all endpoints (created at import time).
mongo = MongoConnection()
# ----------------- Date helpers -----------------
def _first_day_of_month(dt: datetime) -> datetime:
return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
def _shift_months(dt: datetime, months: int) -> datetime:
month_index = dt.month - 1 + months
year = dt.year + month_index // 12
month = month_index % 12 + 1
last_day = calendar.monthrange(year, month)[1]
day = min(dt.day, last_day)
return dt.replace(year=year, month=month, day=day)
def month_to_index(year: int, month: int) -> int:
    """Map (year, month) onto a single monotonically increasing month counter."""
    return year * 12 + month - 1
def index_to_month(idx: int) -> Tuple[int, int]:
    """Inverse of the month counter: map an index back to (year, month)."""
    year, month_offset = divmod(idx, 12)
    return year, month_offset + 1
def log_api_event(
    name: str,
    status: str,
    response_time: float,
    user_id: Optional[str] = None,
    error_message: Optional[str] = None,
):
    """Persist one API audit record to the ``api_logs`` collection.

    Best-effort by design: any insert failure is swallowed so that logging
    can never take the API down.
    """
    record = {
        "name": name,
        "status": status,
        "response_time": round(response_time, 3),
        "user_id": user_id or "anonymous",
        "date": datetime.now(timezone.utc),
    }
    if error_message:
        record["error_message"] = error_message
    try:
        mongo.api_logs.insert_one(record)
    except Exception:
        # never crash API because of logging
        pass
# ------------------------------------------------
# ----------------- Time series utilities -----------------
def build_continuous_series(history: List[MonthlyExpense]) -> Tuple[List[float], List[Tuple[int, int]]]:
    """
    Expand sparse monthly history into a gap-free month-by-month series.

    The series spans the earliest through latest month present in *history*;
    months with no data are represented by ``None``. Returns
    ``(values_with_none, [(year, month), ...])`` aligned index-by-index.
    """
    if not history:
        return [], []
    # Build a month-index -> total lookup; later duplicates of the same
    # (year, month) win, matching iteration over the sorted history.
    totals_by_index = {
        month_to_index(item.year, item.month): item.total
        for item in sorted(history, key=lambda h: (h.year, h.month))
    }
    span = range(min(totals_by_index), max(totals_by_index) + 1)
    months = [index_to_month(i) for i in span]
    series = [totals_by_index.get(i) for i in span]
    return series, months
def impute_missing(series: List[Optional[float]]) -> List[float]:
    """
    Replace ``None`` gaps by linear interpolation between known neighbours,
    extending the nearest known value over leading/trailing gaps.
    An all-``None`` (or empty) input yields zeros (resp. an empty list).
    """
    if not series:
        return []
    values = [float(v) if v is not None else None for v in series]
    known = [i for i, v in enumerate(values) if v is not None]
    if not known:
        # no observations at all -> flat zero series
        return [0.0] * len(values)
    # Interpolate linearly across each interior gap between known points.
    for left, right in zip(known, known[1:]):
        slope = (values[right] - values[left]) / (right - left)
        for pos in range(left + 1, right):
            values[pos] = values[left] + slope * (pos - left)
    # Backfill before the first known point, forward-fill after the last.
    first, last = known[0], known[-1]
    values[:first] = [values[first]] * first
    values[last + 1:] = [values[last]] * (len(values) - last - 1)
    return [float(v) for v in values]
def seasonal_strength(series: List[float], period: int = SEASONALITY_PERIOD) -> float:
    """
    Crude monthly-seasonality score: range of per-cycle-position means divided
    by the overall mean. Larger values mean a stronger seasonal swing.
    Returns 0.0 when fewer than two full periods are available or the overall
    mean is zero.
    """
    if len(series) < 2 * period:
        return 0.0
    overall_mean = sum(series) / len(series)
    if overall_mean == 0:
        return 0.0
    # series[offset::period] gathers every observation at one cycle position;
    # with >= 2 full periods each slice is guaranteed non-empty.
    position_means = [
        sum(series[offset::period]) / len(series[offset::period])
        for offset in range(period)
    ]
    amplitude = max(position_means) - min(position_means)
    return amplitude / overall_mean
# ----------------- Forecasting algorithms -----------------
def holt_double_forecast(series: List[float], alpha: float, beta: float, n_forecast: int = 1) -> List[float]:
    """
    Holt's linear method (double exponential smoothing).

    Fits a level + trend over *series* and projects ``n_forecast`` steps
    ahead; every forecast is floored at zero. Degenerate inputs: an empty
    series yields zeros, a single observation is simply repeated.
    """
    if not series:
        return [0.0] * n_forecast
    if len(series) == 1:
        return [series[-1]] * n_forecast
    smoothed_level = series[0]
    smoothed_trend = series[1] - series[0]
    for observation in series[1:]:
        previous_level = smoothed_level
        smoothed_level = alpha * observation + (1 - alpha) * (smoothed_level + smoothed_trend)
        smoothed_trend = beta * (smoothed_level - previous_level) + (1 - beta) * smoothed_trend
    # Project h = 1..n_forecast steps ahead along the fitted trend line.
    return [
        max(0.0, smoothed_level + step * smoothed_trend)
        for step in range(1, n_forecast + 1)
    ]
def holt_winters_additive(series: List[float], season_length: int, alpha: float, beta: float, gamma: float, n_forecast: int = 1) -> List[float]:
    """
    Additive Holt-Winters (triple exponential smoothing) forecast.

    Needs at least two full seasons of data to initialize the seasonal
    indices; with less it falls back to Holt's non-seasonal method.
    Forecasts are floored at zero.
    """
    count = len(series)
    if count == 0:
        return [0.0] * n_forecast
    if count < season_length * 2:
        # not enough data to initialize seasonals reliably -> fallback to holt_double
        return holt_double_forecast(series, alpha, beta, n_forecast)
    # Initialize state from the first two seasons.
    seasonals = _initial_seasonal_components(series, season_length)
    level = sum(series[:season_length]) / season_length
    trend = (sum(series[season_length:2 * season_length]) - sum(series[:season_length])) / (season_length * season_length)
    # Fit pass: update level, trend and the seasonal index of each slot.
    for t, observed in enumerate(series):
        slot = t % season_length
        previous_level = level
        level = alpha * (observed - seasonals[slot]) + (1 - alpha) * (level + trend)
        trend = beta * (level - previous_level) + (1 - beta) * trend
        seasonals[slot] = gamma * (observed - level) + (1 - gamma) * seasonals[slot]
    # Projection pass: h-step-ahead forecasts reuse the fitted seasonal indices.
    forecasts = []
    for h in range(1, n_forecast + 1):
        slot = (count + h - 1) % season_length
        forecasts.append(max(0.0, level + h * trend + seasonals[slot]))
    return forecasts[:n_forecast]
def _initial_seasonal_components(series: List[float], season_length: int) -> List[float]:
"""
Initialize seasonality components by averaging.
"""
seasonals = [0.0] * season_length
n_seasons = len(series) // season_length
if n_seasons == 0:
return seasonals
season_averages = []
for j in range(n_seasons):
start = j * season_length
season_avg = sum(series[start:start + season_length]) / season_length
season_averages.append(season_avg)
for i in range(season_length):
s = 0.0
for j in range(n_seasons):
s += series[j * season_length + i] - season_averages[j]
seasonals[i] = s / n_seasons
return seasonals
# ----------------- Dynamic WMA -----------------
def dynamic_wma(series: List[float], max_len: int = 6) -> float:
"""
Compute a dynamic WMA using up to max_len most recent months.
The weights adapt based on volatility: higher volatility -> smoother (older months get more weight).
"""
n = len(series)
if n == 0:
return 0.0
take = min(n, max_len)
recent = series[-take:]
# compute month-to-month relative changes
if len(recent) >= 2:
changes = [abs(recent[i] - recent[i - 1]) for i in range(1, len(recent))]
vol = sum(changes) / len(changes) if changes else 0.0
else:
vol = 0.0
# base weights favor recent months
base_weights = [ (i + 1) for i in range(take) ] # 1..take
base_weights = list(reversed(base_weights)) # newest highest
total = sum(base_weights)
base_weights = [w/total for w in base_weights]
# adaptation factor: more vol -> flatten weights
# vol_ratio normalized roughly w.r.t average magnitude
avg = sum(recent) / len(recent) if recent else 1.0
vol_ratio = (vol / avg) if avg else 0.0
# clamp vol_ratio
vol_ratio = max(0.0, min(vol_ratio, 1.0))
# blend between base_weights and equal weights
equal_weights = [1.0 / take] * take
blend = min(0.7, vol_ratio) # limit blend to avoid extreme flattening
weights = [(1 - blend) * bw + blend * ew for bw, ew in zip(base_weights, equal_weights)]
# compute prediction
prediction = sum(w * v for w, v in zip(weights, reversed(recent))) # reversed so weights map newest->oldest
return max(0.0, prediction)
# ----------------- Parameter tuning (lightweight) -----------------
def walk_forward_cv_mse(series: List[float], forecast_func, params: dict, min_train_size: int = 6) -> float:
    """
    Walk-forward (expanding-window) one-step validation returning the MSE.

    ``forecast_func(train_series, params)`` must return a single-step
    forecast. Returns ``inf`` when the series is too short to validate, a
    forecast raises, or a forecast comes back ``None`` — steering the tuner
    away from models it cannot support.
    """
    if len(series) < min_train_size + 1:
        return float("inf")
    squared_errors = []
    for cut in range(min_train_size, len(series)):
        observed = series[cut]
        try:
            predicted = forecast_func(series[:cut], params)
        except Exception:
            return float("inf")
        if predicted is None:
            return float("inf")
        squared_errors.append((predicted - observed) ** 2)
    return sum(squared_errors) / len(squared_errors) if squared_errors else float("inf")
def forecast_wrapper_holt(train: List[float], params: dict) -> float:
    """Adapter: one-step Holt forecast driven by a params dict (for CV scoring)."""
    return holt_double_forecast(
        train,
        params.get("alpha", 0.5),
        params.get("beta", 0.3),
        n_forecast=1,
    )[0]
def forecast_wrapper_hw(train: List[float], params: dict) -> float:
    """Adapter: one-step Holt-Winters forecast driven by a params dict (for CV scoring)."""
    return holt_winters_additive(
        train,
        params.get("season_length", SEASONALITY_PERIOD),
        params.get("alpha", 0.5),
        params.get("beta", 0.3),
        params.get("gamma", 0.2),
        n_forecast=1,
    )[0]
def tune_parameters(series: List[float], seasonal: bool, season_length: int = SEASONALITY_PERIOD) -> dict:
    """
    Small grid search over smoothing coefficients, scored by walk-forward CV.

    Returns the best-scoring parameter dict; when no combination produces a
    finite score, returns conservative defaults instead.
    """
    if seasonal:
        candidates = [
            {"alpha": a, "beta": b, "gamma": g, "season_length": season_length}
            for a in ALPHA_GRID
            for b in BETA_GRID
            for g in GAMMA_GRID
        ]
        scorer = forecast_wrapper_hw
        warmup = max(6, season_length)
    else:
        candidates = [{"alpha": a, "beta": b} for a in ALPHA_GRID for b in BETA_GRID]
        scorer = forecast_wrapper_holt
        warmup = 6
    # Keep tuning cheap: never score more than the configured cap.
    candidates = candidates[:MAX_GRID_SEARCH_COMBINATIONS]
    winner = None
    winner_score = float("inf")
    for candidate in candidates:
        score = walk_forward_cv_mse(series, scorer, candidate, min_train_size=warmup)
        if score < winner_score:
            winner_score = score
            winner = candidate
    if winner is not None:
        return winner
    # Every combination scored inf (too little data) -> sensible defaults.
    if seasonal:
        return {"alpha": 0.5, "beta": 0.3, "gamma": 0.2, "season_length": season_length}
    return {"alpha": 0.5, "beta": 0.3}
# ----------------- Top-level predictor combining everything -----------------
def _predict_next_month(history: List[MonthlyExpense]) -> float:
    """
    Predict next month's total for one category from its monthly history.

    Pipeline:
      1. Keep at most ``MAX_HISTORY_MONTHS`` of the most recent history.
      2. Build a gap-free monthly series and linearly impute missing months.
      3. Series of <= 2 points: use the dynamic WMA directly.
      4. Detect seasonality; choose Holt-Winters (seasonal) or Holt
         (trend-only) with lightly tuned coefficients; prefer the dynamic WMA
         for short, extremely volatile series.
      5. Clamp invalid forecasts (None / NaN / inf / negative) to a recent
         3-month average.

    Returns the predicted amount rounded to 2 decimals (0.0 for no history).
    """
    if not history:
        return 0.0
    # Limit history length to MAX_HISTORY_MONTHS (most recent months only).
    history_sorted = sorted(history, key=lambda h: (h.year, h.month))
    if len(history_sorted) > MAX_HISTORY_MONTHS:
        history_sorted = history_sorted[-MAX_HISTORY_MONTHS:]
    # Continuous series (None for missing months), then imputation.
    series_with_none, _months = build_continuous_series(history_sorted)
    series = impute_missing(series_with_none)
    # If after imputation all zeros, there is nothing to predict.
    if all(v == 0.0 for v in series):
        return 0.0
    n = len(series)
    # Very short history (<= 2 points): the adaptive WMA is the safest choice.
    if n <= 2:
        return round(dynamic_wma(series, max_len=2), 2)
    # Seasonality needs at least 2 full cycles to be trusted.
    season_strength = seasonal_strength(series, period=SEASONALITY_PERIOD)
    is_seasonal = season_strength >= SEASONALITY_AMPLITUDE_THRESHOLD and n >= 2 * SEASONALITY_PERIOD
    # Lightweight per-series tuning; tuning failures must never break prediction.
    try:
        tuned = tune_parameters(series, seasonal=is_seasonal, season_length=SEASONALITY_PERIOD)
    except Exception:
        tuned = None
    if tuned is None:
        tuned = (
            {"alpha": 0.5, "beta": 0.3, "gamma": 0.2, "season_length": SEASONALITY_PERIOD}
            if is_seasonal
            else {"alpha": 0.5, "beta": 0.3}
        )
    # Extremely volatile short series: exponential models overreact -> WMA.
    mean_val = sum(series) / n
    diffs = [abs(series[i] - series[i - 1]) for i in range(1, n)] or [0.0]
    avg_diff = sum(diffs) / len(diffs)
    volatility_ratio = (avg_diff / mean_val) if mean_val else 0.0
    if volatility_ratio > 1.0 and n < 6:
        return round(dynamic_wma(series, max_len=min(6, n)), 2)
    # Choose and run the model.
    if is_seasonal:
        pred = holt_winters_additive(
            series,
            tuned.get("season_length", SEASONALITY_PERIOD),
            tuned.get("alpha", 0.5),
            tuned.get("beta", 0.3),
            tuned.get("gamma", 0.2),
            n_forecast=1,
        )[0]
    else:
        pred = holt_double_forecast(
            series, tuned.get("alpha", 0.5), tuned.get("beta", 0.3), n_forecast=1
        )[0]
    # Final safety clamp. BUGFIX: the None check must come before the NaN
    # check — the original `math.isnan(pred) or pred is None` would raise
    # TypeError on a None forecast before the None guard ran. isfinite also
    # catches inf, which isnan did not.
    if pred is None or not math.isfinite(pred) or pred < 0:
        # fallback to the average of the most recent (up to 3) months
        pred = sum(series[-3:]) / min(3, n)
    return round(float(pred), 2)
# ----------------- API endpoint -----------------
@app.get("/users/{user_id}/expense-prediction",response_model=APIResponse,)
def predict_expense(user_id: str):
    """
    Predict next month's expenses per head-category for one user.

    Aggregates the user's EXPENSE transactions by (headCategory, year, month)
    over the last MAX_HISTORY_MONTHS, runs the per-category predictor, and
    returns each category's monthly history plus a one-month-ahead prediction.

    Raises:
        HTTPException 400: *user_id* is not a valid Mongo ObjectId.
        HTTPException 500: any downstream failure (DB access, prediction).
    """
    start_time = perf_counter()
    # Validate the path parameter before touching the database.
    try:
        user_object_id = ObjectId(user_id)
    except Exception:
        log_api_event(
            name="Expense Prediction",
            status="failed",
            response_time=0,
            user_id=user_id,
            error_message="Invalid user id",
        )
        raise HTTPException(status_code=400, detail="Invalid user id")
    try:
        now = datetime.now(timezone.utc)
        # History window starts at the first day of the oldest month we keep;
        # the prediction targets the month after the current one.
        start_period = _shift_months(_first_day_of_month(now), -MAX_HISTORY_MONTHS + 1)
        prediction_month = _shift_months(_first_day_of_month(now), 1)
        # Aggregation: monthly totals per head category, joined with titles.
        pipeline = [
            {
                "$match": {
                    "user": user_object_id,
                    "type": "EXPENSE",
                    "headCategory": {"$ne": None},
                    "date": {"$gte": start_period},
                }
            },
            {
                "$project": {
                    "amount": 1,
                    "headCategory": 1,
                    "year": {"$year": "$date"},
                    "month": {"$month": "$date"},
                }
            },
            {
                "$group": {
                    "_id": {
                        "headCategory": "$headCategory",
                        "year": "$year",
                        "month": "$month",
                    },
                    "total": {"$sum": "$amount"},
                }
            },
            {
                # Pull in the category document so we can expose its title.
                "$lookup": {
                    "from": "headcategories",
                    "localField": "_id.headCategory",
                    "foreignField": "_id",
                    "as": "headCategoryDoc",
                }
            },
            # NOTE(review): $unwind drops categories whose lookup matched
            # nothing (e.g. deleted headcategory docs) — confirm intended.
            {"$unwind": "$headCategoryDoc"},
            {"$sort": {"_id.headCategory": 1, "_id.year": 1, "_id.month": 1}},
        ]
        results = list(mongo.transactions.aggregate(pipeline))
        # Re-group the flat aggregation rows into per-category history lists.
        grouped: Dict[ObjectId, Dict[str, List[MonthlyExpense]]] = defaultdict(lambda: {"history": []})
        for item in results:
            head_category_id: ObjectId = item["_id"]["headCategory"]
            category_record = grouped[head_category_id]
            category_record["title"] = item["headCategoryDoc"].get("title", "Unknown")
            category_record["history"].append(
                MonthlyExpense(
                    year=item["_id"]["year"],
                    month=item["_id"]["month"],
                    total=float(item["total"]),
                )
            )
        # Run the predictor for every category and assemble the response.
        categories: List[CategoryPrediction] = []
        for head_category_id, record in grouped.items():
            history = sorted(record["history"], key=lambda doc: (doc.year, doc.month))
            predicted_total = _predict_next_month(history)
            categories.append(
                CategoryPrediction(
                    headCategoryId=str(head_category_id),
                    title=record.get("title", "Unknown"),
                    history=history,
                    predictionMonth=MonthlyExpense(
                        year=prediction_month.year,
                        month=prediction_month.month,
                        total=predicted_total,
                    ),
                )
            )
        response_data = PredictionResponse(userId=user_id, categories=categories)
        log_api_event(
            name="Expense Prediction",
            status="success",
            response_time=perf_counter() - start_time,
            user_id=user_id,
        )
        return APIResponse(
            status="success",
            message="Expense prediction generated successfully",
            data=response_data,
        )
    except Exception as exc:
        # Catch-all boundary: log the failure, hide internals from the client.
        log_api_event(
            name="Expense Prediction",
            status="failed",
            response_time=perf_counter() - start_time,
            user_id=user_id,
            error_message=str(exc),
        )
        raise HTTPException(status_code=500, detail="Internal server error")
@app.get("/health")
def health():
    """Health probe: pings MongoDB and reports service availability."""
    try:
        # NOTE(review): reaches into MongoConnection's private `_client`; a
        # public ping helper on MongoConnection would be cleaner.
        mongo._client.admin.command("ping")
        return {
            "status": "ok",
            "message": "Service is healthy",
            "timestamp": datetime.now(timezone.utc),
        }
    except Exception as exc:
        # 503 so load balancers / orchestrators mark the instance unavailable.
        raise HTTPException(
            status_code=503,
            detail={
                "status": "down",
                "message": "Database connectivity failed",
                "error": str(exc),
            },
        )
# ---------------------------------------------------------------------------
# Legacy WMA-only implementation below, kept commented out for reference.
# ---------------------------------------------------------------------------
# import calendar
# import os
# from collections import defaultdict
# from datetime import datetime, timezone
# from typing import Dict, List
# from bson import ObjectId
# from dotenv import load_dotenv
# from fastapi import FastAPI, HTTPException
# from pydantic import BaseModel, Field
# from pymongo import MongoClient
# from pymongo.collection import Collection
# load_dotenv()
# app = FastAPI(title="Expense Prediction API", version="1.0.0")
# class MonthlyExpense(BaseModel):
# year: int
# month: int
# total: float = Field(..., description="Total expenses recorded for the month")
# class CategoryPrediction(BaseModel):
# headCategoryId: str
# title: str
# history: List[MonthlyExpense]
# predictionMonth: MonthlyExpense
# class PredictionResponse(BaseModel):
# userId: str
# categories: List[CategoryPrediction]
# class MongoConnection:
# def __init__(self) -> None:
# mongo_uri = os.getenv("MONGO_URI")
# if not mongo_uri:
# raise RuntimeError("MONGO_URI is not configured in the environment")
# self._client = MongoClient(mongo_uri, tz_aware=True)
# self._database = self._client.get_default_database()
# self.transactions: Collection = self._database["transactions"]
# self.headcategories: Collection = self._database["headcategories"]
# mongo = MongoConnection()
# def _first_day_of_month(dt: datetime) -> datetime:
# return dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
# def _shift_months(dt: datetime, months: int) -> datetime:
# month_index = dt.month - 1 + months
# year = dt.year + month_index // 12
# month = month_index % 12 + 1
# last_day = calendar.monthrange(year, month)[1]
# day = min(dt.day, last_day)
# return dt.replace(year=year, month=month, day=day)
# # -----------------------------------------------------------
# # NEW: Weighted Moving Average-based prediction function
# # -----------------------------------------------------------
# def _predict_next_month(history: List[MonthlyExpense]) -> float:
# """Predict next month's expense using Weighted Moving Average (WMA)."""
# totals = [h.total for h in history]
# # Only one month β†’ Just repeat last month
# if len(totals) == 1:
# return round(totals[-1], 2)
# # Two months β†’ Slight smoothing
# if len(totals) == 2:
# last, prev = totals[-1], totals[-2]
# prediction = last * 0.7 + prev * 0.3
# return round(prediction, 2)
# # Three or more months β†’ Use 3-month WMA (0.5, 0.3, 0.2)
# last3 = totals[-3:]
# weights = [0.2, 0.3, 0.5] # oldest β†’ newest
# prediction = sum(v * w for v, w in zip(last3, weights))
# return round(prediction, 2)
# # -----------------------------------------------------------
# # EXPENSE PREDICTION ENDPOINT
# # -----------------------------------------------------------
# @app.get("/users/{user_id}/expense-prediction", response_model=PredictionResponse)
# def predict_expense(user_id: str) -> PredictionResponse:
# try:
# user_object_id = ObjectId(user_id)
# except Exception as exc:
# raise HTTPException(status_code=400, detail="Invalid user id") from exc
# now = datetime.now(timezone.utc)
# start_period = _shift_months(_first_day_of_month(now), -2)
# prediction_month = _shift_months(_first_day_of_month(now), 1)
# pipeline = [
# {
# "$match": {
# "user": user_object_id,
# "type": "EXPENSE",
# "headCategory": {"$ne": None},
# "date": {"$gte": start_period},
# }
# },
# {
# "$project": {
# "amount": 1,
# "headCategory": 1,
# "year": {"$year": "$date"},
# "month": {"$month": "$date"},
# }
# },
# {
# "$group": {
# "_id": {
# "headCategory": "$headCategory",
# "year": "$year",
# "month": "$month",
# },
# "total": {"$sum": "$amount"},
# }
# },
# {
# "$lookup": {
# "from": "headcategories",
# "localField": "_id.headCategory",
# "foreignField": "_id",
# "as": "headCategoryDoc",
# }
# },
# {"$unwind": "$headCategoryDoc"},
# {"$sort": {"_id.headCategory": 1, "_id.year": 1, "_id.month": 1}},
# ]
# results = list(mongo.transactions.aggregate(pipeline))
# grouped: Dict[ObjectId, Dict[str, List[MonthlyExpense]]] = defaultdict(
# lambda: {"history": []}
# )
# for item in results:
# head_category_id: ObjectId = item["_id"]["headCategory"]
# category_record = grouped[head_category_id]
# category_record["title"] = item["headCategoryDoc"].get("title", "Unknown")
# category_record["history"].append(
# MonthlyExpense(
# year=item["_id"]["year"],
# month=item["_id"]["month"],
# total=float(item["total"]),
# )
# )
# categories: List[CategoryPrediction] = []
# for head_category_id, record in grouped.items():
# history = sorted(record["history"], key=lambda doc: (doc.year, doc.month))
# predicted_total = _predict_next_month(history)
# categories.append(
# CategoryPrediction(
# headCategoryId=str(head_category_id),
# title=record.get("title", "Unknown"),
# history=history,
# predictionMonth=MonthlyExpense(
# year=prediction_month.year,
# month=prediction_month.month,
# total=predicted_total,
# ),
# )
# )
# return PredictionResponse(userId=user_id, categories=categories)