# FinMK / backend / analytics / services.py
# Kumar
# Refactor: Exclude PDF and CSV files from Git to fix HF push error
# 24e6f5b
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from expense_tracker.utils import MongoDBClient
def forecast_income(user_id, days_to_forecast=30):
    """
    Forecast a user's daily income with a linear regression over past records.

    Income records are read from ``financial_data.incomes`` on the user
    document, summed per calendar day, and a straight line is fitted to
    (day ordinal, daily total). The line is then evaluated for each of the
    ``days_to_forecast`` days following the last recorded day.

    Args:
        user_id: A ``bson.ObjectId`` or a value convertible to one.
        days_to_forecast: Number of days after the last recorded income day
            to predict. Values <= 0 yield an empty forecast.

    Returns:
        A dict in one of three shapes:
        * ``{"error": <message>}`` when the ID is invalid or no usable
          income records exist.
        * ``{"current_balance", "forecast": [], "message"}`` when fewer
          than two distinct days of data are available.
        * ``{"analysis", "slope", "forecast"}`` otherwise, where
          ``forecast`` is a list of ``{"date": "YYYY-MM-DD",
          "predicted_amount": <number floored at 0>}`` and ``slope`` is the
          fitted change in daily income per day.
    """
    client = MongoDBClient.get_client()
    from bson import ObjectId
    from bson.errors import InvalidId

    # Accept either an ObjectId or something convertible to one.
    if not isinstance(user_id, ObjectId):
        try:
            user_id = ObjectId(user_id)
        except (InvalidId, TypeError):
            return {"error": "Invalid User ID format"}

    user = client.users.find_one({'_id': user_id}, {'financial_data.incomes': 1})
    # Tolerate a missing user, or a missing/None financial_data or incomes.
    financial_data = (user or {}).get('financial_data') or {}
    incomes = financial_data.get('incomes') or []
    if not incomes:
        return {"error": "Not enough data to forecast"}

    clean_data = []
    for inc in incomes:
        date_val = inc.get('date')
        if isinstance(date_val, str):
            try:
                date_val = datetime.strptime(date_val, '%Y-%m-%d')
            except ValueError:
                continue  # unparseable date string: skip the record
        elif not isinstance(date_val, datetime):
            continue

        try:
            amount = float(inc.get('amount', 0))
        except (TypeError, ValueError):
            continue  # missing/non-numeric amount: skip rather than crash
        if not np.isfinite(amount):
            continue  # NaN/inf would poison the regression

        clean_data.append({
            # Truncate to the calendar day so that several records on the
            # same day (with different times) are aggregated together.
            'date': datetime(date_val.year, date_val.month, date_val.day),
            'amount': amount,
        })

    df = pd.DataFrame(clean_data)
    if df.empty:
        return {"error": "No valid income records found"}

    # Aggregate by date
    daily_income = df.groupby('date')['amount'].sum().reset_index()
    daily_income = daily_income.sort_values('date')

    # A line needs at least two distinct days.
    if len(daily_income) < 2:
        return {
            "current_balance": daily_income['amount'].sum(),
            "forecast": [],
            "message": "Need more data points for regression"
        }

    # Use the proleptic Gregorian ordinal (a day count) as the only feature.
    daily_income['date_ordinal'] = daily_income['date'].map(datetime.toordinal)
    # Fit on plain arrays so that predicting on a plain array later does not
    # trigger a feature-name mismatch warning.
    X = daily_income[['date_ordinal']].to_numpy()
    y = daily_income['amount'].to_numpy()

    from sklearn.linear_model import LinearRegression
    model = LinearRegression()
    model.fit(X, y)

    # Forecast
    last_date = daily_income['date'].max()
    future_dates = [last_date + timedelta(days=i) for i in range(1, days_to_forecast + 1)]

    forecast_data = []
    if future_dates:  # predict() rejects an empty sample array
        future_ordinals = np.array([d.toordinal() for d in future_dates]).reshape(-1, 1)
        predictions = model.predict(future_ordinals)
        forecast_data = [
            {
                'date': day.strftime('%Y-%m-%d'),
                'predicted_amount': max(0, round(pred, 2)),  # logical floor at 0
            }
            for day, pred in zip(future_dates, predictions)
        ]

    return {
        "analysis": "Linear Regression Trend",
        "slope": round(model.coef_[0], 2),  # positive means increasing income trend
        "forecast": forecast_data
    }