File size: 3,125 Bytes
24e6f5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from expense_tracker.utils import MongoDBClient

def _normalize_income_records(incomes):
    """Return the usable entries of *incomes* as ``{'date', 'amount'}`` dicts.

    A record is kept only when its ``date`` is a ``datetime`` (or a
    ``'%Y-%m-%d'`` string) and its ``amount`` converts to a finite float;
    anything else is skipped rather than aborting the whole forecast.
    Dates are truncated to the calendar day so that several incomes received
    on the same day end up in the same group.
    """
    records = []
    for inc in incomes:
        if not isinstance(inc, dict):
            continue

        date_val = inc.get('date')
        if isinstance(date_val, str):
            try:
                date_val = datetime.strptime(date_val, '%Y-%m-%d')
            except ValueError:
                continue
        elif not isinstance(date_val, datetime):
            continue

        # A missing amount counts as 0 (as before); an unparsable one
        # (None, "abc", NaN, inf) is dropped instead of crashing.
        try:
            amount = float(inc.get('amount', 0))
        except (TypeError, ValueError):
            continue
        if not np.isfinite(amount):
            continue

        records.append({
            # Drop the time-of-day (and any tzinfo): the regression feature
            # has day resolution, so grouping must have day resolution too.
            'date': datetime(date_val.year, date_val.month, date_val.day),
            'amount': amount,
        })
    return records


def forecast_income(user_id, days_to_forecast=30):
    """
    Forecast a user's daily income with a linear-regression trend line.

    The user's ``financial_data.incomes`` records are read via
    ``client.users.find_one``, summed per calendar day, and a
    ``LinearRegression`` of daily total against the date ordinal is fitted.
    The fitted line is then evaluated for the ``days_to_forecast`` days that
    follow the most recent income date.

    Args:
        user_id: A ``bson.ObjectId`` or a value ``ObjectId(...)`` accepts.
        days_to_forecast: Number of days after the last recorded income date
            to predict. Values <= 0 yield an empty forecast list.

    Returns:
        dict: one of
            * ``{"error": <message>}`` when the id is invalid, the user has
              no incomes, or none of the income records are usable;
            * ``{"current_balance", "forecast": [], "message"}`` when fewer
              than two distinct days of data exist (no trend can be fitted);
            * ``{"analysis", "slope", "forecast"}`` where ``forecast`` is a
              list of ``{"date": "YYYY-MM-DD", "predicted_amount": float}``
              with predictions floored at 0.
    """
    client = MongoDBClient.get_client()
    from bson import ObjectId
    from bson.errors import InvalidId

    # Handle ObjectId vs String
    if not isinstance(user_id, ObjectId):
        try:
            user_id = ObjectId(user_id)
        except (InvalidId, TypeError):
            return {"error": "Invalid User ID format"}

    user = client.users.find_one({'_id': user_id}, {'financial_data.incomes': 1})

    # Tolerate a missing user, a missing/None 'financial_data', or a
    # missing/None 'incomes' field: all of them mean "no data".
    financial_data = user.get('financial_data') if user else None
    incomes = financial_data.get('incomes') if isinstance(financial_data, dict) else None

    if not incomes:
        return {"error": "Not enough data to forecast"}

    df = pd.DataFrame(_normalize_income_records(incomes))
    if df.empty:
        return {"error": "No valid income records found"}

    # Aggregate by calendar day (dates were truncated during normalization)
    daily_income = df.groupby('date')['amount'].sum().reset_index()
    daily_income = daily_income.sort_values('date')

    # A line needs at least two distinct days
    if len(daily_income) < 2:
        return {
            "current_balance": float(daily_income['amount'].sum()),
            "forecast": [],
            "message": "Need more data points for regression"
        }

    # Transform date to ordinal for regression
    daily_income['date_ordinal'] = daily_income['date'].map(datetime.toordinal)

    # Fit on plain ndarrays so that predicting on an ndarray below does not
    # trigger scikit-learn's "X does not have valid feature names" warning.
    X = daily_income['date_ordinal'].to_numpy().reshape(-1, 1)
    y = daily_income['amount'].to_numpy()

    from sklearn.linear_model import LinearRegression
    model = LinearRegression()
    model.fit(X, y)

    # Forecast the days following the most recent income date
    last_date = daily_income['date'].max()
    future_dates = [last_date + timedelta(days=i) for i in range(1, days_to_forecast + 1)]
    if not future_dates:
        predictions = []
    else:
        future_ordinals = np.array([d.toordinal() for d in future_dates]).reshape(-1, 1)
        predictions = model.predict(future_ordinals)

    forecast_data = [
        {
            'date': day.strftime('%Y-%m-%d'),
            # logical floor at 0; float() keeps NumPy scalars out of the payload
            'predicted_amount': max(0.0, round(float(pred), 2)),
        }
        for day, pred in zip(future_dates, predictions)
    ]

    return {
        "analysis": "Linear Regression Trend",
        "slope": round(float(model.coef_[0]), 2),  # positive means increasing income trend
        "forecast": forecast_data
    }