LogicGoInfotechSpaces committed on
Commit
b71b4c6
·
1 Parent(s): 46c4337

Use category ID from budgets and look up names from headCategories and categories collections

Browse files
.history/app/smart_recommendation_20251225155108.py ADDED
@@ -0,0 +1,580 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
    """
    Get budget recommendations for all categories based on past behavior.

    Statistics are built from the user's existing budgets when any are
    found; otherwise from expenses dated in the ~6 months before the target
    month. For each category the OpenAI suggestion is used when it carries
    a positive numeric ``recommended_budget``; otherwise the rule-based
    calculation is used.

    Args:
        user_id: User identifier
        month: Target month (1-12)
        year: Target year

    Returns:
        List of budget recommendations for each category, sorted by
        average expense (highest first). Empty when neither budgets nor
        expenses are found.
    """

    def usable_ai_budget(result):
        """Return the AI budget as a positive finite float, or None."""
        if not isinstance(result, dict):
            return None
        try:
            value = float(result.get("recommended_budget"))
        except (TypeError, ValueError):
            return None
        return value if math.isfinite(value) and value > 0 else None

    # 1) Try to build stats from existing budgets for this user (createdBy)
    category_data = self._get_category_stats_from_budgets(user_id, month, year)

    # 2) If there are no budgets, fall back to expenses history
    if not category_data:
        # Half-open window [start_date, first day of target month): an
        # inclusive bound at midnight of the previous month's last day
        # would drop every expense recorded later on that day.
        window_end = datetime(year, month, 1)
        end_date = window_end - timedelta(days=1)
        start_date = end_date - timedelta(days=180)  # ~6 months

        expenses = list(
            self.db.expenses.find(
                {
                    "user_id": user_id,
                    "date": {"$gte": start_date, "$lt": window_end},
                    "type": "expense",
                }
            )
        )

        if not expenses:
            return []

        # Group expenses by category and calculate monthly averages
        category_data = self._calculate_category_statistics(
            expenses, start_date, end_date
        )

    recommendations = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)
        ai_budget = usable_ai_budget(ai_result)

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = ai_result.get("reason") or f"AI recommendation for {category}"
            action = ai_result.get("action")
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            category=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action,
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda rec: rec.average_expense, reverse=True)

    return recommendations
102
+
103
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
104
+ """Calculate statistics for each category"""
105
+ category_data = defaultdict(lambda: {
106
+ "total": 0,
107
+ "count": 0,
108
+ "months": set(),
109
+ "monthly_totals": defaultdict(float)
110
+ })
111
+
112
+ for expense in expenses:
113
+ category = expense.get("category", "Uncategorized")
114
+ amount = expense.get("amount", 0)
115
+ date = expense.get("date")
116
+
117
+ # Handle date conversion - skip if date is None or invalid
118
+ if date is None:
119
+ continue
120
+
121
+ if isinstance(date, str):
122
+ try:
123
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
124
+ except (ValueError, AttributeError):
125
+ continue
126
+ elif not isinstance(date, datetime):
127
+ # If date is not a string or datetime, skip this expense
128
+ continue
129
+
130
+ category_data[category]["total"] += amount
131
+ category_data[category]["count"] += 1
132
+
133
+ # Track monthly totals
134
+ month_key = (date.year, date.month)
135
+ category_data[category]["months"].add(month_key)
136
+ category_data[category]["monthly_totals"][month_key] += amount
137
+
138
+ # Calculate averages
139
+ result = {}
140
+ for category, data in category_data.items():
141
+ num_months = len(data["months"]) or 1
142
+ avg_monthly = data["total"] / num_months
143
+
144
+ # Calculate standard deviation for variability
145
+ monthly_values = list(data["monthly_totals"].values())
146
+ if len(monthly_values) > 1:
147
+ mean = sum(monthly_values) / len(monthly_values)
148
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
149
+ std_dev = math.sqrt(variance)
150
+ else:
151
+ std_dev = 0
152
+
153
+ result[category] = {
154
+ "average_monthly": avg_monthly,
155
+ "total": data["total"],
156
+ "count": data["count"],
157
+ "months_analyzed": num_months,
158
+ "std_dev": std_dev,
159
+ "monthly_values": monthly_values
160
+ }
161
+
162
+ return result
163
+
164
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
165
+ """
166
+ Calculate recommended budget based on average expense.
167
+
168
+ Strategy:
169
+ - Base: Average monthly expense
170
+ - Add 5% buffer for variability
171
+ - Round to nearest 100 for cleaner numbers
172
+ """
173
+ # Add 5% buffer to handle variability
174
+ buffer = avg_expense * 0.05
175
+
176
+ # If there's high variability (std_dev > 20% of mean), add more buffer
177
+ if data["std_dev"] > 0:
178
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
179
+ if coefficient_of_variation > 0.2:
180
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
181
+
182
+ recommended = avg_expense + buffer
183
+
184
+ # Round to nearest 100 for cleaner budget numbers
185
+ recommended = round(recommended / 100) * 100
186
+
187
+ # Ensure minimum of 100 if there was any expense
188
+ if recommended < 100 and avg_expense > 0:
189
+ recommended = 100
190
+
191
+ return recommended
192
+
193
+ def _calculate_confidence(self, data: Dict) -> float:
194
+ """
195
+ Calculate confidence score (0-1) based on data quality.
196
+
197
+ Factors:
198
+ - Number of months analyzed (more = higher confidence)
199
+ - Number of transactions (more = higher confidence)
200
+ - Consistency of spending (lower std_dev = higher confidence)
201
+ """
202
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
203
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
204
+
205
+ # Consistency score (inverse of coefficient of variation)
206
+ if data["average_monthly"] > 0:
207
+ cv = data["std_dev"] / data["average_monthly"]
208
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
209
+ else:
210
+ consistency_score = 0.5
211
+
212
+ # Weighted average
213
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
214
+
215
+ return round(confidence, 2)
216
+
217
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
218
+ """Generate human-readable reason for the recommendation"""
219
+ # Format amounts with currency symbol
220
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
221
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
222
+
223
+ if recommended_budget > avg_expense:
224
+ buffer = recommended_budget - avg_expense
225
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
226
+ return (
227
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
228
+ f"We suggest setting your budget to {budget_formatted} for next month "
229
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
230
+ )
231
+ else:
232
+ return (
233
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
234
+ f"We recommend a budget of {budget_formatted} for next month."
235
+ )
236
+
237
def get_category_averages(self, user_id: str, months: int = 3) -> "List[CategoryExpense]":
    """
    Get average expenses by category for the past N months.

    Args:
        user_id: User identifier
        months: Look-back length; each month is counted as 30 days.
            Values below 1 yield an empty list without querying.

    Returns:
        One CategoryExpense per category, sorted by average monthly
        expense (highest first). Empty when no expenses are found.
    """
    if months < 1:
        # A zero or negative look-back describes no time window at all.
        return []

    end_date = datetime.now()
    start_date = end_date - timedelta(days=months * 30)

    expenses = list(self.db.expenses.find({
        "user_id": user_id,
        "date": {"$gte": start_date, "$lte": end_date},
        "type": "expense",
    }))

    if not expenses:
        return []

    category_data = self._calculate_category_statistics(expenses, start_date, end_date)

    averages = [
        CategoryExpense(
            category=category,
            average_monthly_expense=round(data["average_monthly"], 2),
            # NOTE(review): this field receives the transaction count, not
            # a monetary total - confirm that is what the model expects.
            total_expenses=data["count"],
            months_analyzed=data["months_analyzed"],
        )
        for category, data in category_data.items()
    ]
    averages.sort(key=lambda item: item.average_monthly_expense, reverse=True)
    return averages
264
+
265
+ def _get_category_name(self, category_id) -> str:
266
+ """Look up category name from categories collection"""
267
+ if not category_id:
268
+ return "Uncategorized"
269
+
270
+ try:
271
+ # Try to find category in categories collection
272
+ if isinstance(category_id, ObjectId):
273
+ category_doc = self.db.categories.find_one({"_id": category_id})
274
+ else:
275
+ try:
276
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
277
+ except (ValueError, TypeError):
278
+ category_doc = self.db.categories.find_one({"_id": category_id})
279
+
280
+ if category_doc:
281
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
282
+ except Exception as e:
283
+ print(f"Error looking up category name for {category_id}: {e}")
284
+ pass
285
+
286
+ return str(category_id) if category_id else "Uncategorized"
287
+
288
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build category stats from existing budgets for this user.

    We treat each budget document (e.g. "Office Maintenance", "LOGICGO")
    as a spending category and derive an "average" from its amounts.
    Entries of the ``headCategories`` array and their nested
    ``categories`` are added as categories of their own.

    Budgets are searched by ``createdBy`` and ``user_id`` (as ObjectId and
    as the raw value), and finally by treating ``user_id`` as a budget
    ``_id``. Duplicates are removed by ``_id``.

    Args:
        user_id: User identifier (or, as a last resort, a budget ``_id``).
        month: Target month (not used by the lookup).
        year: Target year (not used by the lookup).

    Returns:
        Dict mapping category name -> stats dict with keys
        ``average_monthly``, ``total``, ``count``, ``months_analyzed``,
        ``std_dev`` and ``monthly_values``; empty when no budget is found.
    """

    def first_amount(doc, *keys):
        """Return the first truthy value under ``keys`` as a float, else 0.0."""
        for key in keys:
            raw = doc.get(key)
            if raw:
                try:
                    return float(raw)
                except (TypeError, ValueError):
                    return 0.0
        return 0.0

    result: Dict[str, Dict] = {}

    def add_sample(name, amount):
        """Fold one amount into the running stats for ``name``."""
        entry = result.setdefault(name, {
            "average_monthly": 0.0,
            "total": 0.0,
            "count": 0,
            "months_analyzed": 0,
            "std_dev": 0.0,
            "monthly_values": [],
        })
        entry["monthly_values"].append(amount)
        entry["total"] += amount
        entry["count"] += 1
        # Each contributing budget entry is counted as one observed month.
        entry["months_analyzed"] = entry["count"]
        entry["average_monthly"] = entry["total"] / entry["count"]

    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    # ObjectId(...) raises bson.errors.InvalidId for malformed ids, which is
    # not a ValueError/TypeError; validate first so a plain-string user id
    # simply skips the ObjectId-based lookups instead of aborting.
    object_id = ObjectId(user_id) if ObjectId.is_valid(user_id) else None

    # Try multiple query patterns to find budgets (no status filter, so
    # both OPEN and CLOSE budgets are included).
    lookups = []
    if object_id is not None:
        lookups.append(("Pattern 1 (createdBy ObjectId)", {"createdBy": object_id}))
    lookups.append(("Pattern 2 (createdBy string)", {"createdBy": user_id}))
    lookups.append(("Pattern 3 (user_id string)", {"user_id": user_id}))
    if object_id is not None:
        lookups.append(("Pattern 4 (user_id ObjectId)", {"user_id": object_id}))

    budgets = []
    for label, query in lookups:
        found = list(self.db.budgets.find(query))
        print(f"{label}: Found {len(found)} budgets")
        budgets.extend(found)

    # Pattern 5: user_id may actually be a budget _id; if so, collect every
    # budget created by that budget's creator.
    if object_id is not None:
        budget_by_id = self.db.budgets.find_one({"_id": object_id})
        if budget_by_id:
            print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
            created_by = budget_by_id.get("createdBy")
            if created_by:
                budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
                print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                budgets.extend(budgets_by_creator)

    # Pattern 6: budget _id stored as the raw (string) value
    budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
    if budget_by_id_str:
        print("Pattern 6: Found budget by _id as string")
        budgets.append(budget_by_id_str)

    # Remove duplicates based on _id, keeping first-seen order
    seen_ids = set()
    unique_budgets = []
    for b in budgets:
        budget_id = str(b.get("_id", ""))
        if budget_id not in seen_ids:
            seen_ids.add(budget_id)
            unique_budgets.append(b)
    budgets = unique_budgets

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print("Tried all query patterns. Checking sample budget structure...")
        # Get a sample budget to see the structure
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    for b in budgets:
        head_categories = b.get("headCategories", [])
        if isinstance(head_categories, list):
            for head_cat in head_categories:
                if not isinstance(head_cat, dict):
                    continue

                nested_categories = head_cat.get("categories", [])
                if isinstance(nested_categories, list):
                    for nested_cat in nested_categories:
                        if not isinstance(nested_cat, dict):
                            continue
                        nested_max = first_amount(nested_cat, "maxAmount")
                        nested_spend = first_amount(nested_cat, "spendAmount")
                        # Only include categories with limits (maxAmount > 0)
                        if nested_max > 0:
                            add_sample(
                                self._get_category_name(nested_cat.get("category")),
                                nested_spend if nested_spend > 0 else nested_max,
                            )

                # Also include the headCategory itself if it has amounts
                head_max = first_amount(head_cat, "maxAmount")
                head_spend = first_amount(head_cat, "spendAmount")
                if head_max > 0 or head_spend > 0:
                    add_sample(
                        self._get_category_name(head_cat.get("headCategory")),
                        head_spend if head_spend > 0 else head_max,
                    )

        # Also include the main budget as a category (if it has amounts)
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"

        # Derive a base amount from WalletSync fields.
        # Priority: spent amount > max amount > budget amount.
        spend_amount = first_amount(b, "spendAmount", "spend_amount", "spent")
        max_amount = first_amount(b, "maxAmount", "max_amount", "amount")
        budget_amount = first_amount(b, "budget", "budgetAmount")
        base_amount = next(
            (amount for amount in (spend_amount, max_amount, budget_amount) if amount > 0),
            0,
        )

        # The main budget is added whenever it has an amount, regardless of
        # whether headCategories were processed above.
        if base_amount > 0:
            add_sample(budget_name, base_amount)

    # Population standard deviation of the collected amounts, so downstream
    # confidence/buffer logic sees real variability rather than a constant 0.
    for entry in result.values():
        values = entry["monthly_values"]
        if len(values) > 1:
            mean = entry["average_monthly"]
            entry["std_dev"] = math.sqrt(
                sum((value - mean) ** 2 for value in values) / len(values)
            )

    print(f"Processed {len(result)} budget categories for recommendations")
    return result
527
+
528
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """
    Use OpenAI to refine the budget recommendation.

    Sends a summary of the category's spending to the chat completions
    endpoint and asks for a JSON object with ``recommended_budget``,
    ``action`` and ``reason``.

    Args:
        category: Category name shown to the model.
        data: Category stats; reads ``monthly_values``, ``std_dev`` and
            ``months_analyzed``.
        avg_expense: Average monthly expense for the category.

    Returns:
        The parsed JSON object as a dict, or None when no API key is
        configured, the request fails, or the reply is not a JSON object.
    """
    if not OPENAI_API_KEY:
        return None

    # With no monthly breakdown, present the average as the only data point.
    monthly_values = data.get("monthly_values") or [avg_expense]
    history = ", ".join(f"{value:.0f}" for value in monthly_values)

    summary = (
        f"Category: {category}\n"
        f"Monthly totals: [{history}]\n"
        f"Average spend: {avg_expense:.2f}\n"
        f"Std deviation: {data.get('std_dev', 0.0):.2f}\n"
        f"Months observed: {data.get('months_analyzed', 0)}\n"
    )

    prompt = (
        "You are an Indian personal finance coach. "
        "Given the user's spending history, decide whether to increase, decrease, "
        "or keep the upcoming month's budget and provide a short explanation. "
        "Respond strictly as JSON with the following keys:\n"
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
        "Use rupees for all amounts.\n\n"
        f"{summary}"
    )

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.1,
                "response_format": {"type": "json_object"},
            },
            timeout=30,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        parsed = json.loads(content)
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as exc:
        # Network/HTTP failures, malformed JSON, or an unexpected response
        # shape: report and let the caller fall back to the rule-based path.
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None

    if not isinstance(parsed, dict):
        # Callers read the result with .get(); anything else is unusable.
        print(f"OpenAI recommendation error for {category}: response is not a JSON object")
        return None

    return parsed
.history/app/smart_recommendation_20251225155112.py ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
    """
    Get budget recommendations for all categories based on past behavior.

    Statistics are built from the user's existing budgets when any are
    found; otherwise from expenses dated in the ~6 months before the target
    month. For each category the OpenAI suggestion is used when it carries
    a positive numeric ``recommended_budget``; otherwise the rule-based
    calculation is used.

    Args:
        user_id: User identifier
        month: Target month (1-12)
        year: Target year

    Returns:
        List of budget recommendations for each category, sorted by
        average expense (highest first). Empty when neither budgets nor
        expenses are found.
    """

    def usable_ai_budget(result):
        """Return the AI budget as a positive finite float, or None."""
        if not isinstance(result, dict):
            return None
        try:
            value = float(result.get("recommended_budget"))
        except (TypeError, ValueError):
            return None
        return value if math.isfinite(value) and value > 0 else None

    # 1) Try to build stats from existing budgets for this user (createdBy)
    category_data = self._get_category_stats_from_budgets(user_id, month, year)

    # 2) If there are no budgets, fall back to expenses history
    if not category_data:
        # Half-open window [start_date, first day of target month): an
        # inclusive bound at midnight of the previous month's last day
        # would drop every expense recorded later on that day.
        window_end = datetime(year, month, 1)
        end_date = window_end - timedelta(days=1)
        start_date = end_date - timedelta(days=180)  # ~6 months

        expenses = list(
            self.db.expenses.find(
                {
                    "user_id": user_id,
                    "date": {"$gte": start_date, "$lt": window_end},
                    "type": "expense",
                }
            )
        )

        if not expenses:
            return []

        # Group expenses by category and calculate monthly averages
        category_data = self._calculate_category_statistics(
            expenses, start_date, end_date
        )

    recommendations = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)
        ai_budget = usable_ai_budget(ai_result)

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = ai_result.get("reason") or f"AI recommendation for {category}"
            action = ai_result.get("action")
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            category=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action,
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda rec: rec.average_expense, reverse=True)

    return recommendations
102
+
103
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
104
+ """Calculate statistics for each category"""
105
+ category_data = defaultdict(lambda: {
106
+ "total": 0,
107
+ "count": 0,
108
+ "months": set(),
109
+ "monthly_totals": defaultdict(float)
110
+ })
111
+
112
+ for expense in expenses:
113
+ category = expense.get("category", "Uncategorized")
114
+ amount = expense.get("amount", 0)
115
+ date = expense.get("date")
116
+
117
+ # Handle date conversion - skip if date is None or invalid
118
+ if date is None:
119
+ continue
120
+
121
+ if isinstance(date, str):
122
+ try:
123
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
124
+ except (ValueError, AttributeError):
125
+ continue
126
+ elif not isinstance(date, datetime):
127
+ # If date is not a string or datetime, skip this expense
128
+ continue
129
+
130
+ category_data[category]["total"] += amount
131
+ category_data[category]["count"] += 1
132
+
133
+ # Track monthly totals
134
+ month_key = (date.year, date.month)
135
+ category_data[category]["months"].add(month_key)
136
+ category_data[category]["monthly_totals"][month_key] += amount
137
+
138
+ # Calculate averages
139
+ result = {}
140
+ for category, data in category_data.items():
141
+ num_months = len(data["months"]) or 1
142
+ avg_monthly = data["total"] / num_months
143
+
144
+ # Calculate standard deviation for variability
145
+ monthly_values = list(data["monthly_totals"].values())
146
+ if len(monthly_values) > 1:
147
+ mean = sum(monthly_values) / len(monthly_values)
148
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
149
+ std_dev = math.sqrt(variance)
150
+ else:
151
+ std_dev = 0
152
+
153
+ result[category] = {
154
+ "average_monthly": avg_monthly,
155
+ "total": data["total"],
156
+ "count": data["count"],
157
+ "months_analyzed": num_months,
158
+ "std_dev": std_dev,
159
+ "monthly_values": monthly_values
160
+ }
161
+
162
+ return result
163
+
164
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
165
+ """
166
+ Calculate recommended budget based on average expense.
167
+
168
+ Strategy:
169
+ - Base: Average monthly expense
170
+ - Add 5% buffer for variability
171
+ - Round to nearest 100 for cleaner numbers
172
+ """
173
+ # Add 5% buffer to handle variability
174
+ buffer = avg_expense * 0.05
175
+
176
+ # If there's high variability (std_dev > 20% of mean), add more buffer
177
+ if data["std_dev"] > 0:
178
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
179
+ if coefficient_of_variation > 0.2:
180
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
181
+
182
+ recommended = avg_expense + buffer
183
+
184
+ # Round to nearest 100 for cleaner budget numbers
185
+ recommended = round(recommended / 100) * 100
186
+
187
+ # Ensure minimum of 100 if there was any expense
188
+ if recommended < 100 and avg_expense > 0:
189
+ recommended = 100
190
+
191
+ return recommended
192
+
193
+ def _calculate_confidence(self, data: Dict) -> float:
194
+ """
195
+ Calculate confidence score (0-1) based on data quality.
196
+
197
+ Factors:
198
+ - Number of months analyzed (more = higher confidence)
199
+ - Number of transactions (more = higher confidence)
200
+ - Consistency of spending (lower std_dev = higher confidence)
201
+ """
202
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
203
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
204
+
205
+ # Consistency score (inverse of coefficient of variation)
206
+ if data["average_monthly"] > 0:
207
+ cv = data["std_dev"] / data["average_monthly"]
208
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
209
+ else:
210
+ consistency_score = 0.5
211
+
212
+ # Weighted average
213
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
214
+
215
+ return round(confidence, 2)
216
+
217
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
218
+ """Generate human-readable reason for the recommendation"""
219
+ # Format amounts with currency symbol
220
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
221
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
222
+
223
+ if recommended_budget > avg_expense:
224
+ buffer = recommended_budget - avg_expense
225
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
226
+ return (
227
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
228
+ f"We suggest setting your budget to {budget_formatted} for next month "
229
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
230
+ )
231
+ else:
232
+ return (
233
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
234
+ f"We recommend a budget of {budget_formatted} for next month."
235
+ )
236
+
237
def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
    """
    Return per-category average monthly expenses for the recent past.

    Args:
        user_id: Value matched against the ``user_id`` field of expenses.
        months: Size of the look-back window; each month counts as 30 days.

    Returns:
        One ``CategoryExpense`` per category, highest average first, or an
        empty list when no expense falls inside the window.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=months * 30)

    query = {
        "user_id": user_id,
        "date": {"$gte": window_start, "$lte": window_end},
        "type": "expense",
    }
    expenses = list(self.db.expenses.find(query))
    if not expenses:
        return []

    stats_by_category = self._calculate_category_statistics(expenses, window_start, window_end)
    averages = [
        CategoryExpense(
            category=name,
            average_monthly_expense=round(stats["average_monthly"], 2),
            total_expenses=stats["count"],
            months_analyzed=stats["months_analyzed"],
        )
        for name, stats in stats_by_category.items()
    ]
    return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)
264
+
265
+ def _get_category_name(self, category_id) -> str:
266
+ """Look up category name from categories collection"""
267
+ if not category_id:
268
+ return "Uncategorized"
269
+
270
+ try:
271
+ # Try to find category in categories collection
272
+ if isinstance(category_id, ObjectId):
273
+ category_doc = self.db.categories.find_one({"_id": category_id})
274
+ else:
275
+ try:
276
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
277
+ except (ValueError, TypeError):
278
+ category_doc = self.db.categories.find_one({"_id": category_id})
279
+
280
+ if category_doc:
281
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
282
+ except Exception as e:
283
+ print(f"Error looking up category name for {category_id}: {e}")
284
+ pass
285
+
286
+ return str(category_id) if category_id else "Uncategorized"
287
+
288
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build per-category stats from the budgets that belong to ``user_id``.

    Each budget document (e.g. "Office Maintenance", "LOGICGO") is treated
    as a spending category, and so is every entry of its ``headCategories``
    array and the ``categories`` nested inside those entries. For each one a
    single base amount is recorded: the spent amount when positive, else the
    configured limit.

    Args:
        user_id: User identifier. It is matched against ``createdBy`` and
            ``user_id`` (as ObjectId and as the raw value) and, as a last
            resort, against a budget ``_id``.
        month: Target month. Accepted for interface compatibility; not used.
        year: Target year. Accepted for interface compatibility; not used.

    Returns:
        Mapping of category name to a stats dict with the keys
        ``average_monthly``, ``total``, ``count``, ``months_analyzed``,
        ``std_dev`` and ``monthly_values``; empty when no budget is found.
    """
    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    # ObjectId() raises bson.errors.InvalidId for malformed strings, which is
    # neither ValueError nor TypeError. Validate first so that a plain-string
    # user id still reaches the string-based patterns instead of crashing.
    user_oid = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
    if user_oid is None:
        print(f"user_id {user_id!r} is not a valid ObjectId; skipping ObjectId-based patterns")

    # Lookup variants, in priority order (no status filter: OPEN and CLOSE
    # budgets are both included).
    lookups = [
        (2, "createdBy string", {"createdBy": user_id}),
        (3, "user_id string", {"user_id": user_id}),
    ]
    if user_oid is not None:
        lookups.insert(0, (1, "createdBy ObjectId", {"createdBy": user_oid}))
        lookups.append((4, "user_id ObjectId", {"user_id": user_oid}))

    budgets = []
    for number, description, query in lookups:
        try:
            found = list(self.db.budgets.find(query))
        except Exception as e:
            # Best-effort: one failing variant must not abort the others.
            print(f"Pattern {number} failed: {e}")
            continue
        print(f"Pattern {number} ({description}): Found {len(found)} budgets")
        budgets.extend(found)

    # Pattern 5: user_id may actually be a budget _id; if so, collect every
    # budget created by that budget's owner.
    if user_oid is not None:
        try:
            budget_by_id = self.db.budgets.find_one({"_id": user_oid})
            if budget_by_id:
                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                created_by = budget_by_id.get("createdBy")
                if created_by:
                    budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                    budgets.extend(budgets_by_creator)
        except Exception as e:
            print(f"Pattern 5 failed: {e}")

    # Pattern 6: budget _id stored as a plain string.
    try:
        budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
        if budget_by_id_str:
            print("Pattern 6: Found budget by _id as string")
            budgets.append(budget_by_id_str)
    except Exception as e:
        print(f"Pattern 6 failed: {e}")

    # Several patterns can return the same document; keep the first
    # occurrence of each _id, preserving discovery order.
    unique_budgets = {}
    for b in budgets:
        unique_budgets.setdefault(str(b.get("_id", "")), b)
    budgets = list(unique_budgets.values())

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print("Tried all query patterns. Checking sample budget structure...")
        # Diagnostic only: show how an arbitrary budget stores its owner.
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    result: Dict[str, Dict] = {}

    def record(name, amount):
        """Add one observation for ``name`` and refresh its running average."""
        entry = result.get(name)
        if entry is None:
            result[name] = {
                "average_monthly": amount,
                "total": amount,
                "count": 1,
                "months_analyzed": 1,
                # NOTE(review): std_dev is never recomputed when further
                # observations arrive, so it stays 0.0 - confirm intended.
                "std_dev": 0.0,
                "monthly_values": [amount],
            }
            return
        entry["total"] += amount
        entry["count"] += 1
        # Each observation is counted as one analysed month.
        entry["months_analyzed"] = entry["count"]
        entry["average_monthly"] = entry["total"] / entry["count"]
        entry["monthly_values"].append(amount)

    def limit_and_spend(entry):
        """Return (maxAmount, spendAmount) as floats; both 0 if either is malformed."""
        try:
            return (
                float(entry.get("maxAmount", 0) or 0),
                float(entry.get("spendAmount", 0) or 0),
            )
        except (ValueError, TypeError):
            return 0, 0

    for b in budgets:
        head_categories = b.get("headCategories", [])
        if not isinstance(head_categories, list):
            head_categories = []

        for head_cat in head_categories:
            if not isinstance(head_cat, dict):
                continue

            nested_categories = head_cat.get("categories", [])
            if not isinstance(nested_categories, list):
                nested_categories = []

            for nested_cat in nested_categories:
                if not isinstance(nested_cat, dict):
                    continue
                nested_max, nested_spend = limit_and_spend(nested_cat)
                # Only nested categories with a configured limit are included.
                if nested_max > 0:
                    record(
                        self._get_category_name(nested_cat.get("category")),
                        nested_spend if nested_spend > 0 else nested_max,
                    )

            # The head category itself counts when it carries any amount.
            head_max, head_spend = limit_and_spend(head_cat)
            if head_max > 0 or head_spend > 0:
                record(
                    self._get_category_name(head_cat.get("headCategory")),
                    head_spend if head_spend > 0 else head_max,
                )

        # The budget document itself is also a category (if it has amounts).
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"

        # Amounts may live under several field spellings; a malformed value
        # zeroes all three, as a single unit.
        try:
            max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
            spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
            budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
        except (ValueError, TypeError):
            max_amount = spend_amount = budget_amount = 0

        # Priority: spent amount, then limit, then generic budget amount.
        base_amount = next(
            (value for value in (spend_amount, max_amount, budget_amount) if value > 0),
            0,
        )
        if base_amount > 0:
            record(budget_name, base_amount)

    print(f"Processed {len(result)} budget categories for recommendations")
    return result
527
+
528
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """
    Ask OpenAI to refine the budget recommendation for one category.

    Args:
        category: Category name included in the prompt.
        data: Stats dict; ``monthly_values`` (optional), ``std_dev`` and
            ``months_analyzed`` are read.
        avg_expense: Average monthly spend for the category.

    Returns:
        ``None`` when no API key is configured, the request fails, or the
        reply is not a JSON object. Otherwise the parsed reply dict, whose
        ``recommended_budget`` is normalised to a positive finite float, or
        to ``None`` when the model returned something unusable, so callers
        can fall back to the rule-based value.
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # With no per-month history, present the average as the only data point.
    monthly_values = data.get("monthly_values")
    if monthly_values:
        history = ", ".join(f"{value:.0f}" for value in monthly_values)
    else:
        history = f"{avg_expense:.0f}"

    summary = (
        f"Category: {category}\n"
        f"Monthly totals: [{history}]\n"
        f"Average spend: {avg_expense:.2f}\n"
        f"Std deviation: {data['std_dev']:.2f}\n"
        f"Months observed: {data['months_analyzed']}\n"
    )

    prompt = (
        "You are an Indian personal finance coach. "
        "Given the user's spending history, decide whether to increase, decrease, "
        "or keep the upcoming month's budget and provide a short explanation. "
        "Respond strictly as JSON with the following keys:\n"
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
        "Use rupees for all amounts.\n\n"
        f"{summary}"
    )

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.1,
                "response_format": {"type": "json_object"},
            },
            timeout=30,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        result = json.loads(content)
    except Exception as exc:
        # Service boundary: any network, HTTP or parsing failure degrades to
        # "no AI recommendation" rather than failing the whole request.
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None

    if not isinstance(result, dict):
        print(
            f"OpenAI recommendation error for {category}: "
            f"expected a JSON object, got {type(result).__name__}"
        )
        return None

    # The model's output is untrusted: callers pass recommended_budget to
    # round(), so anything non-numeric, non-finite or non-positive is cleared.
    try:
        budget = float(result.get("recommended_budget"))
    except (TypeError, ValueError):
        budget = None
    else:
        if not math.isfinite(budget) or budget <= 0:
            budget = None
    result["recommended_budget"] = budget
    return result
.history/app/smart_recommendation_20251225155130.py ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
    """
    Get budget recommendations for all categories based on past behavior.

    Category stats come from the user's existing budgets when any exist,
    otherwise from roughly six months of expense history before the target
    month. Each category is then sent to OpenAI; when no usable numeric
    budget comes back the rule-based calculation is used instead.

    Args:
        user_id: User identifier
        month: Target month (1-12)
        year: Target year

    Returns:
        List of budget recommendations, highest average expense first.
        Empty when neither budgets nor expenses are found.
    """
    # 1) Try to build stats from existing budgets for this user (createdBy)
    category_data = self._get_category_stats_from_budgets(user_id, month, year)

    # 2) If there are no budgets, fall back to expenses history
    if not category_data:
        # The upper bound is the exclusive start of the target month, so the
        # whole last day of the previous month is included. (An inclusive
        # bound at 00:00 of that day would drop everything after midnight.)
        target_month_start = datetime(year, month, 1)
        start_date = target_month_start - timedelta(days=181)  # ~6 months

        expenses = list(
            self.db.expenses.find(
                {
                    "user_id": user_id,
                    "date": {"$gte": start_date, "$lt": target_month_start},
                    "type": "expense",
                }
            )
        )

        if not expenses:
            return []

        # Group expenses by category and calculate monthly averages
        category_data = self._calculate_category_statistics(
            expenses, start_date, target_month_start
        )

    recommendations: List[BudgetRecommendation] = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)

        # Model output is untrusted: accept it only when it is a dict whose
        # budget is a positive, finite number (numeric strings are coerced).
        ai_budget = None
        if isinstance(ai_result, dict):
            try:
                ai_budget = float(ai_result.get("recommended_budget"))
            except (TypeError, ValueError):
                ai_budget = None
            else:
                if not math.isfinite(ai_budget) or ai_budget <= 0:
                    ai_budget = None

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = ai_result.get("reason") or f"AI recommendation for {category}"
            action = ai_result.get("action")
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            category=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda x: x.average_expense, reverse=True)

    return recommendations
102
+
103
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
104
+ """Calculate statistics for each category"""
105
+ category_data = defaultdict(lambda: {
106
+ "total": 0,
107
+ "count": 0,
108
+ "months": set(),
109
+ "monthly_totals": defaultdict(float)
110
+ })
111
+
112
+ for expense in expenses:
113
+ category = expense.get("category", "Uncategorized")
114
+ amount = expense.get("amount", 0)
115
+ date = expense.get("date")
116
+
117
+ # Handle date conversion - skip if date is None or invalid
118
+ if date is None:
119
+ continue
120
+
121
+ if isinstance(date, str):
122
+ try:
123
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
124
+ except (ValueError, AttributeError):
125
+ continue
126
+ elif not isinstance(date, datetime):
127
+ # If date is not a string or datetime, skip this expense
128
+ continue
129
+
130
+ category_data[category]["total"] += amount
131
+ category_data[category]["count"] += 1
132
+
133
+ # Track monthly totals
134
+ month_key = (date.year, date.month)
135
+ category_data[category]["months"].add(month_key)
136
+ category_data[category]["monthly_totals"][month_key] += amount
137
+
138
+ # Calculate averages
139
+ result = {}
140
+ for category, data in category_data.items():
141
+ num_months = len(data["months"]) or 1
142
+ avg_monthly = data["total"] / num_months
143
+
144
+ # Calculate standard deviation for variability
145
+ monthly_values = list(data["monthly_totals"].values())
146
+ if len(monthly_values) > 1:
147
+ mean = sum(monthly_values) / len(monthly_values)
148
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
149
+ std_dev = math.sqrt(variance)
150
+ else:
151
+ std_dev = 0
152
+
153
+ result[category] = {
154
+ "average_monthly": avg_monthly,
155
+ "total": data["total"],
156
+ "count": data["count"],
157
+ "months_analyzed": num_months,
158
+ "std_dev": std_dev,
159
+ "monthly_values": monthly_values
160
+ }
161
+
162
+ return result
163
+
164
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
165
+ """
166
+ Calculate recommended budget based on average expense.
167
+
168
+ Strategy:
169
+ - Base: Average monthly expense
170
+ - Add 5% buffer for variability
171
+ - Round to nearest 100 for cleaner numbers
172
+ """
173
+ # Add 5% buffer to handle variability
174
+ buffer = avg_expense * 0.05
175
+
176
+ # If there's high variability (std_dev > 20% of mean), add more buffer
177
+ if data["std_dev"] > 0:
178
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
179
+ if coefficient_of_variation > 0.2:
180
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
181
+
182
+ recommended = avg_expense + buffer
183
+
184
+ # Round to nearest 100 for cleaner budget numbers
185
+ recommended = round(recommended / 100) * 100
186
+
187
+ # Ensure minimum of 100 if there was any expense
188
+ if recommended < 100 and avg_expense > 0:
189
+ recommended = 100
190
+
191
+ return recommended
192
+
193
+ def _calculate_confidence(self, data: Dict) -> float:
194
+ """
195
+ Calculate confidence score (0-1) based on data quality.
196
+
197
+ Factors:
198
+ - Number of months analyzed (more = higher confidence)
199
+ - Number of transactions (more = higher confidence)
200
+ - Consistency of spending (lower std_dev = higher confidence)
201
+ """
202
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
203
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
204
+
205
+ # Consistency score (inverse of coefficient of variation)
206
+ if data["average_monthly"] > 0:
207
+ cv = data["std_dev"] / data["average_monthly"]
208
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
209
+ else:
210
+ consistency_score = 0.5
211
+
212
+ # Weighted average
213
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
214
+
215
+ return round(confidence, 2)
216
+
217
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
218
+ """Generate human-readable reason for the recommendation"""
219
+ # Format amounts with currency symbol
220
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
221
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
222
+
223
+ if recommended_budget > avg_expense:
224
+ buffer = recommended_budget - avg_expense
225
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
226
+ return (
227
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
228
+ f"We suggest setting your budget to {budget_formatted} for next month "
229
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
230
+ )
231
+ else:
232
+ return (
233
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
234
+ f"We recommend a budget of {budget_formatted} for next month."
235
+ )
236
+
237
def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
    """
    Return per-category average monthly expenses for the recent past.

    Args:
        user_id: Value matched against the ``user_id`` field of expenses.
        months: Size of the look-back window; each month counts as 30 days.

    Returns:
        One ``CategoryExpense`` per category, highest average first, or an
        empty list when no expense falls inside the window.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=months * 30)

    query = {
        "user_id": user_id,
        "date": {"$gte": window_start, "$lte": window_end},
        "type": "expense",
    }
    expenses = list(self.db.expenses.find(query))
    if not expenses:
        return []

    stats_by_category = self._calculate_category_statistics(expenses, window_start, window_end)
    averages = [
        CategoryExpense(
            category=name,
            average_monthly_expense=round(stats["average_monthly"], 2),
            total_expenses=stats["count"],
            months_analyzed=stats["months_analyzed"],
        )
        for name, stats in stats_by_category.items()
    ]
    return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)
264
+
265
+ def _get_category_name(self, category_id) -> str:
266
+ """Look up category name from categories collection"""
267
+ if not category_id:
268
+ return "Uncategorized"
269
+
270
+ try:
271
+ # Try to find category in categories collection
272
+ if isinstance(category_id, ObjectId):
273
+ category_doc = self.db.categories.find_one({"_id": category_id})
274
+ else:
275
+ try:
276
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
277
+ except (ValueError, TypeError):
278
+ category_doc = self.db.categories.find_one({"_id": category_id})
279
+
280
+ if category_doc:
281
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
282
+ except Exception as e:
283
+ print(f"Error looking up category name for {category_id}: {e}")
284
+ pass
285
+
286
+ return str(category_id) if category_id else "Uncategorized"
287
+
288
+ def _get_category_stats_from_budgets(
289
+ self, user_id: str, month: int, year: int
290
+ ) -> Dict:
291
+ """
292
+ Build category stats from existing budgets for this user.
293
+
294
+ We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
295
+ as a spending category and derive an \"average\" from its amounts.
296
+ Also extracts categories from headCategories array.
297
+ """
298
+ budgets = []
299
+
300
+ print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
301
+
302
+ # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
303
+ # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
304
+ try:
305
+ query_objid = {"createdBy": ObjectId(user_id)}
306
+ budgets_objid = list(self.db.budgets.find(query_objid))
307
+ print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
308
+ if budgets_objid:
309
+ budgets.extend(budgets_objid)
310
+ except (ValueError, TypeError) as e:
311
+ print(f"Pattern 1 failed: {e}")
312
+ pass
313
+
314
+ # Pattern 2: Try with string user_id - no status filter
315
+ try:
316
+ query_str = {"createdBy": user_id}
317
+ budgets_str = list(self.db.budgets.find(query_str))
318
+ print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
319
+ if budgets_str:
320
+ budgets.extend(budgets_str)
321
+ except Exception as e:
322
+ print(f"Pattern 2 failed: {e}")
323
+ pass
324
+
325
+ # Pattern 3: Try with user_id field (alternative field name) - no status filter
326
+ try:
327
+ query_userid = {"user_id": user_id}
328
+ budgets_userid = list(self.db.budgets.find(query_userid))
329
+ print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
330
+ if budgets_userid:
331
+ budgets.extend(budgets_userid)
332
+ except Exception as e:
333
+ print(f"Pattern 3 failed: {e}")
334
+ pass
335
+
336
+ # Pattern 4: Try ObjectId with user_id field - no status filter
337
+ try:
338
+ query_objid_userid = {"user_id": ObjectId(user_id)}
339
+ budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
340
+ print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
341
+ if budgets_objid_userid:
342
+ budgets.extend(budgets_objid_userid)
343
+ except (ValueError, TypeError) as e:
344
+ print(f"Pattern 4 failed: {e}")
345
+ pass
346
+
347
+ # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
348
+ try:
349
+ budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
350
+ if budget_by_id:
351
+ print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
352
+ created_by = budget_by_id.get("createdBy")
353
+ if created_by:
354
+ # Now find all budgets for this createdBy
355
+ query_by_creator = {"createdBy": created_by}
356
+ budgets_by_creator = list(self.db.budgets.find(query_by_creator))
357
+ print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
358
+ if budgets_by_creator:
359
+ budgets.extend(budgets_by_creator)
360
+ except (ValueError, TypeError) as e:
361
+ print(f"Pattern 5 failed: {e}")
362
+ pass
363
+
364
+ # Pattern 6: Try finding by budget _id as string
365
+ try:
366
+ budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
367
+ if budget_by_id_str:
368
+ print(f"Pattern 6: Found budget by _id as string")
369
+ budgets.append(budget_by_id_str)
370
+ except Exception as e:
371
+ print(f"Pattern 6 failed: {e}")
372
+ pass
373
+
374
+ # Remove duplicates based on _id
375
+ seen_ids = set()
376
+ unique_budgets = []
377
+ for b in budgets:
378
+ budget_id = str(b.get("_id", ""))
379
+ if budget_id not in seen_ids:
380
+ seen_ids.add(budget_id)
381
+ unique_budgets.append(b)
382
+
383
+ budgets = unique_budgets
384
+
385
+ if not budgets:
386
+ print(f"No budgets found for user_id: {user_id}")
387
+ print(f"Tried all query patterns. Checking sample budget structure...")
388
+ # Get a sample budget to see the structure
389
+ sample = self.db.budgets.find_one()
390
+ if sample:
391
+ print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
392
+ print(f"Sample budget has user_id field: {'user_id' in sample}")
393
+ return {}
394
+
395
+ print(f"Found {len(budgets)} budgets for user_id: {user_id}")
396
+
397
+ result: Dict[str, Dict] = {}
398
+ for b in budgets:
399
+ # First, try to extract categories from headCategories array
400
+ head_categories = b.get("headCategories", [])
401
+
402
+ if head_categories and isinstance(head_categories, list):
403
+ # Process nested categories from headCategories
404
+ for head_cat in head_categories:
405
+ if not isinstance(head_cat, dict):
406
+ continue
407
+
408
+ # Get headCategory ID and amounts
409
+ head_cat_id = head_cat.get("headCategory")
410
+ try:
411
+ head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
412
+ head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
413
+ except (ValueError, TypeError):
414
+ head_cat_max = 0
415
+ head_cat_spend = 0
416
+
417
+ # Process nested categories within headCategory
418
+ nested_categories = head_cat.get("categories", [])
419
+ if nested_categories and isinstance(nested_categories, list):
420
+ for nested_cat in nested_categories:
421
+ if not isinstance(nested_cat, dict):
422
+ continue
423
+
424
+ nested_cat_id = nested_cat.get("category")
425
+ try:
426
+ nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
427
+ nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
428
+ except (ValueError, TypeError):
429
+ nested_cat_max = 0
430
+ nested_cat_spend = 0
431
+ spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
432
+
433
+ # Only include categories with limits (must have maxAmount > 0)
434
+ if nested_cat_max > 0:
435
+ # Look up actual category name
436
+ nested_category_name = self._get_category_name(nested_cat_id)
437
+ nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
438
+
439
+ if nested_category_name not in result:
440
+ result[nested_category_name] = {
441
+ "average_monthly": nested_base_amount,
442
+ "total": nested_base_amount,
443
+ "count": 1,
444
+ "months_analyzed": 1,
445
+ "std_dev": 0.0,
446
+ "monthly_values": [nested_base_amount],
447
+ }
448
+ else:
449
+ result[nested_category_name]["total"] += nested_base_amount
450
+ result[nested_category_name]["count"] += 1
451
+ result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
452
+ result[nested_category_name]["average_monthly"] = (
453
+ result[nested_category_name]["total"] / result[nested_category_name]["count"]
454
+ )
455
+ result[nested_category_name]["monthly_values"].append(nested_base_amount)
456
+
457
+ # Also include headCategory if it has amounts
458
+ if head_cat_max > 0 or head_cat_spend > 0:
459
+ head_category_name = self._get_category_name(head_cat_id)
460
+ head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
461
+
462
+ if head_category_name not in result:
463
+ result[head_category_name] = {
464
+ "average_monthly": head_base_amount,
465
+ "total": head_base_amount,
466
+ "count": 1,
467
+ "months_analyzed": 1,
468
+ "std_dev": 0.0,
469
+ "monthly_values": [head_base_amount],
470
+ }
471
+ else:
472
+ result[head_category_name]["total"] += head_base_amount
473
+ result[head_category_name]["count"] += 1
474
+ result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
475
+ result[head_category_name]["average_monthly"] = (
476
+ result[head_category_name]["total"] / result[head_category_name]["count"]
477
+ )
478
+ result[head_category_name]["monthly_values"].append(head_base_amount)
479
+
480
+ # Also include the main budget as a category (if it has amounts)
481
+ budget_name = b.get("name", "Uncategorized")
482
+ if not budget_name or budget_name == "Uncategorized":
483
+ budget_name = b.get("category") or b.get("title") or "Uncategorized"
484
+
485
+ # Derive a base amount from WalletSync fields
486
+ try:
487
+ max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
488
+ spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
489
+ budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
490
+ except (ValueError, TypeError):
491
+ max_amount = 0
492
+ spend_amount = 0
493
+ budget_amount = 0
494
+
495
+ # Priority: spendAmount > maxAmount > budgetAmount > budget
496
+ if spend_amount > 0:
497
+ base_amount = spend_amount
498
+ elif max_amount > 0:
499
+ base_amount = max_amount
500
+ elif budget_amount > 0:
501
+ base_amount = budget_amount
502
+ else:
503
+ base_amount = 0
504
+
505
+ # Only add main budget if it has an amount and we haven't processed categories
506
+ if base_amount > 0:
507
+ if budget_name not in result:
508
+ result[budget_name] = {
509
+ "average_monthly": base_amount,
510
+ "total": base_amount,
511
+ "count": 1,
512
+ "months_analyzed": 1,
513
+ "std_dev": 0.0,
514
+ "monthly_values": [base_amount],
515
+ }
516
+ else:
517
+ result[budget_name]["total"] += base_amount
518
+ result[budget_name]["count"] += 1
519
+ result[budget_name]["months_analyzed"] = result[budget_name]["count"]
520
+ result[budget_name]["average_monthly"] = (
521
+ result[budget_name]["total"] / result[budget_name]["count"]
522
+ )
523
+ result[budget_name]["monthly_values"].append(base_amount)
524
+
525
+ print(f"Processed {len(result)} budget categories for recommendations")
526
+ return result
527
+
528
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """Ask OpenAI for a budget suggestion for a single category.

    Args:
        category: Display name of the category being analysed.
        data: Statistics dict for the category; ``monthly_values``,
            ``std_dev`` and ``months_analyzed`` are read when present.
        avg_expense: Average monthly spend for the category.

    Returns:
        The decoded JSON object returned by the model (expected keys:
        ``recommended_budget``, ``action``, ``reason``), or ``None`` when no
        API key is configured, the request fails, or the reply is not a JSON
        object. Callers treat ``None`` as "use the rule-based fallback".
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # Fall back to the average when there is no per-month history to show.
    monthly_values = data.get("monthly_values") or []
    if monthly_values:
        history = ", ".join(f"{value:.0f}" for value in monthly_values)
    else:
        history = f"{avg_expense:.0f}"

    # .get() keeps a partially filled stats dict from raising before the
    # request is even attempted.
    std_dev = data.get("std_dev", 0.0)
    months_observed = data.get("months_analyzed", len(monthly_values) or 1)

    summary = (
        f"Category: {category}\n"
        f"Monthly totals: [{history}]\n"
        f"Average spend: {avg_expense:.2f}\n"
        f"Std deviation: {std_dev:.2f}\n"
        f"Months observed: {months_observed}\n"
    )

    prompt = (
        "You are an Indian personal finance coach. "
        "Given the user's spending history, decide whether to increase, decrease, "
        "or keep the upcoming month's budget and provide a short explanation. "
        "Respond strictly as JSON with the following keys:\n"
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
        "Use rupees for all amounts.\n\n"
        f"{summary}"
    )

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.1,
                "response_format": {"type": "json_object"},
            },
            timeout=30,
        )
        response.raise_for_status()
        response_data = response.json()
        content = response_data["choices"][0]["message"]["content"]
        parsed = json.loads(content)
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
        # Network/HTTP failures, malformed JSON and unexpected payload shapes
        # all degrade to the rule-based recommendation.
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None

    if not isinstance(parsed, dict):
        # The caller uses dict access on the result; anything else is unusable.
        print(f"OpenAI recommendation error for {category}: response is not a JSON object")
        return None
    return parsed
.history/app/smart_recommendation_20251225160734.py ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
class SmartBudgetRecommender:
    """
    Smart Budget Recommendation Engine

    Analyzes past spending behavior and recommends personalized budgets
    for each category based on historical data.

    Per-category statistics are built from the user's budget documents when
    any are found and from the expense history otherwise. Each category is
    then sent to OpenAI for a suggestion, with a rule-based calculation used
    as the fallback.
    """

    def __init__(self, db):
        """Keep the database handle used by every query in this class.

        Args:
            db: Object exposing the ``budgets``, ``expenses`` and
                ``categories`` collections.
        """
        self.db = db

    def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
        """
        Get budget recommendations for all categories based on past behavior.

        Args:
            user_id: User identifier
            month: Target month (1-12)
            year: Target year

        Returns:
            List of budget recommendations for each category, sorted by
            average expense (highest first). Empty when the user has neither
            budgets nor expenses in the look-back window.
        """
        # 1) Try to build stats from existing budgets for this user (createdBy)
        category_data = self._get_category_stats_from_budgets(user_id, month, year)

        # 2) If there are no budgets, fall back to expenses history
        if not category_data:
            # End at the last millisecond before the target month so expenses
            # recorded during the final day of the previous month are kept
            # (BSON datetimes have millisecond precision).
            end_date = datetime(year, month, 1) - timedelta(milliseconds=1)
            start_date = end_date - timedelta(days=180)  # ~6 months

            expenses = list(
                self.db.expenses.find(
                    {
                        "user_id": user_id,
                        "date": {"$gte": start_date, "$lte": end_date},
                        "type": "expense",
                    }
                )
            )

            if not expenses:
                return []

            # Group expenses by category and calculate monthly averages
            category_data = self._calculate_category_statistics(
                expenses, start_date, end_date
            )

        recommendations: List[BudgetRecommendation] = []

        for category, data in category_data.items():
            avg_expense = data["average_monthly"]
            confidence = self._calculate_confidence(data)

            # Always try OpenAI first (primary source of recommendation)
            ai_result = self._get_ai_recommendation(category, data, avg_expense)

            # Only trust the model's figure when it is a finite positive
            # number; strings, negatives and NaN fall through to the rules.
            ai_budget = None
            if isinstance(ai_result, dict):
                try:
                    candidate = float(ai_result.get("recommended_budget"))
                except (TypeError, ValueError):
                    candidate = None
                if candidate is not None and math.isfinite(candidate) and candidate > 0:
                    ai_budget = candidate

            if ai_budget is not None:
                recommended_budget = ai_budget
                reason = ai_result.get("reason") or f"AI recommendation for {category}"
                action = ai_result.get("action")
                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
            else:
                # Fallback to rule-based recommendation if OpenAI fails
                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                reason = self._generate_reason(category, avg_expense, recommended_budget)
                action = None
                if not ai_result:
                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
                else:
                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

            recommendations.append(BudgetRecommendation(
                budget_name=category,
                average_expense=round(avg_expense, 2),
                recommended_budget=round(recommended_budget or 0, 2),
                reason=reason,
                confidence=confidence,
                action=action
            ))

        # Sort by average expense (highest first)
        recommendations.sort(key=lambda x: x.average_expense, reverse=True)

        return recommendations

    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
        """Calculate statistics for each category.

        Args:
            expenses: Expense documents; ``category``, ``amount`` and ``date``
                are read from each one.
            start_date: Start of the analysed window (not used in the
                computation; kept for interface compatibility).
            end_date: End of the analysed window (likewise unused).

        Returns:
            Mapping of category name to a dict with ``average_monthly``,
            ``total``, ``count``, ``months_analyzed``, ``std_dev`` and
            ``monthly_values``. Averages are taken over the months in which
            the category actually has expenses.
        """
        totals = defaultdict(float)
        counts = defaultdict(int)
        monthly_totals = defaultdict(lambda: defaultdict(float))

        for expense in expenses:
            date = expense.get("date")

            # Accept datetimes and ISO-8601 strings; skip anything else
            # (including a missing date).
            if isinstance(date, str):
                try:
                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
                except ValueError:
                    continue
            elif not isinstance(date, datetime):
                continue

            # A missing/None amount counts as 0; a non-numeric or non-finite
            # amount makes the whole record unusable.
            try:
                amount = float(expense.get("amount", 0) or 0)
            except (TypeError, ValueError):
                continue
            if not math.isfinite(amount):
                continue

            # An explicit None category is grouped with the missing ones.
            category = expense.get("category") or "Uncategorized"

            totals[category] += amount
            counts[category] += 1
            monthly_totals[category][(date.year, date.month)] += amount

        result = {}
        for category, total in totals.items():
            monthly_values = list(monthly_totals[category].values())
            num_months = len(monthly_values) or 1

            # Population standard deviation of the monthly totals
            if len(monthly_values) > 1:
                mean = sum(monthly_values) / len(monthly_values)
                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
                std_dev = math.sqrt(variance)
            else:
                std_dev = 0

            result[category] = {
                "average_monthly": total / num_months,
                "total": total,
                "count": counts[category],
                "months_analyzed": num_months,
                "std_dev": std_dev,
                "monthly_values": monthly_values
            }

        return result

    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
        """
        Calculate recommended budget based on average expense.

        Strategy:
        - Base: Average monthly expense
        - Add 5% buffer for variability (10% when std_dev exceeds 20% of
          the average)
        - Round to nearest 100 for cleaner numbers
        - Never recommend less than 100 when there was any expense
        """
        buffer = avg_expense * 0.05

        # If there's high variability (std_dev > 20% of mean), add more buffer
        if data["std_dev"] > 0:
            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
            if coefficient_of_variation > 0.2:
                buffer = avg_expense * 0.10

        # Round to nearest 100 for cleaner budget numbers
        recommended = round((avg_expense + buffer) / 100) * 100

        # Ensure minimum of 100 if there was any expense
        if recommended < 100 and avg_expense > 0:
            return 100
        return recommended

    def _calculate_confidence(self, data: Dict) -> float:
        """
        Calculate confidence score (0-1) based on data quality.

        Factors:
        - Number of months analyzed (more = higher confidence, capped at 6)
        - Number of transactions (more = higher confidence, capped at 10)
        - Consistency of spending (lower std_dev = higher confidence)

        Returns:
            Weighted score (40% months, 30% count, 30% consistency) rounded
            to two decimals.
        """
        months_score = min(data["months_analyzed"] / 6, 1.0)
        count_score = min(data["count"] / 10, 1.0)

        # Consistency score (inverse of coefficient of variation); neutral
        # 0.5 when there is no spend to compare against.
        if data["average_monthly"] > 0:
            cv = data["std_dev"] / data["average_monthly"]
            consistency_score = max(0, 1 - min(cv, 1.0))
        else:
            consistency_score = 0.5

        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
        return round(confidence, 2)

    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
        """Generate human-readable reason for the recommendation.

        ``category`` is coerced to text first because budget-derived names
        are not guaranteed to be strings (they can come straight from a
        document field); a falsy value is shown as "uncategorized".
        """
        label = str(category or "Uncategorized").lower()
        avg_formatted = f"Rs.{avg_expense:,.0f}"
        budget_formatted = f"Rs.{recommended_budget:,.0f}"
        intro = f"Your average monthly {label} expense is {avg_formatted}. "

        if recommended_budget > avg_expense:
            buffer = recommended_budget - avg_expense
            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
            return (
                intro
                + f"We suggest setting your budget to {budget_formatted} for next month "
                f"(includes a {buffer_pct:.0f}% buffer for variability)."
            )
        return intro + f"We recommend a budget of {budget_formatted} for next month."

    def get_category_averages(self, user_id: str, months: int = 3) -> "List[CategoryExpense]":
        """Get average expenses by category for the past N months.

        The window is ``months * 30`` days ending now. Results are sorted by
        average monthly expense, highest first; an empty list is returned
        when no expenses are found.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=months * 30)

        expenses = list(self.db.expenses.find({
            "user_id": user_id,
            "date": {"$gte": start_date, "$lte": end_date},
            "type": "expense"
        }))

        if not expenses:
            return []

        category_data = self._calculate_category_statistics(expenses, start_date, end_date)

        result = [
            CategoryExpense(
                category=category,
                average_monthly_expense=round(data["average_monthly"], 2),
                total_expenses=data["count"],
                months_analyzed=data["months_analyzed"]
            )
            for category, data in category_data.items()
        ]
        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
        return result

    def _get_category_name(self, category_id) -> str:
        """Look up category name from categories collection.

        Args:
            category_id: ``ObjectId`` or raw id of the category.

        Returns:
            The document's ``name`` (or ``title``); the id rendered as a
            string when no document is found or the lookup fails; and
            "Uncategorized" for a falsy id.
        """
        if not category_id:
            return "Uncategorized"

        try:
            category_doc = None
            # ObjectId() raises bson.errors.InvalidId (not ValueError) for a
            # malformed string, so validate instead of catching.
            if ObjectId.is_valid(category_id):
                category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
            if category_doc is None and not isinstance(category_id, ObjectId):
                # Ids may also be stored in their raw (e.g. string) form.
                category_doc = self.db.categories.find_one({"_id": category_id})

            if category_doc:
                return str(category_doc.get("name") or category_doc.get("title") or category_id)
        except Exception as e:
            # Best-effort lookup: a database problem must not abort the whole
            # recommendation run, so log it and fall back to the id.
            print(f"Error looking up category name for {category_id}: {e}")

        return str(category_id)

    @staticmethod
    def _to_amount(value) -> float:
        """Convert a raw document value to a finite float, or 0.0 if unusable."""
        try:
            amount = float(value or 0)
        except (TypeError, ValueError):
            return 0.0
        return amount if math.isfinite(amount) else 0.0

    @staticmethod
    def _first_amount(doc: Dict, *keys: str) -> float:
        """Return the first truthy value among ``keys`` in ``doc`` as a float (0.0 if none)."""
        for key in keys:
            raw = doc.get(key)
            if raw:
                return SmartBudgetRecommender._to_amount(raw)
        return 0.0

    @staticmethod
    def _add_budget_sample(stats: Dict, name, amount: float) -> None:
        """Fold one budget amount into ``stats[name]``, keeping the derived fields in sync."""
        entry = stats.get(name)
        if entry is None:
            stats[name] = {
                "average_monthly": amount,
                "total": amount,
                "count": 1,
                "months_analyzed": 1,
                "std_dev": 0.0,
                "monthly_values": [amount],
            }
            return

        entry["total"] += amount
        entry["count"] += 1
        entry["months_analyzed"] = entry["count"]
        entry["average_monthly"] = entry["total"] / entry["count"]
        entry["monthly_values"].append(amount)
        # Same population formula as _calculate_category_statistics, so the
        # confidence score and the AI prompt see the real spread instead of
        # a constant 0.
        mean = entry["average_monthly"]
        values = entry["monthly_values"]
        entry["std_dev"] = math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

    def _find_user_budgets(self, user_id) -> List[Dict]:
        """Collect the user's budget documents, trying every known id layout.

        ObjectId-based patterns are skipped when ``user_id`` is not a valid
        ObjectId. Patterns that query with the raw id are best-effort: their
        database errors are logged and ignored.

        Returns:
            Budget documents de-duplicated on ``_id``.
        """
        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

        # ObjectId() raises bson.errors.InvalidId (not ValueError/TypeError)
        # for malformed strings, so validate up front.
        object_id = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
        if object_id is None:
            print(f"user_id {user_id!r} is not a valid ObjectId; skipping ObjectId patterns 1, 4 and 5")

        budgets: List[Dict] = []

        # (pattern number, description, field, value, best_effort)
        patterns = [
            (1, "createdBy ObjectId", "createdBy", object_id, False),
            (2, "createdBy string", "createdBy", user_id, True),
            (3, "user_id string", "user_id", user_id, True),
            (4, "user_id ObjectId", "user_id", object_id, False),
        ]
        for number, description, field, value, best_effort in patterns:
            if value is None:
                continue
            try:
                matches = list(self.db.budgets.find({field: value}))
            except Exception as e:
                if not best_effort:
                    raise
                print(f"Pattern {number} failed: {e}")
                continue
            print(f"Pattern {number} ({description}): Found {len(matches)} budgets")
            budgets.extend(matches)

        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
        if object_id is not None:
            budget_by_id = self.db.budgets.find_one({"_id": object_id})
            if budget_by_id:
                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                created_by = budget_by_id.get("createdBy")
                if created_by:
                    budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                    budgets.extend(budgets_by_creator)

        # Pattern 6: Try finding by budget _id as string
        try:
            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
            if budget_by_id_str:
                print("Pattern 6: Found budget by _id as string")
                budgets.append(budget_by_id_str)
        except Exception as e:
            print(f"Pattern 6 failed: {e}")

        # Remove duplicates based on _id
        seen_ids = set()
        unique_budgets = []
        for b in budgets:
            budget_id = str(b.get("_id", ""))
            if budget_id not in seen_ids:
                seen_ids.add(budget_id)
                unique_budgets.append(b)
        return unique_budgets

    def _get_category_stats_from_budgets(
        self, user_id: str, month: int, year: int
    ) -> Dict:
        """
        Build category stats from existing budgets for this user.

        We treat each budget document (e.g. "Office Maintenance", "LOGICGO")
        as a spending category and derive an "average" from its amounts.
        Also extracts categories from headCategories array.

        Args:
            user_id: User identifier (ObjectId hex string or raw id).
            month: Target month; not used to filter budgets.
            year: Target year; not used to filter budgets.

        Returns:
            Mapping of category name to a stats dict with the same keys as
            ``_calculate_category_statistics`` produces, or ``{}`` when no
            budgets are found.
        """
        budgets = self._find_user_budgets(user_id)

        if not budgets:
            print(f"No budgets found for user_id: {user_id}")
            print("Tried all query patterns. Checking sample budget structure...")
            # Get a sample budget to see the structure
            sample = self.db.budgets.find_one()
            if sample:
                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
                print(f"Sample budget has user_id field: {'user_id' in sample}")
            return {}

        print(f"Found {len(budgets)} budgets for user_id: {user_id}")

        result: Dict[str, Dict] = {}
        for b in budgets:
            # First, extract categories from the headCategories array
            head_categories = b.get("headCategories", [])
            if not isinstance(head_categories, list):
                head_categories = []

            for head_cat in head_categories:
                if not isinstance(head_cat, dict):
                    continue

                # Process nested categories within headCategory
                nested_categories = head_cat.get("categories", [])
                if not isinstance(nested_categories, list):
                    nested_categories = []

                for nested_cat in nested_categories:
                    if not isinstance(nested_cat, dict):
                        continue

                    # Only include categories with limits (must have maxAmount > 0)
                    nested_cat_max = self._to_amount(nested_cat.get("maxAmount"))
                    if nested_cat_max <= 0:
                        continue
                    nested_cat_spend = self._to_amount(nested_cat.get("spendAmount"))
                    self._add_budget_sample(
                        result,
                        self._get_category_name(nested_cat.get("category")),
                        nested_cat_spend if nested_cat_spend > 0 else nested_cat_max,
                    )

                # Also include headCategory if it has amounts
                head_cat_max = self._to_amount(head_cat.get("maxAmount"))
                head_cat_spend = self._to_amount(head_cat.get("spendAmount"))
                if head_cat_max > 0 or head_cat_spend > 0:
                    self._add_budget_sample(
                        result,
                        self._get_category_name(head_cat.get("headCategory")),
                        head_cat_spend if head_cat_spend > 0 else head_cat_max,
                    )

            # Derive a base amount from WalletSync fields.
            # Priority: spendAmount > maxAmount > budget/budgetAmount
            spend_amount = self._first_amount(b, "spendAmount", "spend_amount", "spent")
            max_amount = self._first_amount(b, "maxAmount", "max_amount", "amount")
            budget_amount = self._first_amount(b, "budget", "budgetAmount")

            if spend_amount > 0:
                base_amount = spend_amount
            elif max_amount > 0:
                base_amount = max_amount
            elif budget_amount > 0:
                base_amount = budget_amount
            else:
                base_amount = 0

            # The budget itself is also recorded as a category whenever it
            # has an amount, in addition to any head/nested categories above.
            if base_amount > 0:
                budget_name = b.get("name", "Uncategorized")
                if not budget_name or budget_name == "Uncategorized":
                    budget_name = b.get("category") or b.get("title") or "Uncategorized"
                if not isinstance(budget_name, str):
                    # A non-string fallback (e.g. a category id) is resolved
                    # to a display name; this always yields a string.
                    budget_name = self._get_category_name(budget_name)
                self._add_budget_sample(result, budget_name, base_amount)

        print(f"Processed {len(result)} budget categories for recommendations")
        return result

    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
        """Ask OpenAI for a budget suggestion for a single category.

        Args:
            category: Display name of the category being analysed.
            data: Statistics dict for the category; ``monthly_values``,
                ``std_dev`` and ``months_analyzed`` are read when present.
            avg_expense: Average monthly spend for the category.

        Returns:
            The decoded JSON object returned by the model (expected keys:
            ``recommended_budget``, ``action``, ``reason``), or ``None`` when
            no API key is configured, the request fails, or the reply is not
            a JSON object.
        """
        if not OPENAI_API_KEY:
            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
            return None

        print(f"🔄 Calling OpenAI API for category: {category}...")

        # Fall back to the average when there is no per-month history to show.
        monthly_values = data.get("monthly_values") or []
        if monthly_values:
            history = ", ".join(f"{value:.0f}" for value in monthly_values)
        else:
            history = f"{avg_expense:.0f}"

        # .get() keeps a partially filled stats dict from raising before the
        # request is even attempted.
        std_dev = data.get("std_dev", 0.0)
        months_observed = data.get("months_analyzed", len(monthly_values) or 1)

        summary = (
            f"Category: {category}\n"
            f"Monthly totals: [{history}]\n"
            f"Average spend: {avg_expense:.2f}\n"
            f"Std deviation: {std_dev:.2f}\n"
            f"Months observed: {months_observed}\n"
        )

        prompt = (
            "You are an Indian personal finance coach. "
            "Given the user's spending history, decide whether to increase, decrease, "
            "or keep the upcoming month's budget and provide a short explanation. "
            "Respond strictly as JSON with the following keys:\n"
            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
            "Use rupees for all amounts.\n\n"
            f"{summary}"
        )

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "response_format": {"type": "json_object"},
                },
                timeout=30,
            )
            response.raise_for_status()
            response_data = response.json()
            content = response_data["choices"][0]["message"]["content"]
            parsed = json.loads(content)
        except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as exc:
            # Network/HTTP failures, malformed JSON and unexpected payload
            # shapes all degrade to the rule-based recommendation.
            print(f"OpenAI recommendation error for {category}: {exc}")
            return None

        if not isinstance(parsed, dict):
            # The caller uses dict access on the result; anything else is unusable.
            print(f"OpenAI recommendation error for {category}: response is not a JSON object")
            return None
        return parsed
.history/app/smart_recommendation_20251225160759.py ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
    """
    Get budget recommendations for all categories based on past behavior.

    Args:
        user_id: User identifier
        month: Target month (1-12)
        year: Target year

    Returns:
        List of budget recommendations for each category, sorted by average
        expense (highest first). Empty when the user has neither budgets nor
        expenses in the look-back window.
    """
    # 1) Try to build stats from existing budgets for this user (createdBy)
    category_data = self._get_category_stats_from_budgets(user_id, month, year)

    # 2) If there are no budgets, fall back to expenses history
    if not category_data:
        # End at the last millisecond before the target month so expenses
        # recorded during the final day of the previous month are kept
        # (BSON datetimes have millisecond precision).
        end_date = datetime(year, month, 1) - timedelta(milliseconds=1)
        start_date = end_date - timedelta(days=180)  # ~6 months

        expenses = list(
            self.db.expenses.find(
                {
                    "user_id": user_id,
                    "date": {"$gte": start_date, "$lte": end_date},
                    "type": "expense",
                }
            )
        )

        if not expenses:
            return []

        # Group expenses by category and calculate monthly averages
        category_data = self._calculate_category_statistics(
            expenses, start_date, end_date
        )

    recommendations: List[BudgetRecommendation] = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)

        # Only trust the model's figure when it is a finite positive number;
        # strings, negatives and NaN fall through to the rule-based path.
        ai_budget = None
        if isinstance(ai_result, dict):
            try:
                candidate = float(ai_result.get("recommended_budget"))
            except (TypeError, ValueError):
                candidate = None
            if candidate is not None and math.isfinite(candidate) and candidate > 0:
                ai_budget = candidate

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = ai_result.get("reason") or f"AI recommendation for {category}"
            action = ai_result.get("action")
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            budget_name=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda x: x.average_expense, reverse=True)

    return recommendations
102
+
103
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
104
+ """Calculate statistics for each category"""
105
+ category_data = defaultdict(lambda: {
106
+ "total": 0,
107
+ "count": 0,
108
+ "months": set(),
109
+ "monthly_totals": defaultdict(float)
110
+ })
111
+
112
+ for expense in expenses:
113
+ category = expense.get("category", "Uncategorized")
114
+ amount = expense.get("amount", 0)
115
+ date = expense.get("date")
116
+
117
+ # Handle date conversion - skip if date is None or invalid
118
+ if date is None:
119
+ continue
120
+
121
+ if isinstance(date, str):
122
+ try:
123
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
124
+ except (ValueError, AttributeError):
125
+ continue
126
+ elif not isinstance(date, datetime):
127
+ # If date is not a string or datetime, skip this expense
128
+ continue
129
+
130
+ category_data[category]["total"] += amount
131
+ category_data[category]["count"] += 1
132
+
133
+ # Track monthly totals
134
+ month_key = (date.year, date.month)
135
+ category_data[category]["months"].add(month_key)
136
+ category_data[category]["monthly_totals"][month_key] += amount
137
+
138
+ # Calculate averages
139
+ result = {}
140
+ for category, data in category_data.items():
141
+ num_months = len(data["months"]) or 1
142
+ avg_monthly = data["total"] / num_months
143
+
144
+ # Calculate standard deviation for variability
145
+ monthly_values = list(data["monthly_totals"].values())
146
+ if len(monthly_values) > 1:
147
+ mean = sum(monthly_values) / len(monthly_values)
148
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
149
+ std_dev = math.sqrt(variance)
150
+ else:
151
+ std_dev = 0
152
+
153
+ result[category] = {
154
+ "average_monthly": avg_monthly,
155
+ "total": data["total"],
156
+ "count": data["count"],
157
+ "months_analyzed": num_months,
158
+ "std_dev": std_dev,
159
+ "monthly_values": monthly_values
160
+ }
161
+
162
+ return result
163
+
164
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
165
+ """
166
+ Calculate recommended budget based on average expense.
167
+
168
+ Strategy:
169
+ - Base: Average monthly expense
170
+ - Add 5% buffer for variability
171
+ - Round to nearest 100 for cleaner numbers
172
+ """
173
+ # Add 5% buffer to handle variability
174
+ buffer = avg_expense * 0.05
175
+
176
+ # If there's high variability (std_dev > 20% of mean), add more buffer
177
+ if data["std_dev"] > 0:
178
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
179
+ if coefficient_of_variation > 0.2:
180
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
181
+
182
+ recommended = avg_expense + buffer
183
+
184
+ # Round to nearest 100 for cleaner budget numbers
185
+ recommended = round(recommended / 100) * 100
186
+
187
+ # Ensure minimum of 100 if there was any expense
188
+ if recommended < 100 and avg_expense > 0:
189
+ recommended = 100
190
+
191
+ return recommended
192
+
193
+ def _calculate_confidence(self, data: Dict) -> float:
194
+ """
195
+ Calculate confidence score (0-1) based on data quality.
196
+
197
+ Factors:
198
+ - Number of months analyzed (more = higher confidence)
199
+ - Number of transactions (more = higher confidence)
200
+ - Consistency of spending (lower std_dev = higher confidence)
201
+ """
202
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
203
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
204
+
205
+ # Consistency score (inverse of coefficient of variation)
206
+ if data["average_monthly"] > 0:
207
+ cv = data["std_dev"] / data["average_monthly"]
208
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
209
+ else:
210
+ consistency_score = 0.5
211
+
212
+ # Weighted average
213
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
214
+
215
+ return round(confidence, 2)
216
+
217
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
218
+ """Generate human-readable reason for the recommendation"""
219
+ # Format amounts with currency symbol
220
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
221
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
222
+
223
+ if recommended_budget > avg_expense:
224
+ buffer = recommended_budget - avg_expense
225
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
226
+ return (
227
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
228
+ f"We suggest setting your budget to {budget_formatted} for next month "
229
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
230
+ )
231
+ else:
232
+ return (
233
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
234
+ f"We recommend a budget of {budget_formatted} for next month."
235
+ )
236
+
237
+ def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
238
+ """Get average expenses by category for the past N months"""
239
+ end_date = datetime.now()
240
+ start_date = end_date - timedelta(days=months * 30)
241
+
242
+ expenses = list(self.db.expenses.find({
243
+ "user_id": user_id,
244
+ "date": {"$gte": start_date, "$lte": end_date},
245
+ "type": "expense"
246
+ }))
247
+
248
+ if not expenses:
249
+ return []
250
+
251
+ category_data = self._calculate_category_statistics(expenses, start_date, end_date)
252
+
253
+ result = []
254
+ for category, data in category_data.items():
255
+ result.append(CategoryExpense(
256
+ category=category,
257
+ average_monthly_expense=round(data["average_monthly"], 2),
258
+ total_expenses=data["count"],
259
+ months_analyzed=data["months_analyzed"]
260
+ ))
261
+
262
+ result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
263
+ return result
264
+
265
+ def _get_category_name(self, category_id) -> str:
266
+ """Look up category name from categories collection"""
267
+ if not category_id:
268
+ return "Uncategorized"
269
+
270
+ try:
271
+ # Try to find category in categories collection
272
+ if isinstance(category_id, ObjectId):
273
+ category_doc = self.db.categories.find_one({"_id": category_id})
274
+ else:
275
+ try:
276
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
277
+ except (ValueError, TypeError):
278
+ category_doc = self.db.categories.find_one({"_id": category_id})
279
+
280
+ if category_doc:
281
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
282
+ except Exception as e:
283
+ print(f"Error looking up category name for {category_id}: {e}")
284
+ pass
285
+
286
+ return str(category_id) if category_id else "Uncategorized"
287
+
288
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build category stats from existing budgets for this user.

    Each budget document (e.g. "Office Maintenance", "LOGICGO") is treated
    as a spending category. So is every entry of its headCategories array
    and every category nested inside those entries. The amount recorded for
    an entry is its spendAmount when positive, otherwise its maxAmount.

    Args:
        user_id: User identifier. It is matched against ``createdBy`` and
            ``user_id`` (as given and as an ObjectId), and against budget
            ``_id`` values.
        month: Accepted for interface compatibility; not used here.
        year: Accepted for interface compatibility; not used here.

    Returns:
        Dict mapping category name to a dict with ``average_monthly``,
        ``total``, ``count``, ``months_analyzed``, ``std_dev`` and
        ``monthly_values``. Empty when no budget matches.
    """
    budgets = []

    def collect(pattern, description, query):
        """Run one budgets query, report the hit count and keep the hits."""
        found = list(self.db.budgets.find(query))
        print(f"Pattern {pattern} ({description}): Found {len(found)} budgets")
        budgets.extend(found)

    def parse_amounts(*raw_values):
        """Convert a group of raw amounts to floats; any failure zeroes all."""
        try:
            return tuple(float(value or 0) for value in raw_values)
        except (ValueError, TypeError):
            return (0,) * len(raw_values)

    def first_truthy(document, *keys):
        """Return the first truthy value among the given keys, else 0."""
        for key in keys:
            value = document.get(key, 0)
            if value:
                return value
        return 0

    result: Dict[str, Dict] = {}

    def record(name, amount):
        """Add one observed amount to the running stats of a category."""
        stats = result.get(name)
        if stats is None:
            result[name] = {
                "average_monthly": amount,
                "total": amount,
                "count": 1,
                "months_analyzed": 1,
                "std_dev": 0.0,
                "monthly_values": [amount],
            }
            return
        stats["total"] += amount
        stats["count"] += 1
        # Each observation is counted as one month
        stats["months_analyzed"] = stats["count"]
        stats["average_monthly"] = stats["total"] / stats["count"]
        stats["monthly_values"].append(amount)
        # NOTE(review): std_dev stays 0.0 even with several values - confirm intended

    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    # ObjectId() raises bson.errors.InvalidId for malformed ids, which is
    # neither ValueError nor TypeError. Validate once up front so an id that
    # is not an ObjectId skips patterns 1, 4 and 5 instead of aborting.
    user_oid = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
    if user_oid is None:
        print(f"user_id {user_id} is not a valid ObjectId; skipping ObjectId-based patterns")

    # Try multiple query patterns to find budgets (no status filter, so both
    # OPEN and CLOSE budgets are included)
    # Pattern 1: createdBy as ObjectId
    if user_oid is not None:
        collect(1, "createdBy ObjectId", {"createdBy": user_oid})

    # Pattern 2: createdBy as the id exactly as given
    try:
        collect(2, "createdBy string", {"createdBy": user_id})
    except Exception as e:
        print(f"Pattern 2 failed: {e}")

    # Pattern 3: alternative field name user_id, id as given
    try:
        collect(3, "user_id string", {"user_id": user_id})
    except Exception as e:
        print(f"Pattern 3 failed: {e}")

    if user_oid is not None:
        # Pattern 4: alternative field name user_id, as ObjectId
        collect(4, "user_id ObjectId", {"user_id": user_oid})

        # Pattern 5: user_id may actually be a budget _id; if so, use that
        # budget's createdBy to find all budgets of the same creator
        budget_by_id = self.db.budgets.find_one({"_id": user_oid})
        if budget_by_id:
            print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
            created_by = budget_by_id.get("createdBy")
            if created_by:
                budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
                print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                budgets.extend(budgets_by_creator)

    # Pattern 6: budget _id stored as a plain string
    try:
        budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
        if budget_by_id_str:
            print(f"Pattern 6: Found budget by _id as string")
            budgets.append(budget_by_id_str)
    except Exception as e:
        print(f"Pattern 6 failed: {e}")

    # Remove duplicates based on _id, keeping the first occurrence
    seen_ids = set()
    unique_budgets = []
    for b in budgets:
        budget_id = str(b.get("_id", ""))
        if budget_id not in seen_ids:
            seen_ids.add(budget_id)
            unique_budgets.append(b)
    budgets = unique_budgets

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print(f"Tried all query patterns. Checking sample budget structure...")
        # Print one arbitrary budget so the stored id types can be compared
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    for b in budgets:
        # First, extract categories from the headCategories array
        head_categories = b.get("headCategories", [])
        if head_categories and isinstance(head_categories, list):
            for head_cat in head_categories:
                if not isinstance(head_cat, dict):
                    continue

                head_cat_max, head_cat_spend = parse_amounts(
                    head_cat.get("maxAmount", 0), head_cat.get("spendAmount", 0)
                )

                # Nested categories are recorded before their head category
                nested_categories = head_cat.get("categories", [])
                if nested_categories and isinstance(nested_categories, list):
                    for nested_cat in nested_categories:
                        if not isinstance(nested_cat, dict):
                            continue

                        nested_cat_max, nested_cat_spend = parse_amounts(
                            nested_cat.get("maxAmount", 0), nested_cat.get("spendAmount", 0)
                        )

                        # Only include categories with limits (maxAmount > 0)
                        if nested_cat_max > 0:
                            record(
                                self._get_category_name(nested_cat.get("category")),
                                nested_cat_spend if nested_cat_spend > 0 else nested_cat_max,
                            )

                # Also include the head category itself if it has amounts
                if head_cat_max > 0 or head_cat_spend > 0:
                    record(
                        self._get_category_name(head_cat.get("headCategory")),
                        head_cat_spend if head_cat_spend > 0 else head_cat_max,
                    )

        # Also include the main budget as a category (if it has amounts)
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"

        # Derive a base amount, trying several field spellings
        max_amount, spend_amount, budget_amount = parse_amounts(
            first_truthy(b, "maxAmount", "max_amount", "amount"),
            first_truthy(b, "spendAmount", "spend_amount", "spent"),
            first_truthy(b, "budget", "budgetAmount"),
        )

        # Priority: spend amount > max amount > budget amount
        if spend_amount > 0:
            base_amount = spend_amount
        elif max_amount > 0:
            base_amount = max_amount
        elif budget_amount > 0:
            base_amount = budget_amount
        else:
            base_amount = 0

        if base_amount > 0:
            record(budget_name, base_amount)

    print(f"Processed {len(result)} budget categories for recommendations")
    return result
527
+
528
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """Ask the OpenAI chat completions endpoint for a budget suggestion.

    Args:
        category: Category name included in the prompt.
        data: Category stats; ``monthly_values``, ``std_dev`` and
            ``months_analyzed`` are read.
        avg_expense: Average monthly spend included in the prompt.

    Returns:
        The JSON-decoded message content of the first choice, or None when
        no API key is configured or the request or decoding fails.
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # With no monthly values, the average stands in for the history
    if data.get("monthly_values"):
        history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
    else:
        history = f"{avg_expense:.0f}"

    summary_lines = [
        f"Category: {category}",
        f"Monthly totals: [{history}]",
        f"Average spend: {avg_expense:.2f}",
        f"Std deviation: {data['std_dev']:.2f}",
        f"Months observed: {data['months_analyzed']}",
    ]
    summary = "\n".join(summary_lines) + "\n"

    instructions = (
        "You are an Indian personal finance coach. "
        "Given the user's spending history, decide whether to increase, decrease, "
        "or keep the upcoming month's budget and provide a short explanation. "
        "Respond strictly as JSON with the following keys:\n"
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
        "Use rupees for all amounts.\n\n"
    )
    prompt = instructions + summary

    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.1,
        "response_format": {"type": "json_object"},
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=request_headers,
            json=request_body,
            timeout=30,
        )
        response.raise_for_status()
        reply = response.json()["choices"][0]["message"]["content"]
        return json.loads(reply)
    except Exception as exc:
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None
.history/app/smart_recommendation_20251225160914.py ADDED
@@ -0,0 +1,503 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
+ def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
29
+ """
30
+ Get budget recommendations for all categories based on past behavior.
31
+
32
+ Args:
33
+ user_id: User identifier
34
+ month: Target month (1-12)
35
+ year: Target year
36
+
37
+ Returns:
38
+ List of budget recommendations for each category
39
+ """
40
+ # 1) Try to build stats from existing budgets for this user (createdBy)
41
+ category_data = self._get_category_stats_from_budgets(user_id, month, year)
42
+
43
+ # 2) If there are no budgets, fall back to expenses history
44
+ if not category_data:
45
+ end_date = datetime(year, month, 1) - timedelta(days=1)
46
+ start_date = end_date - timedelta(days=180) # ~6 months
47
+
48
+ expenses = list(
49
+ self.db.expenses.find(
50
+ {
51
+ "user_id": user_id,
52
+ "date": {"$gte": start_date, "$lte": end_date},
53
+ "type": "expense",
54
+ }
55
+ )
56
+ )
57
+
58
+ if not expenses:
59
+ return []
60
+
61
+ # Group expenses by category and calculate monthly averages
62
+ category_data = self._calculate_category_statistics(
63
+ expenses, start_date, end_date
64
+ )
65
+
66
+ recommendations: List[BudgetRecommendation] = []
67
+
68
+ for category, data in category_data.items():
69
+ avg_expense = data["average_monthly"]
70
+ confidence = self._calculate_confidence(data)
71
+
72
+ # Always try OpenAI first (primary source of recommendation)
73
+ ai_result = self._get_ai_recommendation(category, data, avg_expense)
74
+ if ai_result and ai_result.get("recommended_budget"):
75
+ recommended_budget = ai_result.get("recommended_budget")
76
+ reason = ai_result.get("reason", f"AI recommendation for {category}")
77
+ action = ai_result.get("action")
78
+ print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
79
+ else:
80
+ # Fallback to rule-based recommendation if OpenAI fails
81
+ recommended_budget = self._calculate_recommended_budget(avg_expense, data)
82
+ reason = self._generate_reason(category, avg_expense, recommended_budget)
83
+ action = None
84
+ if not ai_result:
85
+ print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
86
+ else:
87
+ print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
88
+
89
+ recommendations.append(BudgetRecommendation(
90
+ budget_name=category,
91
+ average_expense=round(avg_expense, 2),
92
+ recommended_budget=round(recommended_budget or 0, 2),
93
+ reason=reason,
94
+ confidence=confidence,
95
+ action=action
96
+ ))
97
+
98
+ # Sort by average expense (highest first)
99
+ recommendations.sort(key=lambda x: x.average_expense, reverse=True)
100
+
101
+ return recommendations
102
+
103
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
104
+ """Calculate statistics for each category"""
105
+ category_data = defaultdict(lambda: {
106
+ "total": 0,
107
+ "count": 0,
108
+ "months": set(),
109
+ "monthly_totals": defaultdict(float)
110
+ })
111
+
112
+ for expense in expenses:
113
+ category = expense.get("category", "Uncategorized")
114
+ amount = expense.get("amount", 0)
115
+ date = expense.get("date")
116
+
117
+ # Handle date conversion - skip if date is None or invalid
118
+ if date is None:
119
+ continue
120
+
121
+ if isinstance(date, str):
122
+ try:
123
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
124
+ except (ValueError, AttributeError):
125
+ continue
126
+ elif not isinstance(date, datetime):
127
+ # If date is not a string or datetime, skip this expense
128
+ continue
129
+
130
+ category_data[category]["total"] += amount
131
+ category_data[category]["count"] += 1
132
+
133
+ # Track monthly totals
134
+ month_key = (date.year, date.month)
135
+ category_data[category]["months"].add(month_key)
136
+ category_data[category]["monthly_totals"][month_key] += amount
137
+
138
+ # Calculate averages
139
+ result = {}
140
+ for category, data in category_data.items():
141
+ num_months = len(data["months"]) or 1
142
+ avg_monthly = data["total"] / num_months
143
+
144
+ # Calculate standard deviation for variability
145
+ monthly_values = list(data["monthly_totals"].values())
146
+ if len(monthly_values) > 1:
147
+ mean = sum(monthly_values) / len(monthly_values)
148
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
149
+ std_dev = math.sqrt(variance)
150
+ else:
151
+ std_dev = 0
152
+
153
+ result[category] = {
154
+ "average_monthly": avg_monthly,
155
+ "total": data["total"],
156
+ "count": data["count"],
157
+ "months_analyzed": num_months,
158
+ "std_dev": std_dev,
159
+ "monthly_values": monthly_values
160
+ }
161
+
162
+ return result
163
+
164
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
165
+ """
166
+ Calculate recommended budget based on average expense.
167
+
168
+ Strategy:
169
+ - Base: Average monthly expense
170
+ - Add 5% buffer for variability
171
+ - Round to nearest 100 for cleaner numbers
172
+ """
173
+ # Add 5% buffer to handle variability
174
+ buffer = avg_expense * 0.05
175
+
176
+ # If there's high variability (std_dev > 20% of mean), add more buffer
177
+ if data["std_dev"] > 0:
178
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
179
+ if coefficient_of_variation > 0.2:
180
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
181
+
182
+ recommended = avg_expense + buffer
183
+
184
+ # Round to nearest 100 for cleaner budget numbers
185
+ recommended = round(recommended / 100) * 100
186
+
187
+ # Ensure minimum of 100 if there was any expense
188
+ if recommended < 100 and avg_expense > 0:
189
+ recommended = 100
190
+
191
+ return recommended
192
+
193
+ def _calculate_confidence(self, data: Dict) -> float:
194
+ """
195
+ Calculate confidence score (0-1) based on data quality.
196
+
197
+ Factors:
198
+ - Number of months analyzed (more = higher confidence)
199
+ - Number of transactions (more = higher confidence)
200
+ - Consistency of spending (lower std_dev = higher confidence)
201
+ """
202
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
203
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
204
+
205
+ # Consistency score (inverse of coefficient of variation)
206
+ if data["average_monthly"] > 0:
207
+ cv = data["std_dev"] / data["average_monthly"]
208
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
209
+ else:
210
+ consistency_score = 0.5
211
+
212
+ # Weighted average
213
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
214
+
215
+ return round(confidence, 2)
216
+
217
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
218
+ """Generate human-readable reason for the recommendation"""
219
+ # Format amounts with currency symbol
220
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
221
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
222
+
223
+ if recommended_budget > avg_expense:
224
+ buffer = recommended_budget - avg_expense
225
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
226
+ return (
227
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
228
+ f"We suggest setting your budget to {budget_formatted} for next month "
229
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
230
+ )
231
+ else:
232
+ return (
233
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
234
+ f"We recommend a budget of {budget_formatted} for next month."
235
+ )
236
+
237
+ def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
238
+ """Get average expenses by category for the past N months"""
239
+ end_date = datetime.now()
240
+ start_date = end_date - timedelta(days=months * 30)
241
+
242
+ expenses = list(self.db.expenses.find({
243
+ "user_id": user_id,
244
+ "date": {"$gte": start_date, "$lte": end_date},
245
+ "type": "expense"
246
+ }))
247
+
248
+ if not expenses:
249
+ return []
250
+
251
+ category_data = self._calculate_category_statistics(expenses, start_date, end_date)
252
+
253
+ result = []
254
+ for category, data in category_data.items():
255
+ result.append(CategoryExpense(
256
+ category=category,
257
+ average_monthly_expense=round(data["average_monthly"], 2),
258
+ total_expenses=data["count"],
259
+ months_analyzed=data["months_analyzed"]
260
+ ))
261
+
262
+ result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
263
+ return result
264
+
265
+ def _get_category_name(self, category_id) -> str:
266
+ """Look up category name from categories collection"""
267
+ if not category_id:
268
+ return "Uncategorized"
269
+
270
+ try:
271
+ # Try to find category in categories collection
272
+ if isinstance(category_id, ObjectId):
273
+ category_doc = self.db.categories.find_one({"_id": category_id})
274
+ else:
275
+ try:
276
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
277
+ except (ValueError, TypeError):
278
+ category_doc = self.db.categories.find_one({"_id": category_id})
279
+
280
+ if category_doc:
281
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
282
+ except Exception as e:
283
+ print(f"Error looking up category name for {category_id}: {e}")
284
+ pass
285
+
286
+ return str(category_id) if category_id else "Uncategorized"
287
+
288
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build category stats from existing budgets for this user.

    Each budget document is treated as one spending category keyed by its
    ``name`` (falling back to ``category``, then ``title``, then
    "Uncategorized"). Nested ``headCategories`` entries are NOT expanded.
    Budgets sharing a name are merged and their amounts are treated as
    successive observations.

    Args:
        user_id: Identifier matched against ``createdBy`` / ``user_id`` /
            ``_id`` of the budgets collection, both as a raw value and,
            when it is a valid ObjectId, as an ObjectId.
        month: Target month; accepted for interface compatibility, unused.
        year: Target year; accepted for interface compatibility, unused.

    Returns:
        Mapping of budget name to a dict with the keys ``average_monthly``,
        ``total``, ``count``, ``months_analyzed``, ``std_dev`` and
        ``monthly_values``. Empty when no budget with a positive amount
        is found.
    """
    budgets = []

    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    # ObjectId() raises bson.errors.InvalidId for malformed ids, which is
    # neither a ValueError nor a TypeError. Validate up front so a plain
    # string id cannot abort the lookup before the string patterns run.
    user_oid = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
    if user_oid is None:
        print(f"user_id {user_id} is not a valid ObjectId; skipping ObjectId-based patterns")

    # Pattern 1: createdBy stored as ObjectId - no status filter
    if user_oid is not None:
        budgets_objid = list(self.db.budgets.find({"createdBy": user_oid}))
        print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
        budgets.extend(budgets_objid)

    # Pattern 2: createdBy stored as the raw user_id - no status filter
    try:
        budgets_str = list(self.db.budgets.find({"createdBy": user_id}))
        print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
        budgets.extend(budgets_str)
    except Exception as e:  # best-effort probe; keep trying other patterns
        print(f"Pattern 2 failed: {e}")

    # Pattern 3: alternative field name user_id, raw value
    try:
        budgets_userid = list(self.db.budgets.find({"user_id": user_id}))
        print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
        budgets.extend(budgets_userid)
    except Exception as e:  # best-effort probe; keep trying other patterns
        print(f"Pattern 3 failed: {e}")

    # Pattern 4: alternative field name user_id, ObjectId value
    if user_oid is not None:
        budgets_objid_userid = list(self.db.budgets.find({"user_id": user_oid}))
        print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
        budgets.extend(budgets_objid_userid)

    # Pattern 5: user_id is actually a budget _id; follow its createdBy
    if user_oid is not None:
        budget_by_id = self.db.budgets.find_one({"_id": user_oid})
        if budget_by_id:
            print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
            created_by = budget_by_id.get("createdBy")
            if created_by:
                budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
                print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                budgets.extend(budgets_by_creator)

    # Pattern 6: budget _id stored as the raw value
    try:
        budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
        if budget_by_id_str:
            print(f"Pattern 6: Found budget by _id as string")
            budgets.append(budget_by_id_str)
    except Exception as e:  # best-effort probe
        print(f"Pattern 6 failed: {e}")

    # Several patterns can return the same document; keep the first copy.
    seen_ids = set()
    unique_budgets = []
    for b in budgets:
        budget_id = str(b.get("_id", ""))
        if budget_id not in seen_ids:
            seen_ids.add(budget_id)
            unique_budgets.append(b)
    budgets = unique_budgets

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print(f"Tried all query patterns. Checking sample budget structure...")
        # NOTE(review): this logs the createdBy value of an arbitrary
        # budget document - confirm that is acceptable for production logs.
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    amounts_by_name: Dict[str, List[float]] = {}
    for b in budgets:
        # Only the budget's own name is used; headCategories are ignored so
        # recommendations map one-to-one onto budgets the user created.
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"
        if not isinstance(budget_name, str):
            # ids or other non-string values must still yield a string key
            budget_name = str(budget_name)

        try:
            max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
            spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
            budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
        except (ValueError, TypeError):
            max_amount = spend_amount = budget_amount = 0

        # Priority: spent amount, then maximum amount, then budget amount.
        base_amount = next(
            (amt for amt in (spend_amount, max_amount, budget_amount) if amt > 0),
            0,
        )
        if base_amount > 0:
            amounts_by_name.setdefault(budget_name, []).append(base_amount)

    result: Dict[str, Dict] = {}
    for budget_name, values in amounts_by_name.items():
        count = len(values)
        total = sum(values)
        mean = total / count
        # Population standard deviation, so merged budgets do not report a
        # misleading 0.0 spread.
        if count > 1:
            std_dev = math.sqrt(sum((v - mean) ** 2 for v in values) / count)
        else:
            std_dev = 0.0
        result[budget_name] = {
            "average_monthly": mean,
            "total": total,
            "count": count,
            "months_analyzed": count,
            "std_dev": std_dev,
            "monthly_values": values,
        }

    print(f"Processed {len(result)} budget categories for recommendations")
    return result
447
+
448
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """
    Ask the OpenAI chat-completions endpoint for a budget suggestion.

    Args:
        category: Category / budget name included in the prompt.
        data: Statistics dict; ``monthly_values``, ``std_dev`` and
            ``months_analyzed`` are read.
        avg_expense: Average spend included in the prompt (and used as the
            history when ``monthly_values`` is empty).

    Returns:
        The JSON document parsed from the model's reply, or None when no
        API key is configured or the request / parsing fails.
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # Fall back to the average when there is no per-month history.
    if data.get("monthly_values") and len(data["monthly_values"]) != 0:
        history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
    else:
        history = f"{avg_expense:.0f}"

    summary_lines = [
        f"Category: {category}\n",
        f"Monthly totals: [{history}]\n",
        f"Average spend: {avg_expense:.2f}\n",
        f"Std deviation: {data['std_dev']:.2f}\n",
        f"Months observed: {data['months_analyzed']}\n",
    ]
    summary = "".join(summary_lines)

    prompt = "".join([
        "You are an Indian personal finance coach. ",
        "Given the user's spending history, decide whether to increase, decrease, ",
        "or keep the upcoming month's budget and provide a short explanation. ",
        "Respond strictly as JSON with the following keys:\n",
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n',
        "Use rupees for all amounts.\n\n",
        f"{summary}",
    ])

    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.1,
        "response_format": {"type": "json_object"},
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=request_headers,
            json=request_body,
            timeout=30,
        )
        response.raise_for_status()
        reply_text = response.json()["choices"][0]["message"]["content"]
        return json.loads(reply_text)
    except Exception as exc:
        # Any transport, HTTP or parsing problem means "no AI answer".
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None
.history/app/smart_recommendation_20251225161000.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
    """
    Get budget recommendations for all categories based on past behavior.

    Statistics come from the user's existing budgets when any are found;
    otherwise from expenses dated in the 180 days that end on the last day
    of the month before the target month. Each category is first sent to
    ``_get_ai_recommendation``; the rule-based calculation is used whenever
    that does not yield a usable positive, finite amount.

    Args:
        user_id: User identifier
        month: Target month (1-12)
        year: Target year

    Returns:
        Budget recommendations sorted by average expense, highest first.
        Empty when neither budgets nor expenses are found.
    """
    # 1) Try to build stats from existing budgets for this user (createdBy)
    category_data = self._get_category_stats_from_budgets(user_id, month, year)

    # 2) If there are no budgets, fall back to expenses history
    if not category_data:
        end_date = datetime(year, month, 1) - timedelta(days=1)
        start_date = end_date - timedelta(days=180)  # ~6 months

        expenses = list(
            self.db.expenses.find(
                {
                    "user_id": user_id,
                    "date": {"$gte": start_date, "$lte": end_date},
                    "type": "expense",
                }
            )
        )
        if not expenses:
            return []

        category_data = self._calculate_category_statistics(
            expenses, start_date, end_date
        )

    recommendations: List[BudgetRecommendation] = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)

        # The model's reply is untrusted: accept only a positive, finite
        # number so round() below cannot fail on strings or odd values.
        ai_budget = None
        if isinstance(ai_result, dict):
            raw_budget = ai_result.get("recommended_budget")
            # bool is an int subclass; True must not become a budget of 1.0
            if not isinstance(raw_budget, bool):
                try:
                    parsed = float(raw_budget)
                except (TypeError, ValueError):
                    parsed = None
                if parsed is not None and math.isfinite(parsed) and parsed > 0:
                    ai_budget = parsed

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = ai_result.get("reason") or f"AI recommendation for {category}"
            action = ai_result.get("action")
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            budget_name=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda x: x.average_expense, reverse=True)

    return recommendations
102
+
103
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
104
+ """Calculate statistics for each category"""
105
+ category_data = defaultdict(lambda: {
106
+ "total": 0,
107
+ "count": 0,
108
+ "months": set(),
109
+ "monthly_totals": defaultdict(float)
110
+ })
111
+
112
+ for expense in expenses:
113
+ category = expense.get("category", "Uncategorized")
114
+ amount = expense.get("amount", 0)
115
+ date = expense.get("date")
116
+
117
+ # Handle date conversion - skip if date is None or invalid
118
+ if date is None:
119
+ continue
120
+
121
+ if isinstance(date, str):
122
+ try:
123
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
124
+ except (ValueError, AttributeError):
125
+ continue
126
+ elif not isinstance(date, datetime):
127
+ # If date is not a string or datetime, skip this expense
128
+ continue
129
+
130
+ category_data[category]["total"] += amount
131
+ category_data[category]["count"] += 1
132
+
133
+ # Track monthly totals
134
+ month_key = (date.year, date.month)
135
+ category_data[category]["months"].add(month_key)
136
+ category_data[category]["monthly_totals"][month_key] += amount
137
+
138
+ # Calculate averages
139
+ result = {}
140
+ for category, data in category_data.items():
141
+ num_months = len(data["months"]) or 1
142
+ avg_monthly = data["total"] / num_months
143
+
144
+ # Calculate standard deviation for variability
145
+ monthly_values = list(data["monthly_totals"].values())
146
+ if len(monthly_values) > 1:
147
+ mean = sum(monthly_values) / len(monthly_values)
148
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
149
+ std_dev = math.sqrt(variance)
150
+ else:
151
+ std_dev = 0
152
+
153
+ result[category] = {
154
+ "average_monthly": avg_monthly,
155
+ "total": data["total"],
156
+ "count": data["count"],
157
+ "months_analyzed": num_months,
158
+ "std_dev": std_dev,
159
+ "monthly_values": monthly_values
160
+ }
161
+
162
+ return result
163
+
164
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
165
+ """
166
+ Calculate recommended budget based on average expense.
167
+
168
+ Strategy:
169
+ - Base: Average monthly expense
170
+ - Add 5% buffer for variability
171
+ - Round to nearest 100 for cleaner numbers
172
+ """
173
+ # Add 5% buffer to handle variability
174
+ buffer = avg_expense * 0.05
175
+
176
+ # If there's high variability (std_dev > 20% of mean), add more buffer
177
+ if data["std_dev"] > 0:
178
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
179
+ if coefficient_of_variation > 0.2:
180
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
181
+
182
+ recommended = avg_expense + buffer
183
+
184
+ # Round to nearest 100 for cleaner budget numbers
185
+ recommended = round(recommended / 100) * 100
186
+
187
+ # Ensure minimum of 100 if there was any expense
188
+ if recommended < 100 and avg_expense > 0:
189
+ recommended = 100
190
+
191
+ return recommended
192
+
193
+ def _calculate_confidence(self, data: Dict) -> float:
194
+ """
195
+ Calculate confidence score (0-1) based on data quality.
196
+
197
+ Factors:
198
+ - Number of months analyzed (more = higher confidence)
199
+ - Number of transactions (more = higher confidence)
200
+ - Consistency of spending (lower std_dev = higher confidence)
201
+ """
202
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
203
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
204
+
205
+ # Consistency score (inverse of coefficient of variation)
206
+ if data["average_monthly"] > 0:
207
+ cv = data["std_dev"] / data["average_monthly"]
208
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
209
+ else:
210
+ consistency_score = 0.5
211
+
212
+ # Weighted average
213
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
214
+
215
+ return round(confidence, 2)
216
+
217
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
218
+ """Generate human-readable reason for the recommendation"""
219
+ # Format amounts with currency symbol
220
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
221
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
222
+
223
+ if recommended_budget > avg_expense:
224
+ buffer = recommended_budget - avg_expense
225
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
226
+ return (
227
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
228
+ f"We suggest setting your budget to {budget_formatted} for next month "
229
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
230
+ )
231
+ else:
232
+ return (
233
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
234
+ f"We recommend a budget of {budget_formatted} for next month."
235
+ )
236
+
237
def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
    """
    Get average expenses by category for the past N months.

    The window is ``months * 30`` days ending at the current local time.

    Args:
        user_id: Value matched against the ``user_id`` field of expenses.
        months: Number of 30-day periods to look back.

    Returns:
        One CategoryExpense per category, sorted by average monthly
        expense, highest first. Empty when no expenses match.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=months * 30)

    query = {
        "user_id": user_id,
        "date": {"$gte": window_start, "$lte": window_end},
        "type": "expense"
    }
    expenses = list(self.db.expenses.find(query))
    if not expenses:
        return []

    stats = self._calculate_category_statistics(expenses, window_start, window_end)

    # NOTE(review): total_expenses receives the record count, not a sum -
    # confirm that matches the CategoryExpense model's intent.
    averages = [
        CategoryExpense(
            category=category,
            average_monthly_expense=round(data["average_monthly"], 2),
            total_expenses=data["count"],
            months_analyzed=data["months_analyzed"]
        )
        for category, data in stats.items()
    ]
    return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)
264
+
265
+ def _get_category_name(self, category_id) -> str:
266
+ """Look up category name from categories collection"""
267
+ if not category_id:
268
+ return "Uncategorized"
269
+
270
+ try:
271
+ # Try to find category in categories collection
272
+ if isinstance(category_id, ObjectId):
273
+ category_doc = self.db.categories.find_one({"_id": category_id})
274
+ else:
275
+ try:
276
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
277
+ except (ValueError, TypeError):
278
+ category_doc = self.db.categories.find_one({"_id": category_id})
279
+
280
+ if category_doc:
281
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
282
+ except Exception as e:
283
+ print(f"Error looking up category name for {category_id}: {e}")
284
+ pass
285
+
286
+ return str(category_id) if category_id else "Uncategorized"
287
+
288
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build category stats from existing budgets for this user.

    Each budget document is treated as one spending category keyed by its
    ``name`` (falling back to ``category``, then ``title``). Budgets whose
    name is missing, blank or "Uncategorized" are skipped. Nested
    ``headCategories`` entries are NOT expanded. Budgets sharing a name are
    merged and their amounts are treated as successive observations.

    Args:
        user_id: Identifier matched against ``createdBy`` / ``user_id`` /
            ``_id`` of the budgets collection, both as a raw value and,
            when it is a valid ObjectId, as an ObjectId.
        month: Target month; accepted for interface compatibility, unused.
        year: Target year; accepted for interface compatibility, unused.

    Returns:
        Mapping of budget name to a dict with the keys ``average_monthly``,
        ``total``, ``count``, ``months_analyzed``, ``std_dev`` and
        ``monthly_values``. Empty when no named budget with a positive
        amount is found.
    """
    budgets = []

    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    # ObjectId() raises bson.errors.InvalidId for malformed ids, which is
    # neither a ValueError nor a TypeError. Validate up front so a plain
    # string id cannot abort the lookup before the string patterns run.
    user_oid = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
    if user_oid is None:
        print(f"user_id {user_id} is not a valid ObjectId; skipping ObjectId-based patterns")

    # Pattern 1: createdBy stored as ObjectId - no status filter
    if user_oid is not None:
        budgets_objid = list(self.db.budgets.find({"createdBy": user_oid}))
        print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
        budgets.extend(budgets_objid)

    # Pattern 2: createdBy stored as the raw user_id - no status filter
    try:
        budgets_str = list(self.db.budgets.find({"createdBy": user_id}))
        print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
        budgets.extend(budgets_str)
    except Exception as e:  # best-effort probe; keep trying other patterns
        print(f"Pattern 2 failed: {e}")

    # Pattern 3: alternative field name user_id, raw value
    try:
        budgets_userid = list(self.db.budgets.find({"user_id": user_id}))
        print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
        budgets.extend(budgets_userid)
    except Exception as e:  # best-effort probe; keep trying other patterns
        print(f"Pattern 3 failed: {e}")

    # Pattern 4: alternative field name user_id, ObjectId value
    if user_oid is not None:
        budgets_objid_userid = list(self.db.budgets.find({"user_id": user_oid}))
        print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
        budgets.extend(budgets_objid_userid)

    # Pattern 5: user_id is actually a budget _id; follow its createdBy
    if user_oid is not None:
        budget_by_id = self.db.budgets.find_one({"_id": user_oid})
        if budget_by_id:
            print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
            created_by = budget_by_id.get("createdBy")
            if created_by:
                budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
                print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                budgets.extend(budgets_by_creator)

    # Pattern 6: budget _id stored as the raw value
    try:
        budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
        if budget_by_id_str:
            print(f"Pattern 6: Found budget by _id as string")
            budgets.append(budget_by_id_str)
    except Exception as e:  # best-effort probe
        print(f"Pattern 6 failed: {e}")

    # Several patterns can return the same document; keep the first copy.
    seen_ids = set()
    unique_budgets = []
    for b in budgets:
        budget_id = str(b.get("_id", ""))
        if budget_id not in seen_ids:
            seen_ids.add(budget_id)
            unique_budgets.append(b)
    budgets = unique_budgets

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print(f"Tried all query patterns. Checking sample budget structure...")
        # NOTE(review): this logs the createdBy value of an arbitrary
        # budget document - confirm that is acceptable for production logs.
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    amounts_by_name: Dict[str, List[float]] = {}
    for b in budgets:
        # Only the budget's own name is used; headCategories are ignored so
        # recommendations map one-to-one onto budgets the user created.
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"
        if not isinstance(budget_name, str):
            # ids or other non-string values must not break .strip() below
            budget_name = str(budget_name)

        # Skip if budget name is still Uncategorized or empty
        if budget_name == "Uncategorized" or not budget_name.strip():
            print(f"Skipping budget with invalid name: {b.get('_id')}")
            continue

        try:
            max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
            spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
            budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
        except (ValueError, TypeError):
            max_amount = spend_amount = budget_amount = 0

        # Priority: spent amount, then maximum amount, then budget amount.
        base_amount = next(
            (amt for amt in (spend_amount, max_amount, budget_amount) if amt > 0),
            0,
        )
        if base_amount > 0:
            amounts_by_name.setdefault(budget_name, []).append(base_amount)

    result: Dict[str, Dict] = {}
    for budget_name, values in amounts_by_name.items():
        count = len(values)
        total = sum(values)
        mean = total / count
        # Population standard deviation, so merged budgets do not report a
        # misleading 0.0 spread.
        if count > 1:
            std_dev = math.sqrt(sum((v - mean) ** 2 for v in values) / count)
        else:
            std_dev = 0.0
        result[budget_name] = {
            "average_monthly": mean,
            "total": total,
            "count": count,
            "months_analyzed": count,
            "std_dev": std_dev,
            "monthly_values": values,
        }

    print(f"Processed {len(result)} budget categories for recommendations")
    return result
452
+
453
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """
    Ask the OpenAI chat-completions endpoint for a budget suggestion.

    Args:
        category: Category / budget name included in the prompt.
        data: Statistics dict; ``monthly_values``, ``std_dev`` and
            ``months_analyzed`` are read.
        avg_expense: Average spend included in the prompt (and used as the
            history when ``monthly_values`` is empty).

    Returns:
        The JSON document parsed from the model's reply, or None when no
        API key is configured or the request / parsing fails.
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # Fall back to the average when there is no per-month history.
    if data.get("monthly_values") and len(data["monthly_values"]) != 0:
        history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
    else:
        history = f"{avg_expense:.0f}"

    summary_lines = [
        f"Category: {category}\n",
        f"Monthly totals: [{history}]\n",
        f"Average spend: {avg_expense:.2f}\n",
        f"Std deviation: {data['std_dev']:.2f}\n",
        f"Months observed: {data['months_analyzed']}\n",
    ]
    summary = "".join(summary_lines)

    prompt = "".join([
        "You are an Indian personal finance coach. ",
        "Given the user's spending history, decide whether to increase, decrease, ",
        "or keep the upcoming month's budget and provide a short explanation. ",
        "Respond strictly as JSON with the following keys:\n",
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n',
        "Use rupees for all amounts.\n\n",
        f"{summary}",
    ])

    request_headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.1,
        "response_format": {"type": "json_object"},
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=request_headers,
            json=request_body,
            timeout=30,
        )
        response.raise_for_status()
        reply_text = response.json()["choices"][0]["message"]["content"]
        return json.loads(reply_text)
    except Exception as exc:
        # Any transport, HTTP or parsing problem means "no AI answer".
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None
.history/app/smart_recommendation_20251225161022.py ADDED
@@ -0,0 +1,511 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
    """
    Get budget recommendations for the budgets a user has created.

    Statistics come only from the user's existing budgets; expense
    history is deliberately not consulted. Each budget is first sent to
    ``_get_ai_recommendation``; the rule-based calculation is used whenever
    that does not yield a usable positive, finite amount.

    Args:
        user_id: User identifier
        month: Target month (1-12)
        year: Target year

    Returns:
        Budget recommendations sorted by average expense, highest first.
        Empty when no budgets are found.
    """
    # 1) Try to build stats from existing budgets for this user (createdBy)
    category_data = self._get_category_stats_from_budgets(user_id, month, year)

    # 2) Only return recommendations for actual budgets - do NOT use expenses history.
    # The former expenses fallback sat after this return and could never
    # run, so it has been removed.
    if not category_data:
        print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
        return []

    recommendations: List[BudgetRecommendation] = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)

        # The model's reply is untrusted: accept only a positive, finite
        # number so round() below cannot fail on strings or odd values.
        ai_budget = None
        if isinstance(ai_result, dict):
            raw_budget = ai_result.get("recommended_budget")
            # bool is an int subclass; True must not become a budget of 1.0
            if not isinstance(raw_budget, bool):
                try:
                    parsed = float(raw_budget)
                except (TypeError, ValueError):
                    parsed = None
                if parsed is not None and math.isfinite(parsed) and parsed > 0:
                    ai_budget = parsed

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = ai_result.get("reason") or f"AI recommendation for {category}"
            action = ai_result.get("action")
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            budget_name=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda x: x.average_expense, reverse=True)

    return recommendations
105
+
106
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
107
+ """Calculate statistics for each category"""
108
+ category_data = defaultdict(lambda: {
109
+ "total": 0,
110
+ "count": 0,
111
+ "months": set(),
112
+ "monthly_totals": defaultdict(float)
113
+ })
114
+
115
+ for expense in expenses:
116
+ category = expense.get("category", "Uncategorized")
117
+ amount = expense.get("amount", 0)
118
+ date = expense.get("date")
119
+
120
+ # Handle date conversion - skip if date is None or invalid
121
+ if date is None:
122
+ continue
123
+
124
+ if isinstance(date, str):
125
+ try:
126
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
127
+ except (ValueError, AttributeError):
128
+ continue
129
+ elif not isinstance(date, datetime):
130
+ # If date is not a string or datetime, skip this expense
131
+ continue
132
+
133
+ category_data[category]["total"] += amount
134
+ category_data[category]["count"] += 1
135
+
136
+ # Track monthly totals
137
+ month_key = (date.year, date.month)
138
+ category_data[category]["months"].add(month_key)
139
+ category_data[category]["monthly_totals"][month_key] += amount
140
+
141
+ # Calculate averages
142
+ result = {}
143
+ for category, data in category_data.items():
144
+ num_months = len(data["months"]) or 1
145
+ avg_monthly = data["total"] / num_months
146
+
147
+ # Calculate standard deviation for variability
148
+ monthly_values = list(data["monthly_totals"].values())
149
+ if len(monthly_values) > 1:
150
+ mean = sum(monthly_values) / len(monthly_values)
151
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
152
+ std_dev = math.sqrt(variance)
153
+ else:
154
+ std_dev = 0
155
+
156
+ result[category] = {
157
+ "average_monthly": avg_monthly,
158
+ "total": data["total"],
159
+ "count": data["count"],
160
+ "months_analyzed": num_months,
161
+ "std_dev": std_dev,
162
+ "monthly_values": monthly_values
163
+ }
164
+
165
+ return result
166
+
167
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
168
+ """
169
+ Calculate recommended budget based on average expense.
170
+
171
+ Strategy:
172
+ - Base: Average monthly expense
173
+ - Add 5% buffer for variability
174
+ - Round to nearest 100 for cleaner numbers
175
+ """
176
+ # Add 5% buffer to handle variability
177
+ buffer = avg_expense * 0.05
178
+
179
+ # If there's high variability (std_dev > 20% of mean), add more buffer
180
+ if data["std_dev"] > 0:
181
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
182
+ if coefficient_of_variation > 0.2:
183
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
184
+
185
+ recommended = avg_expense + buffer
186
+
187
+ # Round to nearest 100 for cleaner budget numbers
188
+ recommended = round(recommended / 100) * 100
189
+
190
+ # Ensure minimum of 100 if there was any expense
191
+ if recommended < 100 and avg_expense > 0:
192
+ recommended = 100
193
+
194
+ return recommended
195
+
196
+ def _calculate_confidence(self, data: Dict) -> float:
197
+ """
198
+ Calculate confidence score (0-1) based on data quality.
199
+
200
+ Factors:
201
+ - Number of months analyzed (more = higher confidence)
202
+ - Number of transactions (more = higher confidence)
203
+ - Consistency of spending (lower std_dev = higher confidence)
204
+ """
205
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
206
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
207
+
208
+ # Consistency score (inverse of coefficient of variation)
209
+ if data["average_monthly"] > 0:
210
+ cv = data["std_dev"] / data["average_monthly"]
211
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
212
+ else:
213
+ consistency_score = 0.5
214
+
215
+ # Weighted average
216
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
217
+
218
+ return round(confidence, 2)
219
+
220
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
221
+ """Generate human-readable reason for the recommendation"""
222
+ # Format amounts with currency symbol
223
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
224
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
225
+
226
+ if recommended_budget > avg_expense:
227
+ buffer = recommended_budget - avg_expense
228
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
229
+ return (
230
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
231
+ f"We suggest setting your budget to {budget_formatted} for next month "
232
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
233
+ )
234
+ else:
235
+ return (
236
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
237
+ f"We recommend a budget of {budget_formatted} for next month."
238
+ )
239
+
240
def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
    """Return average monthly spending per category for a recent window.

    The window ends at the current local time and spans ``months * 30``
    days.  Only documents of the ``expenses`` collection whose ``type`` is
    ``"expense"`` and whose ``user_id`` matches are considered.

    Args:
        user_id: Value matched against the ``user_id`` field.
        months: Window length in 30-day units.

    Returns:
        One ``CategoryExpense`` per category, highest average first; an
        empty list when no expense matches.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=months * 30)

    query = {
        "user_id": user_id,
        "date": {"$gte": window_start, "$lte": window_end},
        "type": "expense",
    }
    expenses = list(self.db.expenses.find(query))
    if not expenses:
        return []

    stats = self._calculate_category_statistics(expenses, window_start, window_end)

    averages = [
        CategoryExpense(
            category=name,
            average_monthly_expense=round(entry["average_monthly"], 2),
            total_expenses=entry["count"],  # number of transactions, not a sum
            months_analyzed=entry["months_analyzed"],
        )
        for name, entry in stats.items()
    ]
    return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)
267
+
268
+ def _get_category_name(self, category_id) -> str:
269
+ """Look up category name from categories collection"""
270
+ if not category_id:
271
+ return "Uncategorized"
272
+
273
+ try:
274
+ # Try to find category in categories collection
275
+ if isinstance(category_id, ObjectId):
276
+ category_doc = self.db.categories.find_one({"_id": category_id})
277
+ else:
278
+ try:
279
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
280
+ except (ValueError, TypeError):
281
+ category_doc = self.db.categories.find_one({"_id": category_id})
282
+
283
+ if category_doc:
284
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
285
+ except Exception as e:
286
+ print(f"Error looking up category name for {category_id}: {e}")
287
+ pass
288
+
289
+ return str(category_id) if category_id else "Uncategorized"
290
+
291
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build per-budget statistics from the budgets stored for a user.

    Each budget document is treated as one spending category keyed by its
    name.  Only the top-level budget name is used; nested
    ``headCategories`` entries are not expanded.  Budgets are looked up
    with several query shapes because ``user_id`` may be stored as a
    string or an ObjectId, under ``createdBy`` or ``user_id``, or may
    itself be a budget ``_id``.  No status filter is applied.

    Args:
        user_id: Identifier supplied by the caller.
        month: Currently unused; accepted for interface compatibility.
        year: Currently unused; accepted for interface compatibility.

    Returns:
        Mapping of budget name to a dict with the keys
        ``average_monthly``, ``total``, ``count``, ``months_analyzed``,
        ``std_dev`` and ``monthly_values``.  Empty when no budget with a
        usable name and a positive amount is found.
    """
    from bson.errors import InvalidId

    budgets = []

    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    # ObjectId() raises InvalidId (not ValueError) for malformed strings.
    # Convert once so a non-ObjectId user_id skips the ObjectId-based
    # patterns instead of aborting the whole lookup.
    try:
        user_oid = ObjectId(user_id)
        oid_error = None
    except (InvalidId, TypeError, ValueError) as e:
        user_oid = None
        oid_error = e

    # Pattern 1: createdBy stored as ObjectId
    if user_oid is not None:
        found = list(self.db.budgets.find({"createdBy": user_oid}))
        print(f"Pattern 1 (createdBy ObjectId): Found {len(found)} budgets")
        budgets.extend(found)
    else:
        print(f"Pattern 1 failed: {oid_error}")

    # Pattern 2: createdBy stored as string
    try:
        found = list(self.db.budgets.find({"createdBy": user_id}))
        print(f"Pattern 2 (createdBy string): Found {len(found)} budgets")
        budgets.extend(found)
    except Exception as e:
        print(f"Pattern 2 failed: {e}")

    # Pattern 3: alternative field name user_id, stored as string
    try:
        found = list(self.db.budgets.find({"user_id": user_id}))
        print(f"Pattern 3 (user_id string): Found {len(found)} budgets")
        budgets.extend(found)
    except Exception as e:
        print(f"Pattern 3 failed: {e}")

    # Pattern 4: alternative field name user_id, stored as ObjectId
    if user_oid is not None:
        found = list(self.db.budgets.find({"user_id": user_oid}))
        print(f"Pattern 4 (user_id ObjectId): Found {len(found)} budgets")
        budgets.extend(found)
    else:
        print(f"Pattern 4 failed: {oid_error}")

    # Pattern 5: user_id is actually a budget _id; use that budget's
    # createdBy to collect all budgets of the same creator.
    if user_oid is not None:
        budget_by_id = self.db.budgets.find_one({"_id": user_oid})
        if budget_by_id:
            print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
            created_by = budget_by_id.get("createdBy")
            if created_by:
                found = list(self.db.budgets.find({"createdBy": created_by}))
                print(f"Pattern 5: Found {len(found)} budgets for createdBy: {created_by}")
                budgets.extend(found)
    else:
        print(f"Pattern 5 failed: {oid_error}")

    # Pattern 6: user_id is a budget _id stored as string
    try:
        budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
        if budget_by_id_str:
            print("Pattern 6: Found budget by _id as string")
            budgets.append(budget_by_id_str)
    except Exception as e:
        print(f"Pattern 6 failed: {e}")

    # Several patterns can return the same document; keep the first
    # occurrence of each _id.
    seen_ids = set()
    unique_budgets = []
    for b in budgets:
        budget_id = str(b.get("_id", ""))
        if budget_id not in seen_ids:
            seen_ids.add(budget_id)
            unique_budgets.append(b)

    budgets = unique_budgets

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print("Tried all query patterns. Checking sample budget structure...")
        # Diagnostic only: show how an arbitrary budget stores its owner.
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    result: Dict[str, Dict] = {}
    for b in budgets:
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"

        # A non-string name (for example an id) would break the string
        # handling below and in the callers; use its text form instead.
        if budget_name is not None and not isinstance(budget_name, str):
            budget_name = str(budget_name)

        if not budget_name or budget_name == "Uncategorized" or budget_name.strip() == "":
            print(f"Skipping budget with invalid name: {b.get('_id')}")
            continue

        # Derive a base amount; several field spellings are tolerated.
        try:
            max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
            spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
            budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
        except (ValueError, TypeError):
            max_amount = 0
            spend_amount = 0
            budget_amount = 0

        # Priority: spent amount, then maximum amount, then budget amount.
        if spend_amount > 0:
            base_amount = spend_amount
        elif max_amount > 0:
            base_amount = max_amount
        elif budget_amount > 0:
            base_amount = budget_amount
        else:
            base_amount = 0

        if base_amount <= 0:
            continue

        stats = result.get(budget_name)
        if stats is None:
            result[budget_name] = {
                "average_monthly": base_amount,
                "total": base_amount,
                "count": 1,
                "months_analyzed": 1,
                "std_dev": 0.0,
                "monthly_values": [base_amount],
            }
        else:
            # Budgets sharing a name are merged; each one counts as one
            # observation.
            stats["total"] += base_amount
            stats["count"] += 1
            stats["months_analyzed"] = stats["count"]
            stats["average_monthly"] = stats["total"] / stats["count"]
            stats["monthly_values"].append(base_amount)

    # Keep std_dev in step with monthly_values when budgets were merged
    # (population standard deviation).
    for stats in result.values():
        values = stats["monthly_values"]
        if len(values) > 1:
            mean = sum(values) / len(values)
            stats["std_dev"] = math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

    print(f"Processed {len(result)} budget categories for recommendations")
    return result
455
+
456
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """Ask the OpenAI chat-completions API for a budget recommendation.

    Args:
        category: Category name included in the prompt.
        data: Category statistics; ``monthly_values``, ``std_dev`` and
            ``months_analyzed`` are read.
        avg_expense: Average monthly expense for the category.

    Returns:
        The JSON object returned by the model as a dict, with
        ``recommended_budget`` normalised to a positive finite number or
        ``None`` when the model's value is unusable.  ``None`` is returned
        when no API key is configured, the request or parsing fails, or
        the model's reply is not a JSON object.
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # Fall back to the average when there is no monthly history to show.
    monthly_values = data.get("monthly_values")
    if monthly_values:
        history = ", ".join(f"{value:.0f}" for value in monthly_values)
    else:
        history = f"{avg_expense:.0f}"

    summary = (
        f"Category: {category}\n"
        f"Monthly totals: [{history}]\n"
        f"Average spend: {avg_expense:.2f}\n"
        f"Std deviation: {data['std_dev']:.2f}\n"
        f"Months observed: {data['months_analyzed']}\n"
    )

    prompt = (
        "You are an Indian personal finance coach. "
        "Given the user's spending history, decide whether to increase, decrease, "
        "or keep the upcoming month's budget and provide a short explanation. "
        "Respond strictly as JSON with the following keys:\n"
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
        "Use rupees for all amounts.\n\n"
        f"{summary}"
    )

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.1,
                "response_format": {"type": "json_object"},
            },
            timeout=30,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        result = json.loads(content)
    except Exception as exc:
        # Best effort: any failure makes the caller use its rule-based path.
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None

    if not isinstance(result, dict):
        print(f"OpenAI recommendation error for {category}: expected a JSON object, got {type(result).__name__}")
        return None

    # The model's output is untrusted: the caller rounds this value, so it
    # must be a real positive number.  Anything else becomes None.
    budget = result.get("recommended_budget")
    if isinstance(budget, bool):
        budget = None
    elif not isinstance(budget, (int, float)):
        try:
            budget = float(budget)
        except (TypeError, ValueError):
            budget = None
    if budget is not None and (not math.isfinite(budget) or budget <= 0):
        budget = None
    result["recommended_budget"] = budget

    return result
.history/app/smart_recommendation_20251225161052.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
class SmartBudgetRecommender:
    """
    Smart Budget Recommendation Engine

    Produces a recommended amount for each budget a user has created,
    using the OpenAI API when available and a rule-based calculation
    otherwise.  Also offers per-category averages computed from the
    expenses collection.
    """

    def __init__(self, db):
        """Store the database handle used for all collection access."""
        self.db = db

    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
        """
        Get a recommendation for every budget the user has created.

        Statistics come only from the user's budget documents; expense
        history is not consulted here.  For each budget the OpenAI result
        is used when it carries a usable ``recommended_budget``; otherwise
        the rule-based calculation is used and ``action`` is ``None``.

        Args:
            user_id: User identifier
            month: Target month (1-12); passed through to the budget lookup
            year: Target year; passed through to the budget lookup

        Returns:
            Recommendations sorted by average expense, highest first; an
            empty list when the user has no usable budgets.
        """
        category_data = self._get_category_stats_from_budgets(user_id, month, year)

        if not category_data:
            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
            return []

        recommendations: List[BudgetRecommendation] = []

        for category, data in category_data.items():
            avg_expense = data["average_monthly"]
            confidence = self._calculate_confidence(data)

            # OpenAI is the primary source of the recommendation.
            ai_result = self._get_ai_recommendation(category, data, avg_expense)
            if ai_result and ai_result.get("recommended_budget"):
                recommended_budget = ai_result.get("recommended_budget")
                reason = ai_result.get("reason", f"AI recommendation for {category}")
                action = ai_result.get("action")
                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
            else:
                # Rule-based fallback when OpenAI is unavailable or unusable.
                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                reason = self._generate_reason(category, avg_expense, recommended_budget)
                action = None
                if not ai_result:
                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
                else:
                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

            recommendations.append(BudgetRecommendation(
                budget_name=category,
                average_expense=round(avg_expense, 2),
                recommended_budget=round(recommended_budget or 0, 2),
                reason=reason,
                confidence=confidence,
                action=action
            ))

        # Sort by average expense (highest first)
        recommendations.sort(key=lambda x: x.average_expense, reverse=True)

        return recommendations

    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
        """Aggregate expense records into per-category monthly statistics.

        Records are grouped by their ``category`` value (a missing,
        ``None`` or empty category is grouped under ``"Uncategorized"``)
        and bucketed by the calendar month of their ``date``.  A record is
        skipped when its date is missing or unparseable, or when its
        amount cannot be read as a finite number.

        Args:
            expenses: Expense documents; the ``category``, ``amount`` and
                ``date`` keys are read.  ``date`` may be a ``datetime`` or
                an ISO-8601 string (a trailing ``Z`` is accepted).
            start_date: Not used by the computation; kept so existing
                callers keep working.
            end_date: Not used by the computation; kept so existing
                callers keep working.

        Returns:
            Mapping of category name to a dict with the keys
            ``average_monthly``, ``total``, ``count``, ``months_analyzed``,
            ``std_dev`` (population standard deviation of the monthly
            totals, 0 when only one month has data) and ``monthly_values``.
        """
        category_data = defaultdict(lambda: {
            "total": 0,
            "count": 0,
            "monthly_totals": defaultdict(float),
        })

        for expense in expenses:
            when = expense.get("date")
            if isinstance(when, str):
                try:
                    when = datetime.fromisoformat(when.replace("Z", "+00:00"))
                except ValueError:
                    continue
            elif not isinstance(when, datetime):
                # None or an unsupported type: the record cannot be bucketed.
                continue

            amount = expense.get("amount", 0)
            if not isinstance(amount, (int, float)):
                # Accept numeric strings and similar; a None or junk amount
                # would otherwise raise TypeError in the additions below.
                try:
                    amount = float(amount)
                except (TypeError, ValueError):
                    continue
            if not math.isfinite(amount):
                continue

            # `or` (rather than a .get default) also covers an explicit
            # None, which would otherwise become a None key in the result.
            category = expense.get("category") or "Uncategorized"

            bucket = category_data[category]
            bucket["total"] += amount
            bucket["count"] += 1
            bucket["monthly_totals"][(when.year, when.month)] += amount

        result = {}
        for category, data in category_data.items():
            monthly_values = list(data["monthly_totals"].values())
            num_months = len(monthly_values) or 1
            avg_monthly = data["total"] / num_months

            if len(monthly_values) > 1:
                mean = sum(monthly_values) / len(monthly_values)
                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
                std_dev = math.sqrt(variance)
            else:
                std_dev = 0

            result[category] = {
                "average_monthly": avg_monthly,
                "total": data["total"],
                "count": data["count"],
                "months_analyzed": num_months,
                "std_dev": std_dev,
                "monthly_values": monthly_values,
            }

        return result

    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
        """
        Derive a rule-based budget from the average monthly expense.

        The average is padded by 5%, or by 10% when the coefficient of
        variation (``std_dev / avg_expense``) exceeds 0.2.  The padded
        value is rounded to the nearest multiple of 100 with the built-in
        ``round``, and any positive average yields a budget of at least 100.

        Args:
            avg_expense: Average monthly expense for the category.
            data: Category statistics; only ``std_dev`` is read.

        Returns:
            The recommended budget as a multiple of 100.
        """
        # Add 5% buffer to handle variability
        buffer = avg_expense * 0.05

        # If there's high variability (std_dev > 20% of mean), add more buffer
        if data["std_dev"] > 0:
            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
            if coefficient_of_variation > 0.2:
                buffer = avg_expense * 0.10

        recommended = avg_expense + buffer

        # Round to nearest 100 for cleaner budget numbers
        recommended = round(recommended / 100) * 100

        # Ensure minimum of 100 if there was any expense
        if recommended < 100 and avg_expense > 0:
            recommended = 100

        return recommended

    def _calculate_confidence(self, data: Dict) -> float:
        """
        Calculate confidence score (0-1) based on data quality.

        Components and weights:
        - months analyzed, ``months_analyzed / 6`` capped at 1 (0.4)
        - number of transactions, ``count / 10`` capped at 1 (0.3)
        - consistency, ``1 - std_dev / average_monthly`` clamped to
          [0, 1]; 0.5 when the average is not positive (0.3)

        Args:
            data: Category statistics with the keys ``months_analyzed``,
                ``count``, ``std_dev`` and ``average_monthly``.

        Returns:
            The weighted score rounded to two decimals.
        """
        months_score = min(data["months_analyzed"] / 6, 1.0)
        count_score = min(data["count"] / 10, 1.0)

        if data["average_monthly"] > 0:
            cv = data["std_dev"] / data["average_monthly"]
            consistency_score = max(0, 1 - min(cv, 1.0))
        else:
            # No positive average to normalise against: use a neutral value.
            consistency_score = 0.5

        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)

        return round(confidence, 2)

    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
        """Build the user-facing explanation for a rule-based recommendation.

        Amounts are rendered as ``Rs.`` followed by a comma-grouped whole
        number.  When the budget exceeds the average, the sentence also
        states the headroom as a percentage of the average.

        Args:
            category: Category name; it is lower-cased inside the sentence.
            avg_expense: Average monthly expense for the category.
            recommended_budget: Budget being recommended.

        Returns:
            The explanation text.
        """
        avg_formatted = f"Rs.{avg_expense:,.0f}"
        budget_formatted = f"Rs.{recommended_budget:,.0f}"

        if recommended_budget > avg_expense:
            buffer = recommended_budget - avg_expense
            # Guard the division: a zero average reports a 0% buffer.
            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
            return (
                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
                f"We suggest setting your budget to {budget_formatted} for next month "
                f"(includes a {buffer_pct:.0f}% buffer for variability)."
            )

        return (
            f"Your average monthly {category.lower()} expense is {avg_formatted}. "
            f"We recommend a budget of {budget_formatted} for next month."
        )

    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
        """Return average monthly spending per category for a recent window.

        The window ends at the current local time and spans
        ``months * 30`` days.  Only documents of the ``expenses``
        collection whose ``type`` is ``"expense"`` and whose ``user_id``
        matches are considered.

        Args:
            user_id: Value matched against the ``user_id`` field.
            months: Window length in 30-day units.

        Returns:
            One ``CategoryExpense`` per category, highest average first;
            an empty list when no expense matches.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=months * 30)

        expenses = list(self.db.expenses.find({
            "user_id": user_id,
            "date": {"$gte": start_date, "$lte": end_date},
            "type": "expense"
        }))

        if not expenses:
            return []

        category_data = self._calculate_category_statistics(expenses, start_date, end_date)

        result = [
            CategoryExpense(
                category=category,
                average_monthly_expense=round(data["average_monthly"], 2),
                total_expenses=data["count"],  # number of transactions, not a sum
                months_analyzed=data["months_analyzed"],
            )
            for category, data in category_data.items()
        ]

        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
        return result

    def _get_category_name(self, category_id) -> str:
        """Resolve a category id to its display name.

        The ``categories`` collection is queried by ``_id``.  The id is
        used as an ObjectId when it can be interpreted as one, and as the
        raw value otherwise.

        Args:
            category_id: An ``ObjectId``, a string, or any other stored id
                value.  Falsy values mean "no category".

        Returns:
            The document's ``name`` (or ``title`` when ``name`` is empty),
            ``"Uncategorized"`` for a falsy id, and ``str(category_id)``
            when no document is found, the document has neither field, or
            the lookup fails.
        """
        if not category_id:
            return "Uncategorized"

        # Imported here because it is only needed past the guard above.
        from bson.errors import InvalidId

        # ObjectId() signals a malformed string with InvalidId, which is
        # not a ValueError; without catching it the raw-id query is never
        # attempted.
        try:
            lookup_id = category_id if isinstance(category_id, ObjectId) else ObjectId(category_id)
        except (InvalidId, TypeError, ValueError):
            lookup_id = category_id

        try:
            category_doc = self.db.categories.find_one({"_id": lookup_id})
        except Exception as e:
            # Best effort: a failed lookup degrades to showing the raw id.
            print(f"Error looking up category name for {category_id}: {e}")
            return str(category_id)

        if category_doc:
            return category_doc.get("name") or category_doc.get("title") or str(category_id)
        return str(category_id)

    def _get_category_stats_from_budgets(
        self, user_id: str, month: int, year: int
    ) -> Dict:
        """
        Build per-budget statistics from the budgets stored for a user.

        Each budget document is treated as one spending category keyed by
        its name.  Only the top-level budget name is used; nested
        ``headCategories`` entries are not expanded.  Budgets are looked
        up with several query shapes because ``user_id`` may be stored as
        a string or an ObjectId, under ``createdBy`` or ``user_id``, or
        may itself be a budget ``_id``.  No status filter is applied.

        Args:
            user_id: Identifier supplied by the caller.
            month: Currently unused; accepted for interface compatibility.
            year: Currently unused; accepted for interface compatibility.

        Returns:
            Mapping of budget name to a dict with the keys
            ``average_monthly``, ``total``, ``count``, ``months_analyzed``,
            ``std_dev`` and ``monthly_values``.  Empty when no budget with
            a usable name and a positive amount is found.
        """
        from bson.errors import InvalidId

        budgets = []

        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

        # ObjectId() raises InvalidId (not ValueError) for malformed
        # strings.  Convert once so a non-ObjectId user_id skips the
        # ObjectId-based patterns instead of aborting the whole lookup.
        try:
            user_oid = ObjectId(user_id)
            oid_error = None
        except (InvalidId, TypeError, ValueError) as e:
            user_oid = None
            oid_error = e

        # Pattern 1: createdBy stored as ObjectId
        if user_oid is not None:
            found = list(self.db.budgets.find({"createdBy": user_oid}))
            print(f"Pattern 1 (createdBy ObjectId): Found {len(found)} budgets")
            budgets.extend(found)
        else:
            print(f"Pattern 1 failed: {oid_error}")

        # Pattern 2: createdBy stored as string
        try:
            found = list(self.db.budgets.find({"createdBy": user_id}))
            print(f"Pattern 2 (createdBy string): Found {len(found)} budgets")
            budgets.extend(found)
        except Exception as e:
            print(f"Pattern 2 failed: {e}")

        # Pattern 3: alternative field name user_id, stored as string
        try:
            found = list(self.db.budgets.find({"user_id": user_id}))
            print(f"Pattern 3 (user_id string): Found {len(found)} budgets")
            budgets.extend(found)
        except Exception as e:
            print(f"Pattern 3 failed: {e}")

        # Pattern 4: alternative field name user_id, stored as ObjectId
        if user_oid is not None:
            found = list(self.db.budgets.find({"user_id": user_oid}))
            print(f"Pattern 4 (user_id ObjectId): Found {len(found)} budgets")
            budgets.extend(found)
        else:
            print(f"Pattern 4 failed: {oid_error}")

        # Pattern 5: user_id is actually a budget _id; use that budget's
        # createdBy to collect all budgets of the same creator.
        if user_oid is not None:
            budget_by_id = self.db.budgets.find_one({"_id": user_oid})
            if budget_by_id:
                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                created_by = budget_by_id.get("createdBy")
                if created_by:
                    found = list(self.db.budgets.find({"createdBy": created_by}))
                    print(f"Pattern 5: Found {len(found)} budgets for createdBy: {created_by}")
                    budgets.extend(found)
        else:
            print(f"Pattern 5 failed: {oid_error}")

        # Pattern 6: user_id is a budget _id stored as string
        try:
            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
            if budget_by_id_str:
                print("Pattern 6: Found budget by _id as string")
                budgets.append(budget_by_id_str)
        except Exception as e:
            print(f"Pattern 6 failed: {e}")

        # Several patterns can return the same document; keep the first
        # occurrence of each _id.
        seen_ids = set()
        unique_budgets = []
        for b in budgets:
            budget_id = str(b.get("_id", ""))
            if budget_id not in seen_ids:
                seen_ids.add(budget_id)
                unique_budgets.append(b)

        budgets = unique_budgets

        if not budgets:
            print(f"No budgets found for user_id: {user_id}")
            print("Tried all query patterns. Checking sample budget structure...")
            # Diagnostic only: show how an arbitrary budget stores its owner.
            sample = self.db.budgets.find_one()
            if sample:
                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
                print(f"Sample budget has user_id field: {'user_id' in sample}")
            return {}

        print(f"Found {len(budgets)} budgets for user_id: {user_id}")

        result: Dict[str, Dict] = {}
        for b in budgets:
            budget_name = b.get("name", "Uncategorized")
            if not budget_name or budget_name == "Uncategorized":
                budget_name = b.get("category") or b.get("title") or "Uncategorized"

            # A non-string name (for example an id) would break the string
            # handling below and in the callers; use its text form instead.
            if budget_name is not None and not isinstance(budget_name, str):
                budget_name = str(budget_name)

            if not budget_name or budget_name == "Uncategorized" or budget_name.strip() == "":
                print(f"Skipping budget with invalid name: {b.get('_id')}")
                continue

            # Derive a base amount; several field spellings are tolerated.
            try:
                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
            except (ValueError, TypeError):
                max_amount = 0
                spend_amount = 0
                budget_amount = 0

            # Priority: spent amount, then maximum amount, then budget amount.
            if spend_amount > 0:
                base_amount = spend_amount
            elif max_amount > 0:
                base_amount = max_amount
            elif budget_amount > 0:
                base_amount = budget_amount
            else:
                base_amount = 0

            if base_amount <= 0:
                continue

            stats = result.get(budget_name)
            if stats is None:
                result[budget_name] = {
                    "average_monthly": base_amount,
                    "total": base_amount,
                    "count": 1,
                    "months_analyzed": 1,
                    "std_dev": 0.0,
                    "monthly_values": [base_amount],
                }
            else:
                # Budgets sharing a name are merged; each one counts as
                # one observation.
                stats["total"] += base_amount
                stats["count"] += 1
                stats["months_analyzed"] = stats["count"]
                stats["average_monthly"] = stats["total"] / stats["count"]
                stats["monthly_values"].append(base_amount)

        # Keep std_dev in step with monthly_values when budgets were
        # merged (population standard deviation).
        for stats in result.values():
            values = stats["monthly_values"]
            if len(values) > 1:
                mean = sum(values) / len(values)
                stats["std_dev"] = math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

        print(f"Processed {len(result)} budget categories for recommendations")
        return result

    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
        """Ask the OpenAI chat-completions API for a budget recommendation.

        Args:
            category: Category name included in the prompt.
            data: Category statistics; ``monthly_values``, ``std_dev`` and
                ``months_analyzed`` are read.
            avg_expense: Average monthly expense for the category.

        Returns:
            The JSON object returned by the model as a dict, with
            ``recommended_budget`` normalised to a positive finite number
            or ``None`` when the model's value is unusable.  ``None`` is
            returned when no API key is configured, the request or parsing
            fails, or the model's reply is not a JSON object.
        """
        if not OPENAI_API_KEY:
            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
            return None

        print(f"🔄 Calling OpenAI API for category: {category}...")

        # Fall back to the average when there is no monthly history to show.
        monthly_values = data.get("monthly_values")
        if monthly_values:
            history = ", ".join(f"{value:.0f}" for value in monthly_values)
        else:
            history = f"{avg_expense:.0f}"

        summary = (
            f"Category: {category}\n"
            f"Monthly totals: [{history}]\n"
            f"Average spend: {avg_expense:.2f}\n"
            f"Std deviation: {data['std_dev']:.2f}\n"
            f"Months observed: {data['months_analyzed']}\n"
        )

        prompt = (
            "You are an Indian personal finance coach. "
            "Given the user's spending history, decide whether to increase, decrease, "
            "or keep the upcoming month's budget and provide a short explanation. "
            "Respond strictly as JSON with the following keys:\n"
            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
            "Use rupees for all amounts.\n\n"
            f"{summary}"
        )

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "response_format": {"type": "json_object"},
                },
                timeout=30,
            )
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            result = json.loads(content)
        except Exception as exc:
            # Best effort: any failure makes the caller use its rule-based path.
            print(f"OpenAI recommendation error for {category}: {exc}")
            return None

        if not isinstance(result, dict):
            print(f"OpenAI recommendation error for {category}: expected a JSON object, got {type(result).__name__}")
            return None

        # The model's output is untrusted: the caller rounds this value,
        # so it must be a real positive number.  Anything else becomes None.
        budget = result.get("recommended_budget")
        if isinstance(budget, bool):
            budget = None
        elif not isinstance(budget, (int, float)):
            try:
                budget = float(budget)
            except (TypeError, ValueError):
                budget = None
        if budget is not None and (not math.isfinite(budget) or budget <= 0):
            budget = None
        result["recommended_budget"] = budget

        return result
.history/app/smart_recommendation_20251225161110.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
class SmartBudgetRecommender:
    """
    Smart Budget Recommendation Engine

    Analyzes past spending behavior and recommends personalized budgets
    for each category based on historical data.

    Recommendations are built from the user's budget documents. Each budget
    is first sent to OpenAI; when no usable answer comes back, a rule-based
    estimate is used instead.
    """

    def __init__(self, db):
        """Bind the recommender to a database handle.

        Args:
            db: Database object; the methods of this class read its
                ``budgets``, ``expenses`` and ``categories`` collections.
        """
        self.db = db

    def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
        """
        Get budget recommendations for every budget the user has created.

        Args:
            user_id: User identifier
            month: Target month (1-12); forwarded to the budget lookup
            year: Target year; forwarded to the budget lookup

        Returns:
            List of budget recommendations sorted by average expense, highest
            first. Empty when no budgets are found for the user.
        """
        # Recommendations are derived from the user's budgets only - expense
        # history is deliberately not consulted here.
        category_data = self._get_category_stats_from_budgets(user_id, month, year)
        if not category_data:
            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
            return []

        recommendations: List[BudgetRecommendation] = []

        for category, data in category_data.items():
            avg_expense = data["average_monthly"]
            confidence = self._calculate_confidence(data)

            # Always try OpenAI first (primary source of recommendation)
            ai_result = self._get_ai_recommendation(category, data, avg_expense)

            # The model's JSON is untrusted: the amount may be missing, a
            # non-numeric string, NaN/inf or negative. Only a finite positive
            # number is accepted; anything else uses the rule-based fallback.
            ai_budget = None
            if isinstance(ai_result, dict):
                try:
                    candidate = float(ai_result.get("recommended_budget"))
                except (TypeError, ValueError, OverflowError):
                    candidate = None
                if candidate is not None and math.isfinite(candidate) and candidate > 0:
                    ai_budget = candidate

            if ai_budget is not None:
                recommended_budget = ai_budget
                reason = str(ai_result.get("reason") or f"AI recommendation for {category}")
                action = ai_result.get("action")
                if not isinstance(action, str):
                    action = None
                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
            else:
                # Fallback to rule-based recommendation if OpenAI fails
                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                reason = self._generate_reason(category, avg_expense, recommended_budget)
                action = None
                if not ai_result:
                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
                else:
                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

            recommendations.append(BudgetRecommendation(
                budget_name=category,
                average_expense=round(avg_expense, 2),
                recommended_budget=round(recommended_budget or 0, 2),
                reason=reason,
                confidence=confidence,
                action=action,
            ))

        # Sort by average expense (highest first)
        recommendations.sort(key=lambda rec: rec.average_expense, reverse=True)
        return recommendations

    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
        """Calculate per-category spending statistics.

        Args:
            expenses: Expense documents; the ``category``, ``amount`` and
                ``date`` keys are read from each one.
            start_date: Accepted for interface compatibility; not used here.
            end_date: Accepted for interface compatibility; not used here.

        Returns:
            Dict keyed by category name. Each value holds ``average_monthly``,
            ``total``, ``count``, ``months_analyzed``, ``std_dev`` (population
            standard deviation of the monthly totals) and ``monthly_values``.

        Expenses whose date is missing or unparseable, or whose amount cannot
        be read as a number, are skipped instead of raising.
        """
        category_data = defaultdict(lambda: {
            "total": 0,
            "count": 0,
            "monthly_totals": defaultdict(float),
        })

        for expense in expenses:
            when = expense.get("date")
            if isinstance(when, str):
                try:
                    # fromisoformat() does not accept a trailing "Z" on older
                    # Python versions, so spell the UTC offset out.
                    when = datetime.fromisoformat(when.replace("Z", "+00:00"))
                except ValueError:
                    continue
            elif not isinstance(when, datetime):
                # None or any other type cannot be placed in a month
                continue

            amount = expense.get("amount", 0)
            if not isinstance(amount, (int, float)):
                try:
                    amount = float(amount)
                except (TypeError, ValueError):
                    continue

            bucket = category_data[expense.get("category") or "Uncategorized"]
            bucket["total"] += amount
            bucket["count"] += 1
            bucket["monthly_totals"][(when.year, when.month)] += amount

        result = {}
        for category, data in category_data.items():
            monthly_values = list(data["monthly_totals"].values())
            num_months = len(monthly_values) or 1

            # Population standard deviation of the monthly totals
            if len(monthly_values) > 1:
                mean = sum(monthly_values) / len(monthly_values)
                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
                std_dev = math.sqrt(variance)
            else:
                std_dev = 0

            result[category] = {
                "average_monthly": data["total"] / num_months,
                "total": data["total"],
                "count": data["count"],
                "months_analyzed": num_months,
                "std_dev": std_dev,
                "monthly_values": monthly_values,
            }

        return result

    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
        """
        Calculate the rule-based budget from the average expense.

        Strategy:
        - Base: average monthly expense
        - Buffer: 5%, or 10% when std_dev exceeds 20% of the average
        - Round to the nearest 100 (round() sends exact halves to the even
          neighbour)
        - Never below 100 when there was any expense

        Args:
            avg_expense: Average monthly expense for the category.
            data: Category statistics; only ``std_dev`` is read.

        Returns:
            The recommended budget, a multiple of 100.
        """
        spread = data["std_dev"]
        variation = spread / avg_expense if spread > 0 and avg_expense > 0 else 0
        buffer_rate = 0.10 if variation > 0.2 else 0.05

        rounded = round((avg_expense + avg_expense * buffer_rate) / 100) * 100
        if avg_expense > 0 and rounded < 100:
            return 100
        return rounded

    def _calculate_confidence(self, data: Dict) -> float:
        """
        Calculate a confidence score between 0 and 1 for a category.

        Weights: months analyzed 40% (saturates at 6), number of entries 30%
        (saturates at 10), spending consistency 30% (1 minus the coefficient
        of variation, clamped to [0, 1]; 0.5 when the average is not positive).

        Args:
            data: Category statistics with ``months_analyzed``, ``count``,
                ``average_monthly`` and ``std_dev``.

        Returns:
            The weighted score rounded to two decimals.
        """
        months_score = min(data["months_analyzed"] / 6, 1.0)
        count_score = min(data["count"] / 10, 1.0)

        average = data["average_monthly"]
        if average > 0:
            consistency_score = max(0, 1 - min(data["std_dev"] / average, 1.0))
        else:
            consistency_score = 0.5

        return round(months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3, 2)

    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
        """Generate the human-readable reason for a rule-based recommendation.

        Args:
            category: Budget/category name; lower-cased in the message.
                Non-string names are converted with str() first.
            avg_expense: Average monthly expense.
            recommended_budget: Budget produced by the rule-based calculation.

        Returns:
            A one- or two-sentence explanation with amounts formatted as
            ``Rs.<amount>``.
        """
        label = str(category).lower()
        avg_formatted = f"Rs.{avg_expense:,.0f}"
        budget_formatted = f"Rs.{recommended_budget:,.0f}"

        if recommended_budget <= avg_expense:
            return (
                f"Your average monthly {label} expense is {avg_formatted}. "
                f"We recommend a budget of {budget_formatted} for next month."
            )

        buffer = recommended_budget - avg_expense
        buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
        return (
            f"Your average monthly {label} expense is {avg_formatted}. "
            f"We suggest setting your budget to {budget_formatted} for next month "
            f"(includes a {buffer_pct:.0f}% buffer for variability)."
        )

    def get_category_averages(self, user_id: str, months: int = 3) -> "List[CategoryExpense]":
        """Get average expenses by category for the past N months.

        Args:
            user_id: Value matched against the ``user_id`` field of expenses.
            months: Look-back window; each month is counted as 30 days.

        Returns:
            CategoryExpense entries sorted by average monthly expense, highest
            first. Empty when no matching expenses exist.
        """
        # NOTE(review): datetime.now() is naive local time - confirm that the
        # stored expense dates use the same convention.
        end_date = datetime.now()
        start_date = end_date - timedelta(days=months * 30)

        query = {
            "user_id": user_id,
            "date": {"$gte": start_date, "$lte": end_date},
            "type": "expense",
        }
        expenses = list(self.db.expenses.find(query))
        if not expenses:
            return []

        stats = self._calculate_category_statistics(expenses, start_date, end_date)
        averages = [
            CategoryExpense(
                category=name,
                average_monthly_expense=round(entry["average_monthly"], 2),
                total_expenses=entry["count"],
                months_analyzed=entry["months_analyzed"],
            )
            for name, entry in stats.items()
        ]
        return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)

    def _get_category_name(self, category_id) -> str:
        """Look up a category name in the categories collection.

        Args:
            category_id: An ObjectId, or a value that may be the string form
                of one, or a raw ``_id`` value.

        Returns:
            The document's ``name`` (or ``title``); ``str(category_id)`` when
            nothing is found or the lookup fails; "Uncategorized" for a falsy
            id.
        """
        if not category_id:
            return "Uncategorized"

        try:
            if isinstance(category_id, ObjectId):
                candidates = [category_id]
            else:
                # ObjectId() raises bson.errors.InvalidId (not ValueError or
                # TypeError) for malformed strings, so validate up front and
                # keep the raw value as a second candidate.
                candidates = []
                if ObjectId.is_valid(category_id):
                    candidates.append(ObjectId(category_id))
                candidates.append(category_id)

            for candidate in candidates:
                category_doc = self.db.categories.find_one({"_id": candidate})
                if category_doc:
                    return category_doc.get("name") or category_doc.get("title") or str(category_id)
        except Exception as e:
            # Best effort: a failed lookup must not break the recommendation
            print(f"Error looking up category name for {category_id}: {e}")

        return str(category_id)

    @staticmethod
    def _first_amount(doc: Dict, *keys: str) -> float:
        """Return the first truthy, float-convertible value among keys, else 0.0."""
        for key in keys:
            value = doc.get(key)
            if not value:
                continue
            try:
                return float(value)
            except (TypeError, ValueError):
                continue
        return 0.0

    def _get_category_stats_from_budgets(
        self, user_id: str, month: int, year: int
    ) -> Dict:
        """
        Build category stats from existing budgets for this user.

        Each budget document (e.g. "Office Maintenance", "LOGICGO") is treated
        as a spending category and its amount as one observation. Only the
        budget's own name is used; nested headCategories are not expanded.

        Args:
            user_id: User identifier, matched against ``createdBy`` and
                ``user_id`` both as ObjectId and as a plain string. A budget
                ``_id`` is also accepted.
            month: Currently unused.
            year: Currently unused.

        Returns:
            Dict keyed by budget name with the same statistic keys that
            ``_calculate_category_statistics`` produces; empty when no budget
            matches.
        """
        budgets = []

        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

        # ObjectId() raises bson.errors.InvalidId for malformed input, which is
        # neither ValueError nor TypeError. Validate once so an id that is not
        # an ObjectId still reaches the string-based patterns below.
        if isinstance(user_id, ObjectId):
            object_id = user_id
        elif ObjectId.is_valid(user_id):
            object_id = ObjectId(user_id)
        else:
            object_id = None
        not_an_object_id = f"{user_id!r} is not a valid ObjectId"

        # Pattern 1: createdBy as ObjectId (most common in WalletSync)
        if object_id is not None:
            found = list(self.db.budgets.find({"createdBy": object_id}))
            print(f"Pattern 1 (createdBy ObjectId): Found {len(found)} budgets")
            budgets.extend(found)
        else:
            print(f"Pattern 1 failed: {not_an_object_id}")

        # Pattern 2: createdBy as string
        try:
            found = list(self.db.budgets.find({"createdBy": user_id}))
            print(f"Pattern 2 (createdBy string): Found {len(found)} budgets")
            budgets.extend(found)
        except Exception as e:
            print(f"Pattern 2 failed: {e}")

        # Pattern 3: user_id field (alternative field name) as string
        try:
            found = list(self.db.budgets.find({"user_id": user_id}))
            print(f"Pattern 3 (user_id string): Found {len(found)} budgets")
            budgets.extend(found)
        except Exception as e:
            print(f"Pattern 3 failed: {e}")

        # Pattern 4: user_id field as ObjectId
        if object_id is not None:
            found = list(self.db.budgets.find({"user_id": object_id}))
            print(f"Pattern 4 (user_id ObjectId): Found {len(found)} budgets")
            budgets.extend(found)
        else:
            print(f"Pattern 4 failed: {not_an_object_id}")

        # Pattern 5: user_id is actually a budget _id - use that budget's creator.
        # NOTE(review): this returns every budget of that creator to whoever
        # supplies a budget id; confirm this is intended.
        if object_id is not None:
            budget_by_id = self.db.budgets.find_one({"_id": object_id})
            if budget_by_id:
                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                created_by = budget_by_id.get("createdBy")
                if created_by:
                    found = list(self.db.budgets.find({"createdBy": created_by}))
                    print(f"Pattern 5: Found {len(found)} budgets for createdBy: {created_by}")
                    budgets.extend(found)
        else:
            print(f"Pattern 5 failed: {not_an_object_id}")

        # Pattern 6: budget _id stored as a string
        try:
            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
            if budget_by_id_str:
                print("Pattern 6: Found budget by _id as string")
                budgets.append(budget_by_id_str)
        except Exception as e:
            print(f"Pattern 6 failed: {e}")

        # Several patterns can return the same document; keep the first copy
        seen_ids = set()
        unique_budgets = []
        for b in budgets:
            budget_id = str(b.get("_id", ""))
            if budget_id not in seen_ids:
                seen_ids.add(budget_id)
                unique_budgets.append(b)
        budgets = unique_budgets

        if not budgets:
            print(f"No budgets found for user_id: {user_id}")
            print("Tried all query patterns. Checking sample budget structure...")
            # Print one arbitrary budget so the stored id format can be compared
            sample = self.db.budgets.find_one()
            if sample:
                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
                print(f"Sample budget has user_id field: {'user_id' in sample}")
            return {}

        print(f"Found {len(budgets)} budgets for user_id: {user_id}")

        result: Dict[str, Dict] = {}
        for b in budgets:
            budget_name = b.get("name")
            if not budget_name or budget_name == "Uncategorized":
                budget_name = b.get("category") or b.get("title") or "Uncategorized"
            # Names come straight from the database and may not be strings
            budget_name = str(budget_name).strip()

            if not budget_name or budget_name == "Uncategorized":
                print(f"⚠️ Skipping budget with invalid name: {b.get('_id')}")
                continue

            print(f"✅ Processing budget: '{budget_name}' (id: {b.get('_id')})")

            # Priority: spent amount > maximum amount > budget amount.
            # Each field is parsed on its own so one malformed value does not
            # discard the others.
            spend_amount = self._first_amount(b, "spendAmount", "spend_amount", "spent")
            max_amount = self._first_amount(b, "maxAmount", "max_amount", "amount")
            budget_amount = self._first_amount(b, "budget", "budgetAmount")

            if spend_amount > 0:
                base_amount = spend_amount
            elif max_amount > 0:
                base_amount = max_amount
            elif budget_amount > 0:
                base_amount = budget_amount
            else:
                # Budgets without any positive amount carry no signal
                continue

            entry = result.setdefault(budget_name, {
                "average_monthly": 0.0,
                "total": 0.0,
                "count": 0,
                "months_analyzed": 0,
                "std_dev": 0.0,
                "monthly_values": [],
            })
            values = entry["monthly_values"]
            values.append(base_amount)
            entry["total"] += base_amount
            entry["count"] += 1
            # Each budget document with this name counts as one observed month
            entry["months_analyzed"] = entry["count"]
            mean = entry["total"] / entry["count"]
            entry["average_monthly"] = mean
            # Keep the spread in step with the observations (population std dev)
            entry["std_dev"] = math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

        print(f"Processed {len(result)} budget categories for recommendations")
        return result

    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
        """Use OpenAI to refine the budget recommendation.

        Args:
            category: Budget/category name placed in the prompt.
            data: Category statistics; ``monthly_values``, ``std_dev`` and
                ``months_analyzed`` are read.
            avg_expense: Average monthly expense for the category.

        Returns:
            The JSON object returned by the model as a dict, or None when the
            API key is missing, the request fails, or the reply is not a JSON
            object. The dict's values are NOT validated here.
        """
        if not OPENAI_API_KEY:
            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
            return None

        print(f"🔄 Calling OpenAI API for category: {category}...")

        # With no monthly history, present the average as the only data point
        monthly_values = data.get("monthly_values")
        if monthly_values:
            history = ", ".join(f"{value:.0f}" for value in monthly_values)
        else:
            history = f"{avg_expense:.0f}"

        summary = (
            f"Category: {category}\n"
            f"Monthly totals: [{history}]\n"
            f"Average spend: {avg_expense:.2f}\n"
            f"Std deviation: {data['std_dev']:.2f}\n"
            f"Months observed: {data['months_analyzed']}\n"
        )

        prompt = (
            "You are an Indian personal finance coach. "
            "Given the user's spending history, decide whether to increase, decrease, "
            "or keep the upcoming month's budget and provide a short explanation. "
            "Respond strictly as JSON with the following keys:\n"
            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
            "Use rupees for all amounts.\n\n"
            f"{summary}"
        )

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "response_format": {"type": "json_object"},
                },
                timeout=30,
            )
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            parsed = json.loads(content)
        except Exception as exc:
            # Best effort: any network, HTTP or parsing failure means
            # "no AI answer" and the caller falls back to the rule-based path.
            print(f"OpenAI recommendation error for {category}: {exc}")
            return None

        if not isinstance(parsed, dict):
            print(f"OpenAI recommendation error for {category}: response is not a JSON object")
            return None
        return parsed
.history/app/smart_recommendation_20251225161134.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
+ class SmartBudgetRecommender:
18
+ """
19
+ Smart Budget Recommendation Engine
20
+
21
+ Analyzes past spending behavior and recommends personalized budgets
22
+ for each category based on historical data.
23
+ """
24
+
25
+ def __init__(self, db):
26
+ self.db = db
27
+
28
def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
    """
    Get budget recommendations for every budget the user has created.

    Args:
        user_id: User identifier
        month: Target month (1-12); forwarded to the budget lookup
        year: Target year; forwarded to the budget lookup

    Returns:
        List of budget recommendations sorted by average expense, highest
        first. Empty when no budgets are found for the user.
    """
    # Recommendations are derived from the user's budgets only - expense
    # history is deliberately not consulted here.
    category_data = self._get_category_stats_from_budgets(user_id, month, year)
    if not category_data:
        print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
        return []

    recommendations: List[BudgetRecommendation] = []

    for category, data in category_data.items():
        avg_expense = data["average_monthly"]
        confidence = self._calculate_confidence(data)

        # Always try OpenAI first (primary source of recommendation)
        ai_result = self._get_ai_recommendation(category, data, avg_expense)

        # The model's JSON is untrusted: the amount may be missing, a
        # non-numeric string, NaN/inf or negative. Only a finite positive
        # number is accepted; anything else uses the rule-based fallback.
        ai_budget = None
        if isinstance(ai_result, dict):
            try:
                candidate = float(ai_result.get("recommended_budget"))
            except (TypeError, ValueError, OverflowError):
                candidate = None
            if candidate is not None and math.isfinite(candidate) and candidate > 0:
                ai_budget = candidate

        if ai_budget is not None:
            recommended_budget = ai_budget
            reason = str(ai_result.get("reason") or f"AI recommendation for {category}")
            action = ai_result.get("action")
            if not isinstance(action, str):
                action = None
            print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
        else:
            # Fallback to rule-based recommendation if OpenAI fails
            recommended_budget = self._calculate_recommended_budget(avg_expense, data)
            reason = self._generate_reason(category, avg_expense, recommended_budget)
            action = None
            if not ai_result:
                print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
            else:
                print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

        recommendations.append(BudgetRecommendation(
            budget_name=category,
            average_expense=round(avg_expense, 2),
            recommended_budget=round(recommended_budget or 0, 2),
            reason=reason,
            confidence=confidence,
            action=action,
        ))

    # Sort by average expense (highest first)
    recommendations.sort(key=lambda rec: rec.average_expense, reverse=True)
    return recommendations
85
+
86
+ def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
87
+ """Calculate statistics for each category"""
88
+ category_data = defaultdict(lambda: {
89
+ "total": 0,
90
+ "count": 0,
91
+ "months": set(),
92
+ "monthly_totals": defaultdict(float)
93
+ })
94
+
95
+ for expense in expenses:
96
+ category = expense.get("category", "Uncategorized")
97
+ amount = expense.get("amount", 0)
98
+ date = expense.get("date")
99
+
100
+ # Handle date conversion - skip if date is None or invalid
101
+ if date is None:
102
+ continue
103
+
104
+ if isinstance(date, str):
105
+ try:
106
+ date = datetime.fromisoformat(date.replace('Z', '+00:00'))
107
+ except (ValueError, AttributeError):
108
+ continue
109
+ elif not isinstance(date, datetime):
110
+ # If date is not a string or datetime, skip this expense
111
+ continue
112
+
113
+ category_data[category]["total"] += amount
114
+ category_data[category]["count"] += 1
115
+
116
+ # Track monthly totals
117
+ month_key = (date.year, date.month)
118
+ category_data[category]["months"].add(month_key)
119
+ category_data[category]["monthly_totals"][month_key] += amount
120
+
121
+ # Calculate averages
122
+ result = {}
123
+ for category, data in category_data.items():
124
+ num_months = len(data["months"]) or 1
125
+ avg_monthly = data["total"] / num_months
126
+
127
+ # Calculate standard deviation for variability
128
+ monthly_values = list(data["monthly_totals"].values())
129
+ if len(monthly_values) > 1:
130
+ mean = sum(monthly_values) / len(monthly_values)
131
+ variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
132
+ std_dev = math.sqrt(variance)
133
+ else:
134
+ std_dev = 0
135
+
136
+ result[category] = {
137
+ "average_monthly": avg_monthly,
138
+ "total": data["total"],
139
+ "count": data["count"],
140
+ "months_analyzed": num_months,
141
+ "std_dev": std_dev,
142
+ "monthly_values": monthly_values
143
+ }
144
+
145
+ return result
146
+
147
+ def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
148
+ """
149
+ Calculate recommended budget based on average expense.
150
+
151
+ Strategy:
152
+ - Base: Average monthly expense
153
+ - Add 5% buffer for variability
154
+ - Round to nearest 100 for cleaner numbers
155
+ """
156
+ # Add 5% buffer to handle variability
157
+ buffer = avg_expense * 0.05
158
+
159
+ # If there's high variability (std_dev > 20% of mean), add more buffer
160
+ if data["std_dev"] > 0:
161
+ coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
162
+ if coefficient_of_variation > 0.2:
163
+ buffer = avg_expense * 0.10 # 10% buffer for high variability
164
+
165
+ recommended = avg_expense + buffer
166
+
167
+ # Round to nearest 100 for cleaner budget numbers
168
+ recommended = round(recommended / 100) * 100
169
+
170
+ # Ensure minimum of 100 if there was any expense
171
+ if recommended < 100 and avg_expense > 0:
172
+ recommended = 100
173
+
174
+ return recommended
175
+
176
+ def _calculate_confidence(self, data: Dict) -> float:
177
+ """
178
+ Calculate confidence score (0-1) based on data quality.
179
+
180
+ Factors:
181
+ - Number of months analyzed (more = higher confidence)
182
+ - Number of transactions (more = higher confidence)
183
+ - Consistency of spending (lower std_dev = higher confidence)
184
+ """
185
+ months_score = min(data["months_analyzed"] / 6, 1.0) # Max at 6 months
186
+ count_score = min(data["count"] / 10, 1.0) # Max at 10 transactions
187
+
188
+ # Consistency score (inverse of coefficient of variation)
189
+ if data["average_monthly"] > 0:
190
+ cv = data["std_dev"] / data["average_monthly"]
191
+ consistency_score = max(0, 1 - min(cv, 1.0)) # Lower CV = higher score
192
+ else:
193
+ consistency_score = 0.5
194
+
195
+ # Weighted average
196
+ confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
197
+
198
+ return round(confidence, 2)
199
+
200
+ def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
201
+ """Generate human-readable reason for the recommendation"""
202
+ # Format amounts with currency symbol
203
+ avg_formatted = f"Rs.{avg_expense:,.0f}"
204
+ budget_formatted = f"Rs.{recommended_budget:,.0f}"
205
+
206
+ if recommended_budget > avg_expense:
207
+ buffer = recommended_budget - avg_expense
208
+ buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
209
+ return (
210
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
211
+ f"We suggest setting your budget to {budget_formatted} for next month "
212
+ f"(includes a {buffer_pct:.0f}% buffer for variability)."
213
+ )
214
+ else:
215
+ return (
216
+ f"Your average monthly {category.lower()} expense is {avg_formatted}. "
217
+ f"We recommend a budget of {budget_formatted} for next month."
218
+ )
219
+
220
def get_category_averages(self, user_id: str, months: int = 3) -> "List[CategoryExpense]":
    """Get average expenses by category for the past N months.

    Args:
        user_id: Value matched against the ``user_id`` field of expenses.
        months: Look-back window; each month is counted as 30 days.

    Returns:
        CategoryExpense entries sorted by average monthly expense, highest
        first. Empty when no matching expenses exist.
    """
    # NOTE(review): datetime.now() is naive local time - confirm that the
    # stored expense dates use the same convention.
    end_date = datetime.now()
    start_date = end_date - timedelta(days=months * 30)

    query = {
        "user_id": user_id,
        "date": {"$gte": start_date, "$lte": end_date},
        "type": "expense",
    }
    expenses = list(self.db.expenses.find(query))
    if not expenses:
        return []

    stats = self._calculate_category_statistics(expenses, start_date, end_date)
    averages = [
        CategoryExpense(
            category=name,
            average_monthly_expense=round(entry["average_monthly"], 2),
            total_expenses=entry["count"],
            months_analyzed=entry["months_analyzed"],
        )
        for name, entry in stats.items()
    ]
    return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)
247
+
248
+ def _get_category_name(self, category_id) -> str:
249
+ """Look up category name from categories collection"""
250
+ if not category_id:
251
+ return "Uncategorized"
252
+
253
+ try:
254
+ # Try to find category in categories collection
255
+ if isinstance(category_id, ObjectId):
256
+ category_doc = self.db.categories.find_one({"_id": category_id})
257
+ else:
258
+ try:
259
+ category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
260
+ except (ValueError, TypeError):
261
+ category_doc = self.db.categories.find_one({"_id": category_id})
262
+
263
+ if category_doc:
264
+ return category_doc.get("name") or category_doc.get("title") or str(category_id)
265
+ except Exception as e:
266
+ print(f"Error looking up category name for {category_id}: {e}")
267
+ pass
268
+
269
+ return str(category_id) if category_id else "Uncategorized"
270
+
271
def _get_category_stats_from_budgets(
    self, user_id: str, month: int, year: int
) -> Dict:
    """
    Build category stats from existing budgets for this user.

    Each budget document (e.g. "Office Maintenance", "LOGICGO") is treated
    as a spending category keyed by its name. Nested headCategories are
    deliberately NOT expanded, so recommendations only cover budgets the
    user actually created.

    Args:
        user_id: User identifier (string or ObjectId form).
        month: Target month; accepted for interface compatibility, not used here.
        year: Target year; accepted for interface compatibility, not used here.

    Returns:
        Dict mapping budget name to a stats dict with the keys
        average_monthly, total, count, months_analyzed, std_dev and
        monthly_values. Empty dict when no budget is found.
    """
    print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

    budgets = self._find_user_budgets(user_id)

    if not budgets:
        print(f"No budgets found for user_id: {user_id}")
        print("Tried all query patterns. Checking sample budget structure...")
        # Debugging aid: show how an arbitrary budget stores its owner.
        sample = self.db.budgets.find_one()
        if sample:
            print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
            print(f"Sample budget has user_id field: {'user_id' in sample}")
        return {}

    print(f"Found {len(budgets)} budgets for user_id: {user_id}")

    # Collect the usable amount of every budget, grouped by budget name.
    amounts_by_name: Dict[str, List[float]] = {}
    for b in budgets:
        budget_name = b.get("name", "Uncategorized")
        if not budget_name or budget_name == "Uncategorized":
            budget_name = b.get("category") or b.get("title") or "Uncategorized"

        # "category" may hold an id rather than text; never call str methods on it.
        if not isinstance(budget_name, str):
            budget_name = str(budget_name)

        # Skip if budget name is still Uncategorized or blank
        if budget_name == "Uncategorized" or not budget_name.strip():
            print(f"⚠️ Skipping budget with invalid name: {b.get('_id')}")
            continue

        print(f"✅ Processing budget: '{budget_name}' (id: {b.get('_id')})")

        base_amount = self._budget_base_amount(b)
        if base_amount > 0:
            amounts_by_name.setdefault(budget_name, []).append(base_amount)

    result: Dict[str, Dict] = {}
    for budget_name, values in amounts_by_name.items():
        count = len(values)
        total = sum(values)
        mean = total / count
        # Population standard deviation, matching _calculate_category_statistics.
        if count > 1:
            std_dev = math.sqrt(sum((v - mean) ** 2 for v in values) / count)
        else:
            std_dev = 0.0
        result[budget_name] = {
            "average_monthly": mean,
            "total": total,
            "count": count,
            "months_analyzed": count,
            "std_dev": std_dev,
            "monthly_values": values,
        }

    print(f"✅ Processed {len(result)} budget categories for recommendations: {list(result.keys())}")
    return result

def _find_user_budgets(self, user_id) -> List[Dict]:
    """
    Collect budget documents that may belong to user_id, de-duplicated by _id.

    Several query shapes are tried (no status filter, so OPEN and CLOSE
    budgets are both included) because the owner may be stored under
    "createdBy" or "user_id", as a string or as an ObjectId. Each lookup is
    best-effort: a failing query is logged and the remaining ones still run.
    """
    collection = self.db.budgets

    # ObjectId() raises bson.errors.InvalidId (neither ValueError nor
    # TypeError) for malformed strings, so validate instead of catching.
    object_id = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
    if object_id is None:
        print(f"user_id {user_id} is not a valid ObjectId, skipping ObjectId query patterns")

    lookups = []
    if object_id is not None:
        lookups.append(("Pattern 1 (createdBy ObjectId)", {"createdBy": object_id}))
    lookups.append(("Pattern 2 (createdBy string)", {"createdBy": user_id}))
    lookups.append(("Pattern 3 (user_id string)", {"user_id": user_id}))
    if object_id is not None:
        lookups.append(("Pattern 4 (user_id ObjectId)", {"user_id": object_id}))

    found: List[Dict] = []
    for label, query in lookups:
        try:
            matches = list(collection.find(query))
        except Exception as e:
            print(f"{label} failed: {e}")
            continue
        print(f"{label}: Found {len(matches)} budgets")
        found.extend(matches)

    # Pattern 5: user_id may actually be a budget _id; resolve its creator
    # and load every budget of that creator.
    if object_id is not None:
        try:
            budget_by_id = collection.find_one({"_id": object_id})
            if budget_by_id:
                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                created_by = budget_by_id.get("createdBy")
                if created_by:
                    budgets_by_creator = list(collection.find({"createdBy": created_by}))
                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                    found.extend(budgets_by_creator)
        except Exception as e:
            print(f"Pattern 5 failed: {e}")

    # Pattern 6: budget _id stored as a plain string.
    try:
        budget_by_id_str = collection.find_one({"_id": user_id})
        if budget_by_id_str:
            print("Pattern 6: Found budget by _id as string")
            found.append(budget_by_id_str)
    except Exception as e:
        print(f"Pattern 6 failed: {e}")

    # Keep the first occurrence of every _id, preserving discovery order.
    unique: Dict[str, Dict] = {}
    for b in found:
        unique.setdefault(str(b.get("_id", "")), b)
    return list(unique.values())

def _budget_base_amount(self, budget: Dict) -> float:
    """
    Return the amount that represents a budget document, or 0.0 if none.

    Priority: spent amount > maximum amount > budget amount. Each group is
    parsed independently so one malformed field cannot discard the others.
    """
    def read_amount(*keys) -> float:
        # First truthy value among the alternative field names.
        raw = next((budget[k] for k in keys if budget.get(k)), 0)
        try:
            return float(raw)
        except (ValueError, TypeError):
            return 0.0

    field_groups = (
        ("spendAmount", "spend_amount", "spent"),
        ("maxAmount", "max_amount", "amount"),
        ("budget", "budgetAmount"),
    )
    for keys in field_groups:
        amount = read_amount(*keys)
        if amount > 0:
            return amount
    return 0.0
437
+
438
def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
    """
    Use OpenAI to refine the budget recommendation.

    Args:
        category: Budget/category name shown to the model.
        data: Stats dict; reads "monthly_values", "std_dev" and "months_analyzed".
        avg_expense: Average monthly spend for the category.

    Returns:
        The parsed JSON object returned by the model (expected keys:
        recommended_budget, action, reason), or None when the API key is
        missing, the request fails, or the reply is not a JSON object.
    """
    if not OPENAI_API_KEY:
        print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
        return None

    print(f"🔄 Calling OpenAI API for category: {category}...")

    # Fall back to the average when there is no per-month history.
    monthly_values = data.get("monthly_values")
    if monthly_values:
        history = ", ".join(f"{value:.0f}" for value in monthly_values)
    else:
        history = f"{avg_expense:.0f}"

    summary = (
        f"Category: {category}\n"
        f"Monthly totals: [{history}]\n"
        f"Average spend: {avg_expense:.2f}\n"
        f"Std deviation: {data['std_dev']:.2f}\n"
        f"Months observed: {data['months_analyzed']}\n"
    )

    prompt = (
        "You are an Indian personal finance coach. "
        "Given the user's spending history, decide whether to increase, decrease, "
        "or keep the upcoming month's budget and provide a short explanation. "
        "Respond strictly as JSON with the following keys:\n"
        '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
        "Use rupees for all amounts.\n\n"
        f"{summary}"
    )

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENAI_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": "gpt-4o-mini",
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.1,
                "response_format": {"type": "json_object"},
            },
            timeout=30,
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        parsed = json.loads(content)
    except Exception as exc:
        # Deliberate best-effort boundary: any failure means "no AI result",
        # and the caller falls back to the rule-based recommendation.
        print(f"OpenAI recommendation error for {category}: {exc}")
        return None

    # Callers use dict access on the result; reject any other JSON type.
    if not isinstance(parsed, dict):
        print(f"OpenAI recommendation error for {category}: expected a JSON object, got {type(parsed).__name__}")
        return None
    return parsed
.history/app/smart_recommendation_20251225161144.py ADDED
@@ -0,0 +1,493 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import math
3
+ import os
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from typing import Dict, List
7
+
8
+ import requests
9
+ from dotenv import load_dotenv
10
+ from bson import ObjectId
11
+
12
+ from app.models import BudgetRecommendation, CategoryExpense
13
+
14
+ load_dotenv()
15
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
+
17
class SmartBudgetRecommender:
    """
    Smart Budget Recommendation Engine

    Analyzes past spending behavior and recommends personalized budgets
    for each category based on historical data.
    """

    def __init__(self, db):
        """Store the database handle; the budgets, expenses and categories collections are read from it."""
        self.db = db

    def get_recommendations(self, user_id: str, month: int, year: int) -> "List[BudgetRecommendation]":
        """
        Get budget recommendations for all categories based on past behavior.

        Args:
            user_id: User identifier
            month: Target month (1-12)
            year: Target year

        Returns:
            List of budget recommendations, one per budget, sorted by
            average expense (highest first). Empty when the user has no budgets.
        """
        # Only actual budgets are used - expense history is deliberately ignored
        # so recommendations only cover budgets the user created.
        category_data = self._get_category_stats_from_budgets(user_id, month, year)
        if not category_data:
            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
            return []

        recommendations = []

        for category, data in category_data.items():
            avg_expense = data["average_monthly"]
            confidence = self._calculate_confidence(data)

            # Always try OpenAI first (primary source of recommendation)
            ai_result = self._get_ai_recommendation(category, data, avg_expense)
            ai_budget = self._parse_ai_budget(ai_result)
            if ai_budget is not None:
                recommended_budget = ai_budget
                reason = ai_result.get("reason") or f"AI recommendation for {category}"
                action = ai_result.get("action")
                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
            else:
                # Fallback to rule-based recommendation if OpenAI fails
                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                reason = self._generate_reason(category, avg_expense, recommended_budget)
                action = None
                if not ai_result:
                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
                else:
                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

            recommendations.append(BudgetRecommendation(
                budget_name=category,
                average_expense=round(avg_expense, 2),
                recommended_budget=round(recommended_budget, 2),
                reason=reason,
                confidence=confidence,
                action=action
            ))

        # Sort by average expense (highest first)
        recommendations.sort(key=lambda x: x.average_expense, reverse=True)

        return recommendations

    def _parse_ai_budget(self, ai_result):
        """
        Extract a usable budget amount from the AI reply.

        Returns the amount as a float, or None when the reply is not a dict
        or its "recommended_budget" is missing, non-numeric, non-finite or
        not positive (the caller then uses the rule-based fallback).
        """
        if not isinstance(ai_result, dict):
            return None
        try:
            budget = float(ai_result.get("recommended_budget"))
        except (TypeError, ValueError):
            return None
        if not math.isfinite(budget) or budget <= 0:
            return None
        return budget

    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
        """
        Calculate per-category statistics from expense documents.

        Args:
            expenses: Expense dicts; reads "category", "amount" and "date".
            start_date: Accepted for interface compatibility; not used here.
            end_date: Accepted for interface compatibility; not used here.

        Returns:
            Dict mapping category to average_monthly, total, count,
            months_analyzed, std_dev (population) and monthly_values.
        """
        category_data = defaultdict(lambda: {
            "total": 0,
            "count": 0,
            "months": set(),
            "monthly_totals": defaultdict(float)
        })

        for expense in expenses:
            category = expense.get("category", "Uncategorized")
            date = expense.get("date")

            # Skip expenses without a usable date
            if date is None:
                continue
            if isinstance(date, str):
                try:
                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
                except (ValueError, AttributeError):
                    continue
            elif not isinstance(date, datetime):
                continue

            # A missing or null amount counts as 0; an unparsable one is skipped.
            try:
                amount = float(expense.get("amount") or 0)
            except (TypeError, ValueError):
                continue

            bucket = category_data[category]
            bucket["total"] += amount
            bucket["count"] += 1

            # Track monthly totals
            month_key = (date.year, date.month)
            bucket["months"].add(month_key)
            bucket["monthly_totals"][month_key] += amount

        result = {}
        for category, data in category_data.items():
            num_months = len(data["months"]) or 1
            monthly_values = list(data["monthly_totals"].values())

            # Population standard deviation of the monthly totals
            if len(monthly_values) > 1:
                mean = sum(monthly_values) / len(monthly_values)
                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
                std_dev = math.sqrt(variance)
            else:
                std_dev = 0.0

            result[category] = {
                "average_monthly": data["total"] / num_months,
                "total": data["total"],
                "count": data["count"],
                "months_analyzed": num_months,
                "std_dev": std_dev,
                "monthly_values": monthly_values
            }

        return result

    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
        """
        Calculate recommended budget based on average expense.

        Strategy:
        - Base: Average monthly expense
        - Add 5% buffer (10% when std_dev exceeds 20% of the average)
        - Round to nearest 100, with ties rounded up
        - Minimum of 100 if there was any expense
        """
        buffer = avg_expense * 0.05

        # High variability (coefficient of variation > 0.2) gets a larger buffer
        if avg_expense > 0 and data["std_dev"] / avg_expense > 0.2:
            buffer = avg_expense * 0.10

        recommended = avg_expense + buffer

        # Built-in round() rounds ties to even (1050 -> 1000), which would drop
        # the buffer for some averages; round half up instead.
        recommended = float(math.floor(recommended / 100 + 0.5) * 100)

        # Ensure minimum of 100 if there was any expense
        if recommended < 100 and avg_expense > 0:
            recommended = 100.0

        return recommended

    def _calculate_confidence(self, data: Dict) -> float:
        """
        Calculate confidence score (0-1) based on data quality.

        Factors:
        - Number of months analyzed (weight 0.4, saturates at 6 months)
        - Number of transactions (weight 0.3, saturates at 10)
        - Consistency of spending (weight 0.3, lower std_dev = higher score)
        """
        months_score = min(data["months_analyzed"] / 6, 1.0)
        count_score = min(data["count"] / 10, 1.0)

        # Consistency score (inverse of coefficient of variation)
        if data["average_monthly"] > 0:
            cv = data["std_dev"] / data["average_monthly"]
            consistency_score = max(0, 1 - min(cv, 1.0))
        else:
            consistency_score = 0.5

        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)

        return round(confidence, 2)

    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
        """Generate human-readable reason for the recommendation"""
        avg_formatted = f"Rs.{avg_expense:,.0f}"
        budget_formatted = f"Rs.{recommended_budget:,.0f}"

        if recommended_budget > avg_expense:
            buffer = recommended_budget - avg_expense
            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
            return (
                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
                f"We suggest setting your budget to {budget_formatted} for next month "
                f"(includes a {buffer_pct:.0f}% buffer for variability)."
            )

        return (
            f"Your average monthly {category.lower()} expense is {avg_formatted}. "
            f"We recommend a budget of {budget_formatted} for next month."
        )

    def get_category_averages(self, user_id: str, months: int = 3) -> "List[CategoryExpense]":
        """Get average expenses by category for the past N months (a month is approximated as 30 days)."""
        end_date = datetime.now()
        start_date = end_date - timedelta(days=months * 30)

        expenses = list(self.db.expenses.find({
            "user_id": user_id,
            "date": {"$gte": start_date, "$lte": end_date},
            "type": "expense"
        }))

        if not expenses:
            return []

        category_data = self._calculate_category_statistics(expenses, start_date, end_date)

        result = [
            CategoryExpense(
                category=category,
                average_monthly_expense=round(data["average_monthly"], 2),
                total_expenses=data["count"],
                months_analyzed=data["months_analyzed"]
            )
            for category, data in category_data.items()
        ]

        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
        return result

    def _get_category_name(self, category_id) -> str:
        """
        Look up category name from categories collection.

        Returns the document's "name" (or "title"); falls back to
        str(category_id) when nothing is found or the lookup fails, and to
        "Uncategorized" when category_id is empty.
        """
        if not category_id:
            return "Uncategorized"

        # ObjectId() raises bson.errors.InvalidId (neither ValueError nor
        # TypeError) for malformed strings, so validate instead of catching.
        candidates = []
        if isinstance(category_id, ObjectId):
            candidates.append(category_id)
        else:
            if ObjectId.is_valid(category_id):
                candidates.append(ObjectId(category_id))
            candidates.append(category_id)

        try:
            for key in candidates:
                category_doc = self.db.categories.find_one({"_id": key})
                if category_doc:
                    return category_doc.get("name") or category_doc.get("title") or str(category_id)
        except Exception as e:
            # Best-effort lookup: log and fall back to the raw id.
            print(f"Error looking up category name for {category_id}: {e}")

        return str(category_id)

    def _get_category_stats_from_budgets(
        self, user_id: str, month: int, year: int
    ) -> Dict:
        """
        Build category stats from existing budgets for this user.

        Each budget document (e.g. "Office Maintenance", "LOGICGO") is treated
        as a spending category keyed by its name. Nested headCategories are
        deliberately NOT expanded, so recommendations only cover budgets the
        user actually created.

        Args:
            user_id: User identifier (string or ObjectId form).
            month: Target month; accepted for interface compatibility, not used here.
            year: Target year; accepted for interface compatibility, not used here.

        Returns:
            Dict mapping budget name to a stats dict with the keys
            average_monthly, total, count, months_analyzed, std_dev and
            monthly_values. Empty dict when no budget is found.
        """
        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")

        budgets = self._find_user_budgets(user_id)

        if not budgets:
            print(f"No budgets found for user_id: {user_id}")
            print("Tried all query patterns. Checking sample budget structure...")
            # Debugging aid: show how an arbitrary budget stores its owner.
            sample = self.db.budgets.find_one()
            if sample:
                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
                print(f"Sample budget has user_id field: {'user_id' in sample}")
            return {}

        print(f"Found {len(budgets)} budgets for user_id: {user_id}")

        # Collect the usable amount of every budget, grouped by budget name.
        amounts_by_name: Dict[str, List[float]] = {}
        for b in budgets:
            budget_name = b.get("name", "Uncategorized")
            if not budget_name or budget_name == "Uncategorized":
                budget_name = b.get("category") or b.get("title") or "Uncategorized"

            # "category" may hold an id rather than text; never call str methods on it.
            if not isinstance(budget_name, str):
                budget_name = str(budget_name)

            # Skip if budget name is still Uncategorized or blank
            if budget_name == "Uncategorized" or not budget_name.strip():
                print(f"⚠️ Skipping budget with invalid name: {b.get('_id')}")
                continue

            print(f"✅ Processing budget: '{budget_name}' (id: {b.get('_id')})")

            base_amount = self._budget_base_amount(b)
            if base_amount > 0:
                amounts_by_name.setdefault(budget_name, []).append(base_amount)

        result: Dict[str, Dict] = {}
        for budget_name, values in amounts_by_name.items():
            count = len(values)
            total = sum(values)
            mean = total / count
            # Population standard deviation, matching _calculate_category_statistics.
            if count > 1:
                std_dev = math.sqrt(sum((v - mean) ** 2 for v in values) / count)
            else:
                std_dev = 0.0
            result[budget_name] = {
                "average_monthly": mean,
                "total": total,
                "count": count,
                "months_analyzed": count,
                "std_dev": std_dev,
                "monthly_values": values,
            }

        print(f"✅ Processed {len(result)} budget categories for recommendations: {list(result.keys())}")
        return result

    def _find_user_budgets(self, user_id) -> List[Dict]:
        """
        Collect budget documents that may belong to user_id, de-duplicated by _id.

        Several query shapes are tried (no status filter, so OPEN and CLOSE
        budgets are both included) because the owner may be stored under
        "createdBy" or "user_id", as a string or as an ObjectId. Each lookup is
        best-effort: a failing query is logged and the remaining ones still run.
        """
        collection = self.db.budgets

        # ObjectId() raises bson.errors.InvalidId (neither ValueError nor
        # TypeError) for malformed strings, so validate instead of catching.
        object_id = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
        if object_id is None:
            print(f"user_id {user_id} is not a valid ObjectId, skipping ObjectId query patterns")

        lookups = []
        if object_id is not None:
            lookups.append(("Pattern 1 (createdBy ObjectId)", {"createdBy": object_id}))
        lookups.append(("Pattern 2 (createdBy string)", {"createdBy": user_id}))
        lookups.append(("Pattern 3 (user_id string)", {"user_id": user_id}))
        if object_id is not None:
            lookups.append(("Pattern 4 (user_id ObjectId)", {"user_id": object_id}))

        found: List[Dict] = []
        for label, query in lookups:
            try:
                matches = list(collection.find(query))
            except Exception as e:
                print(f"{label} failed: {e}")
                continue
            print(f"{label}: Found {len(matches)} budgets")
            found.extend(matches)

        # Pattern 5: user_id may actually be a budget _id; resolve its creator
        # and load every budget of that creator.
        if object_id is not None:
            try:
                budget_by_id = collection.find_one({"_id": object_id})
                if budget_by_id:
                    print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                    created_by = budget_by_id.get("createdBy")
                    if created_by:
                        budgets_by_creator = list(collection.find({"createdBy": created_by}))
                        print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                        found.extend(budgets_by_creator)
            except Exception as e:
                print(f"Pattern 5 failed: {e}")

        # Pattern 6: budget _id stored as a plain string.
        try:
            budget_by_id_str = collection.find_one({"_id": user_id})
            if budget_by_id_str:
                print("Pattern 6: Found budget by _id as string")
                found.append(budget_by_id_str)
        except Exception as e:
            print(f"Pattern 6 failed: {e}")

        # Keep the first occurrence of every _id, preserving discovery order.
        unique: Dict[str, Dict] = {}
        for b in found:
            unique.setdefault(str(b.get("_id", "")), b)
        return list(unique.values())

    def _budget_base_amount(self, budget: Dict) -> float:
        """
        Return the amount that represents a budget document, or 0.0 if none.

        Priority: spent amount > maximum amount > budget amount. Each group is
        parsed independently so one malformed field cannot discard the others.
        """
        def read_amount(*keys) -> float:
            # First truthy value among the alternative field names.
            raw = next((budget[k] for k in keys if budget.get(k)), 0)
            try:
                return float(raw)
            except (ValueError, TypeError):
                return 0.0

        field_groups = (
            ("spendAmount", "spend_amount", "spent"),
            ("maxAmount", "max_amount", "amount"),
            ("budget", "budgetAmount"),
        )
        for keys in field_groups:
            amount = read_amount(*keys)
            if amount > 0:
                return amount
        return 0.0

    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
        """
        Use OpenAI to refine the budget recommendation.

        Args:
            category: Budget/category name shown to the model.
            data: Stats dict; reads "monthly_values", "std_dev" and "months_analyzed".
            avg_expense: Average monthly spend for the category.

        Returns:
            The parsed JSON object returned by the model (expected keys:
            recommended_budget, action, reason), or None when the API key is
            missing, the request fails, or the reply is not a JSON object.
        """
        if not OPENAI_API_KEY:
            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
            return None

        print(f"🔄 Calling OpenAI API for category: {category}...")

        # Fall back to the average when there is no per-month history.
        monthly_values = data.get("monthly_values")
        if monthly_values:
            history = ", ".join(f"{value:.0f}" for value in monthly_values)
        else:
            history = f"{avg_expense:.0f}"

        summary = (
            f"Category: {category}\n"
            f"Monthly totals: [{history}]\n"
            f"Average spend: {avg_expense:.2f}\n"
            f"Std deviation: {data['std_dev']:.2f}\n"
            f"Months observed: {data['months_analyzed']}\n"
        )

        prompt = (
            "You are an Indian personal finance coach. "
            "Given the user's spending history, decide whether to increase, decrease, "
            "or keep the upcoming month's budget and provide a short explanation. "
            "Respond strictly as JSON with the following keys:\n"
            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
            "Use rupees for all amounts.\n\n"
            f"{summary}"
        )

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "response_format": {"type": "json_object"},
                },
                timeout=30,
            )
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            parsed = json.loads(content)
        except Exception as exc:
            # Deliberate best-effort boundary: any failure means "no AI result",
            # and the caller falls back to the rule-based recommendation.
            print(f"OpenAI recommendation error for {category}: {exc}")
            return None

        # Callers use dict access on the result; reject any other JSON type.
        if not isinstance(parsed, dict):
            print(f"OpenAI recommendation error for {category}: expected a JSON object, got {type(parsed).__name__}")
            return None
        return parsed
Smart_Budget_Recommendation_API.postman_collection.json CHANGED
@@ -1,384 +1,398 @@
1
  {
2
  "info": {
3
- "_postman_id": "smart-budget-recommendation-api",
4
- "name": "Smart Budget Recommendation API",
5
- "description": "API collection for Smart Budget Recommendation service deployed on Hugging Face",
6
- "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
7
  },
8
- "item": [
 
 
9
  {
10
- "name": "Health Check",
11
- "request": {
12
- "method": "GET",
13
- "header": [],
14
- "url": {
15
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/health",
16
- "protocol": "https",
17
- "host": [
18
- "logicgoinfotechspaces-smart-budget-recommendation",
19
- "hf",
20
- "space"
21
- ],
22
- "path": [
23
- "health"
24
- ]
25
- },
26
- "description": "Check if the API and database are running"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
- },
29
- {
30
- "name": "Root Endpoint",
31
- "request": {
32
- "method": "GET",
33
- "header": [],
34
- "url": {
35
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/",
36
- "protocol": "https",
37
- "host": [
38
- "logicgoinfotechspaces-smart-budget-recommendation",
39
- "hf",
40
- "space"
41
- ],
42
- "path": [
43
- ""
44
- ]
45
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
- },
48
- {
49
- "name": "Create Expense",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  "request": {
51
- "method": "POST",
52
- "header": [
53
- {
54
- "key": "Content-Type",
55
- "value": "application/json"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
57
  ],
58
- "body": {
59
- "mode": "raw",
60
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"amount\": 3800,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries\",\n \"date\": \"2025-01-15T00:00:00\",\n \"type\": \"expense\"\n}"
61
- },
62
- "url": {
63
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
64
- "protocol": "https",
65
- "host": [
66
- "logicgoinfotechspaces-smart-budget-recommendation",
67
- "hf",
68
- "space"
69
- ],
70
- "path": [
71
- "expenses"
72
- ]
73
- },
74
- "description": "Create a new expense record"
75
  }
76
- },
77
- {
78
- "name": "Get Expenses",
79
  "request": {
80
- "method": "GET",
81
- "header": [],
82
- "url": {
83
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses?user_id=68a834c3f4694b11efedacd2&limit=20",
84
- "protocol": "https",
85
- "host": [
86
- "logicgoinfotechspaces-smart-budget-recommendation",
87
- "hf",
88
- "space"
89
- ],
90
- "path": [
91
- "expenses"
92
- ],
93
- "query": [
94
- {
95
- "key": "user_id",
96
- "value": "68a834c3f4694b11efedacd2",
97
- "description": "User identifier"
98
- },
99
- {
100
- "key": "limit",
101
- "value": "20",
102
- "description": "Maximum number of expenses to return"
103
- }
104
- ]
105
- },
106
- "description": "Get expenses for a specific user"
107
- }
108
- },
109
- {
110
- "name": "Create Budget",
111
- "request": {
112
- "method": "POST",
113
- "header": [
114
- {
115
- "key": "Content-Type",
116
- "value": "application/json"
117
- }
118
  ],
119
- "body": {
120
- "mode": "raw",
121
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"category\": \"Groceries\",\n \"amount\": 4000,\n \"period\": \"monthly\",\n \"start_date\": \"2025-02-01T00:00:00\",\n \"end_date\": \"2025-02-29T00:00:00\"\n}"
122
- },
123
- "url": {
124
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets",
125
- "protocol": "https",
126
- "host": [
127
- "logicgoinfotechspaces-smart-budget-recommendation",
128
- "hf",
129
- "space"
130
- ],
131
- "path": [
132
- "budgets"
133
- ]
134
- },
135
- "description": "Create a new budget"
136
  }
137
- },
138
- {
139
- "name": "Get Budgets",
140
  "request": {
141
- "method": "GET",
142
- "header": [],
143
- "url": {
144
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets?user_id=68a834c3f4694b11efedacd2",
145
- "protocol": "https",
146
- "host": [
147
- "logicgoinfotechspaces-smart-budget-recommendation",
148
- "hf",
149
- "space"
150
- ],
151
- "path": [
152
- "budgets"
153
- ],
154
- "query": [
155
- {
156
- "key": "user_id",
157
- "value": "68a834c3f4694b11efedacd2"
158
- }
159
- ]
160
- },
161
- "description": "Get budgets for a specific user"
 
 
162
  }
163
- },
164
- {
165
- "name": "Get Smart Budget Recommendations",
166
  "request": {
167
- "method": "GET",
168
- "header": [],
169
- "url": {
170
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/recommendations/68a834c3f4694b11efedacd2?month=2&year=2025",
171
- "protocol": "https",
172
- "host": [
173
- "logicgoinfotechspaces-smart-budget-recommendation",
174
- "hf",
175
- "space"
176
- ],
177
- "path": [
178
- "recommendations",
179
- "68a834c3f4694b11efedacd2"
180
- ],
181
- "query": [
182
- {
183
- "key": "month",
184
- "value": "2",
185
- "description": "Target month (1-12), optional - defaults to next month"
186
- },
187
- {
188
- "key": "year",
189
- "value": "2025",
190
- "description": "Target year, optional - defaults to next year"
191
- }
192
- ]
193
- },
194
- "description": "Get smart budget recommendations based on past spending behavior. Requires at least 2-3 months of expense data."
195
  }
196
- },
197
- {
198
- "name": "Get Category Expenses",
199
  "request": {
200
- "method": "GET",
201
- "header": [],
202
- "url": {
203
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/category-expenses/68a834c3f4694b11efedacd2?months=3",
204
- "protocol": "https",
205
- "host": [
206
- "logicgoinfotechspaces-smart-budget-recommendation",
207
- "hf",
208
- "space"
209
- ],
210
- "path": [
211
- "category-expenses",
212
- "68a834c3f4694b11efedacd2"
213
- ],
214
- "query": [
215
- {
216
- "key": "months",
217
- "value": "3",
218
- "description": "Number of months to analyze (default: 3)"
219
- }
220
- ]
221
- },
222
- "description": "Get average expenses by category for the past N months"
223
- }
224
- },
225
- {
226
- "name": "Sample Expenses - Create Multiple",
227
- "item": [
228
- {
229
- "name": "Groceries - Month 1 (Sept 2024)",
230
- "request": {
231
- "method": "POST",
232
- "header": [
233
- {
234
- "key": "Content-Type",
235
- "value": "application/json"
236
- }
237
- ],
238
- "body": {
239
- "mode": "raw",
240
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"amount\": 3500,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries - September 2024\",\n \"date\": \"2024-09-15T00:00:00\",\n \"type\": \"expense\"\n}"
241
- },
242
- "url": {
243
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
244
- "protocol": "https",
245
- "host": [
246
- "logicgoinfotechspaces-smart-budget-recommendation",
247
- "hf",
248
- "space"
249
- ],
250
- "path": [
251
- "expenses"
252
- ]
253
- }
254
- }
255
- },
256
- {
257
- "name": "Groceries - Month 2 (Oct 2024)",
258
- "request": {
259
- "method": "POST",
260
- "header": [
261
- {
262
- "key": "Content-Type",
263
- "value": "application/json"
264
- }
265
- ],
266
- "body": {
267
- "mode": "raw",
268
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"amount\": 3800,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries - October 2024\",\n \"date\": \"2024-10-15T00:00:00\",\n \"type\": \"expense\"\n}"
269
- },
270
- "url": {
271
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
272
- "protocol": "https",
273
- "host": [
274
- "logicgoinfotechspaces-smart-budget-recommendation",
275
- "hf",
276
- "space"
277
- ],
278
- "path": [
279
- "expenses"
280
- ]
281
- }
282
- }
283
- },
284
  {
285
- "name": "Groceries - Month 3 (Nov 2024)",
286
- "request": {
287
- "method": "POST",
288
- "header": [
289
- {
290
- "key": "Content-Type",
291
- "value": "application/json"
292
- }
293
- ],
294
- "body": {
295
- "mode": "raw",
296
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"amount\": 4000,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries - November 2024\",\n \"date\": \"2024-11-15T00:00:00\",\n \"type\": \"expense\"\n}"
297
- },
298
- "url": {
299
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
300
- "protocol": "https",
301
- "host": [
302
- "logicgoinfotechspaces-smart-budget-recommendation",
303
- "hf",
304
- "space"
305
- ],
306
- "path": [
307
- "expenses"
308
- ]
309
- }
310
- }
311
- },
312
- {
313
- "name": "Transport - Month 1 (Sept 2024)",
314
- "request": {
315
- "method": "POST",
316
- "header": [
317
- {
318
- "key": "Content-Type",
319
- "value": "application/json"
320
- }
321
- ],
322
- "body": {
323
- "mode": "raw",
324
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"amount\": 2000,\n \"category\": \"Transport\",\n \"description\": \"Monthly transport - September 2024\",\n \"date\": \"2024-09-20T00:00:00\",\n \"type\": \"expense\"\n}"
325
- },
326
- "url": {
327
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
328
- "protocol": "https",
329
- "host": [
330
- "logicgoinfotechspaces-smart-budget-recommendation",
331
- "hf",
332
- "space"
333
- ],
334
- "path": [
335
- "expenses"
336
- ]
337
- }
338
- }
339
- },
340
- {
341
- "name": "Transport - Month 2 (Oct 2024)",
342
- "request": {
343
- "method": "POST",
344
- "header": [
345
- {
346
- "key": "Content-Type",
347
- "value": "application/json"
348
- }
349
- ],
350
- "body": {
351
- "mode": "raw",
352
- "raw": "{\n \"user_id\": \"68a834c3f4694b11efedacd2\",\n \"amount\": 2200,\n \"category\": \"Transport\",\n \"description\": \"Monthly transport - October 2024\",\n \"date\": \"2024-10-20T00:00:00\",\n \"type\": \"expense\"\n}"
353
- },
354
- "url": {
355
- "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
356
- "protocol": "https",
357
- "host": [
358
- "logicgoinfotechspaces-smart-budget-recommendation",
359
- "hf",
360
- "space"
361
- ],
362
- "path": [
363
- "expenses"
364
- ]
365
- }
366
- }
367
  }
368
- ]
369
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  ],
371
  "variable": [
372
- {
373
- "key": "base_url",
374
- "value": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space",
375
- "type": "string"
376
- },
377
- {
378
- "key": "user_id",
379
- "value": "68a834c3f4694b11efedacd2",
380
- "type": "string"
381
- }
 
 
 
 
 
382
  ]
383
- }
384
-
 
1
  {
2
  "info": {
3
+ "_postman_id": "smart-budget-recommendation-api",
4
+ "name": "Smart Budget Recommendation API",
5
+ "description": "API collection for Smart Budget Recommendation service deployed on Hugging Face",
6
+ "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
7
  },
8
+ "auth": {
9
+ "type": "bearer",
10
+ "bearer": [
11
  {
12
+ "key": "token",
13
+ "value": "{{hf_token}}",
14
+ "type": "string"
15
+ }
16
+ ]
17
+ },
18
+ "item": [
19
+ {
20
+ "name": "Health Check",
21
+ "request": {
22
+ "method": "GET",
23
+ "header": [],
24
+ "url": {
25
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/health",
26
+ "protocol": "https",
27
+ "host": [
28
+ "logicgoinfotechspaces-smart-budget-recommendation",
29
+ "hf",
30
+ "space"
31
+ ],
32
+ "path": [
33
+ "health"
34
+ ]
35
+ },
36
+ "description": "Check if the API and database are running"
37
+ }
38
+ },
39
+ {
40
+ "name": "Root Endpoint",
41
+ "request": {
42
+ "method": "GET",
43
+ "header": [],
44
+ "url": {
45
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/",
46
+ "protocol": "https",
47
+ "host": [
48
+ "logicgoinfotechspaces-smart-budget-recommendation",
49
+ "hf",
50
+ "space"
51
+ ],
52
+ "path": [
53
+ ""
54
+ ]
55
+ }
56
+ }
57
+ },
58
+ {
59
+ "name": "Create Expense",
60
+ "request": {
61
+ "method": "POST",
62
+ "header": [
63
+ {
64
+ "key": "Content-Type",
65
+ "value": "application/json"
66
  }
67
+ ],
68
+ "body": {
69
+ "mode": "raw",
70
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"amount\": 3800,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries\",\n \"date\": \"2025-01-15T00:00:00\",\n \"type\": \"expense\"\n}"
71
+ },
72
+ "url": {
73
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
74
+ "protocol": "https",
75
+ "host": [
76
+ "logicgoinfotechspaces-smart-budget-recommendation",
77
+ "hf",
78
+ "space"
79
+ ],
80
+ "path": [
81
+ "expenses"
82
+ ]
83
+ },
84
+ "description": "Create a new expense record"
85
+ }
86
+ },
87
+ {
88
+ "name": "Get Expenses",
89
+ "request": {
90
+ "method": "GET",
91
+ "header": [],
92
+ "url": {
93
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses?user_id={{user_id}}&limit=20",
94
+ "protocol": "https",
95
+ "host": [
96
+ "logicgoinfotechspaces-smart-budget-recommendation",
97
+ "hf",
98
+ "space"
99
+ ],
100
+ "path": [
101
+ "expenses"
102
+ ],
103
+ "query": [
104
+ {
105
+ "key": "user_id",
106
+ "value": "{{user_id}}",
107
+ "description": "User identifier"
108
+ },
109
+ {
110
+ "key": "limit",
111
+ "value": "20",
112
+ "description": "Maximum number of expenses to return"
113
+ }
114
+ ]
115
+ },
116
+ "description": "Get expenses for a specific user"
117
+ }
118
+ },
119
+ {
120
+ "name": "Create Budget",
121
+ "request": {
122
+ "method": "POST",
123
+ "header": [
124
+ {
125
+ "key": "Content-Type",
126
+ "value": "application/json"
127
  }
128
+ ],
129
+ "body": {
130
+ "mode": "raw",
131
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"category\": \"Groceries\",\n \"amount\": 4000,\n \"period\": \"monthly\",\n \"start_date\": \"2025-02-01T00:00:00\",\n \"end_date\": \"2025-02-28T00:00:00\"\n}"
132
+ },
133
+ "url": {
134
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets",
135
+ "protocol": "https",
136
+ "host": [
137
+ "logicgoinfotechspaces-smart-budget-recommendation",
138
+ "hf",
139
+ "space"
140
+ ],
141
+ "path": [
142
+ "budgets"
143
+ ]
144
+ },
145
+ "description": "Create a new budget"
146
+ }
147
+ },
148
+ {
149
+ "name": "Get Budgets",
150
+ "request": {
151
+ "method": "GET",
152
+ "header": [],
153
+ "url": {
154
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets?user_id={{user_id}}",
155
+ "protocol": "https",
156
+ "host": [
157
+ "logicgoinfotechspaces-smart-budget-recommendation",
158
+ "hf",
159
+ "space"
160
+ ],
161
+ "path": [
162
+ "budgets"
163
+ ],
164
+ "query": [
165
+ {
166
+ "key": "user_id",
167
+ "value": "{{user_id}}"
168
+ }
169
+ ]
170
+ },
171
+ "description": "Get budgets for a specific user"
172
+ }
173
+ },
174
+ {
175
+ "name": "Get Smart Budget Recommendations",
176
+ "request": {
177
+ "method": "GET",
178
+ "header": [],
179
+ "url": {
180
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/recommendations/{{user_id}}?month=2&year=2025",
181
+ "protocol": "https",
182
+ "host": [
183
+ "logicgoinfotechspaces-smart-budget-recommendation",
184
+ "hf",
185
+ "space"
186
+ ],
187
+ "path": [
188
+ "recommendations",
189
+ "{{user_id}}"
190
+ ],
191
+ "query": [
192
+ {
193
+ "key": "month",
194
+ "value": "2",
195
+ "description": "Target month (1-12), optional - defaults to next month"
196
+ },
197
+ {
198
+ "key": "year",
199
+ "value": "2025",
200
+ "description": "Target year, optional - defaults to next year"
201
+ }
202
+ ]
203
+ },
204
+ "description": "Get smart budget recommendations based on past spending behavior. Built only from the user's existing budgets; returns an empty list when the user has no budgets."
205
+ }
206
+ },
207
+ {
208
+ "name": "Get Category Expenses",
209
+ "request": {
210
+ "method": "GET",
211
+ "header": [],
212
+ "url": {
213
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/category-expenses/{{user_id}}?months=3",
214
+ "protocol": "https",
215
+ "host": [
216
+ "logicgoinfotechspaces-smart-budget-recommendation",
217
+ "hf",
218
+ "space"
219
+ ],
220
+ "path": [
221
+ "category-expenses",
222
+ "{{user_id}}"
223
+ ],
224
+ "query": [
225
+ {
226
+ "key": "months",
227
+ "value": "3",
228
+ "description": "Number of months to analyze (default: 3)"
229
+ }
230
+ ]
231
+ },
232
+ "description": "Get average expenses by category for the past N months"
233
+ }
234
+ },
235
+ {
236
+ "name": "Sample Expenses - Create Multiple",
237
+ "item": [
238
+ {
239
+ "name": "Groceries - Month 1 (Sept 2024)",
240
  "request": {
241
+ "method": "POST",
242
+ "header": [
243
+ {
244
+ "key": "Content-Type",
245
+ "value": "application/json"
246
+ }
247
+ ],
248
+ "body": {
249
+ "mode": "raw",
250
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"amount\": 3500,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries - September 2024\",\n \"date\": \"2024-09-15T00:00:00\",\n \"type\": \"expense\"\n}"
251
+ },
252
+ "url": {
253
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
254
+ "protocol": "https",
255
+ "host": [
256
+ "logicgoinfotechspaces-smart-budget-recommendation",
257
+ "hf",
258
+ "space"
259
  ],
260
+ "path": [
261
+ "expenses"
262
+ ]
263
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  }
265
+ },
266
+ {
267
+ "name": "Groceries - Month 2 (Oct 2024)",
268
  "request": {
269
+ "method": "POST",
270
+ "header": [
271
+ {
272
+ "key": "Content-Type",
273
+ "value": "application/json"
274
+ }
275
+ ],
276
+ "body": {
277
+ "mode": "raw",
278
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"amount\": 3800,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries - October 2024\",\n \"date\": \"2024-10-15T00:00:00\",\n \"type\": \"expense\"\n}"
279
+ },
280
+ "url": {
281
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
282
+ "protocol": "https",
283
+ "host": [
284
+ "logicgoinfotechspaces-smart-budget-recommendation",
285
+ "hf",
286
+ "space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  ],
288
+ "path": [
289
+ "expenses"
290
+ ]
291
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  }
293
+ },
294
+ {
295
+ "name": "Groceries - Month 3 (Nov 2024)",
296
  "request": {
297
+ "method": "POST",
298
+ "header": [
299
+ {
300
+ "key": "Content-Type",
301
+ "value": "application/json"
302
+ }
303
+ ],
304
+ "body": {
305
+ "mode": "raw",
306
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"amount\": 4000,\n \"category\": \"Groceries\",\n \"description\": \"Monthly groceries - November 2024\",\n \"date\": \"2024-11-15T00:00:00\",\n \"type\": \"expense\"\n}"
307
+ },
308
+ "url": {
309
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
310
+ "protocol": "https",
311
+ "host": [
312
+ "logicgoinfotechspaces-smart-budget-recommendation",
313
+ "hf",
314
+ "space"
315
+ ],
316
+ "path": [
317
+ "expenses"
318
+ ]
319
+ }
320
  }
321
+ },
322
+ {
323
+ "name": "Transport - Month 1 (Sept 2024)",
324
  "request": {
325
+ "method": "POST",
326
+ "header": [
327
+ {
328
+ "key": "Content-Type",
329
+ "value": "application/json"
330
+ }
331
+ ],
332
+ "body": {
333
+ "mode": "raw",
334
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"amount\": 2000,\n \"category\": \"Transport\",\n \"description\": \"Monthly transport - September 2024\",\n \"date\": \"2024-09-20T00:00:00\",\n \"type\": \"expense\"\n}"
335
+ },
336
+ "url": {
337
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
338
+ "protocol": "https",
339
+ "host": [
340
+ "logicgoinfotechspaces-smart-budget-recommendation",
341
+ "hf",
342
+ "space"
343
+ ],
344
+ "path": [
345
+ "expenses"
346
+ ]
347
+ }
 
 
 
 
 
348
  }
349
+ },
350
+ {
351
+ "name": "Transport - Month 2 (Oct 2024)",
352
  "request": {
353
+ "method": "POST",
354
+ "header": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  {
356
+ "key": "Content-Type",
357
+ "value": "application/json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  }
359
+ ],
360
+ "body": {
361
+ "mode": "raw",
362
+ "raw": "{\n \"user_id\": \"{{user_id}}\",\n \"amount\": 2200,\n \"category\": \"Transport\",\n \"description\": \"Monthly transport - October 2024\",\n \"date\": \"2024-10-20T00:00:00\",\n \"type\": \"expense\"\n}"
363
+ },
364
+ "url": {
365
+ "raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
366
+ "protocol": "https",
367
+ "host": [
368
+ "logicgoinfotechspaces-smart-budget-recommendation",
369
+ "hf",
370
+ "space"
371
+ ],
372
+ "path": [
373
+ "expenses"
374
+ ]
375
+ }
376
+ }
377
+ }
378
+ ]
379
+ }
380
  ],
381
  "variable": [
382
+ {
383
+ "key": "base_url",
384
+ "value": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space",
385
+ "type": "string"
386
+ },
387
+ {
388
+ "key": "user_id",
389
+ "value": "68a834c3f4694b11efedacd2",
390
+ "type": "string"
391
+ },
392
+ {
393
+ "key": "hf_token",
394
+ "value": "",
395
+ "type": "string"
396
+ }
397
  ]
398
+ }
 
app/main.py CHANGED
@@ -190,10 +190,12 @@ async def get_budget_recommendations(user_id: str, month: Optional[int] = None,
190
 
191
  Example response:
192
  {
193
- "category": "Groceries",
194
  "average_expense": 3800,
195
  "recommended_budget": 4000,
196
- "reason": "Your average monthly grocery expense is Rs.3,800. We suggest setting your budget to Rs.4,000 for next month."
 
 
197
  }
198
  """
199
  if not month or not year:
 
190
 
191
  Example response:
192
  {
193
+ "budget_name": "Groceries",
194
  "average_expense": 3800,
195
  "recommended_budget": 4000,
196
+ "reason": "Your average monthly grocery expense is Rs.3,800. We suggest setting your budget to Rs.4,000 for next month.",
197
+ "confidence": 0.85,
198
+ "action": "increase"
199
  }
200
  """
201
  if not month or not year:
app/models.py CHANGED
@@ -24,7 +24,7 @@ class Budget(BaseModel):
24
  end_date: Optional[datetime] = None
25
 
26
  class BudgetRecommendation(BaseModel):
27
- category: str
28
  average_expense: float
29
  recommended_budget: float
30
  reason: str
 
24
  end_date: Optional[datetime] = None
25
 
26
  class BudgetRecommendation(BaseModel):
27
+ budget_name: str = Field(..., description="Budget name (e.g., Groceries, Transport)")
28
  average_expense: float
29
  recommended_budget: float
30
  reason: str
app/smart_recommendation.py CHANGED
@@ -40,28 +40,11 @@ class SmartBudgetRecommender:
40
  # 1) Try to build stats from existing budgets for this user (createdBy)
41
  category_data = self._get_category_stats_from_budgets(user_id, month, year)
42
 
43
- # 2) If there are no budgets, fall back to expenses history
 
44
  if not category_data:
45
- end_date = datetime(year, month, 1) - timedelta(days=1)
46
- start_date = end_date - timedelta(days=180) # ~6 months
47
-
48
- expenses = list(
49
- self.db.expenses.find(
50
- {
51
- "user_id": user_id,
52
- "date": {"$gte": start_date, "$lte": end_date},
53
- "type": "expense",
54
- }
55
- )
56
- )
57
-
58
- if not expenses:
59
- return []
60
-
61
- # Group expenses by category and calculate monthly averages
62
- category_data = self._calculate_category_statistics(
63
- expenses, start_date, end_date
64
- )
65
 
66
  recommendations: List[BudgetRecommendation] = []
67
 
@@ -87,7 +70,7 @@ class SmartBudgetRecommender:
87
  print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
88
 
89
  recommendations.append(BudgetRecommendation(
90
- category=category,
91
  average_expense=round(avg_expense, 2),
92
  recommended_budget=round(recommended_budget or 0, 2),
93
  reason=reason,
@@ -263,14 +246,40 @@ class SmartBudgetRecommender:
263
  return result
264
 
265
  def _get_category_name(self, category_id) -> str:
266
- """Look up category name from categories collection"""
 
 
 
267
  if not category_id:
268
  return "Uncategorized"
269
 
270
  try:
271
- # Try to find category in categories collection
272
- if isinstance(category_id, ObjectId):
273
- category_doc = self.db.categories.find_one({"_id": category_id})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  else:
275
  try:
276
  category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
@@ -278,11 +287,14 @@ class SmartBudgetRecommender:
278
  category_doc = self.db.categories.find_one({"_id": category_id})
279
 
280
  if category_doc:
281
- return category_doc.get("name") or category_doc.get("title") or str(category_id)
 
 
282
  except Exception as e:
283
  print(f"Error looking up category name for {category_id}: {e}")
284
  pass
285
 
 
286
  return str(category_id) if category_id else "Uncategorized"
287
 
288
  def _get_category_stats_from_budgets(
@@ -396,91 +408,26 @@ class SmartBudgetRecommender:
396
 
397
  result: Dict[str, Dict] = {}
398
  for b in budgets:
399
- # First, try to extract categories from headCategories array
400
- head_categories = b.get("headCategories", [])
401
 
402
- if head_categories and isinstance(head_categories, list):
403
- # Process nested categories from headCategories
404
- for head_cat in head_categories:
405
- if not isinstance(head_cat, dict):
406
- continue
407
-
408
- # Get headCategory ID and amounts
409
- head_cat_id = head_cat.get("headCategory")
410
- try:
411
- head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
412
- head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
413
- except (ValueError, TypeError):
414
- head_cat_max = 0
415
- head_cat_spend = 0
416
-
417
- # Process nested categories within headCategory
418
- nested_categories = head_cat.get("categories", [])
419
- if nested_categories and isinstance(nested_categories, list):
420
- for nested_cat in nested_categories:
421
- if not isinstance(nested_cat, dict):
422
- continue
423
-
424
- nested_cat_id = nested_cat.get("category")
425
- try:
426
- nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
427
- nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
428
- except (ValueError, TypeError):
429
- nested_cat_max = 0
430
- nested_cat_spend = 0
431
- spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
432
-
433
- # Only include categories with limits (must have maxAmount > 0)
434
- if nested_cat_max > 0:
435
- # Look up actual category name
436
- nested_category_name = self._get_category_name(nested_cat_id)
437
- nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
438
-
439
- if nested_category_name not in result:
440
- result[nested_category_name] = {
441
- "average_monthly": nested_base_amount,
442
- "total": nested_base_amount,
443
- "count": 1,
444
- "months_analyzed": 1,
445
- "std_dev": 0.0,
446
- "monthly_values": [nested_base_amount],
447
- }
448
- else:
449
- result[nested_category_name]["total"] += nested_base_amount
450
- result[nested_category_name]["count"] += 1
451
- result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
452
- result[nested_category_name]["average_monthly"] = (
453
- result[nested_category_name]["total"] / result[nested_category_name]["count"]
454
- )
455
- result[nested_category_name]["monthly_values"].append(nested_base_amount)
456
-
457
- # Also include headCategory if it has amounts
458
- if head_cat_max > 0 or head_cat_spend > 0:
459
- head_category_name = self._get_category_name(head_cat_id)
460
- head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
461
-
462
- if head_category_name not in result:
463
- result[head_category_name] = {
464
- "average_monthly": head_base_amount,
465
- "total": head_base_amount,
466
- "count": 1,
467
- "months_analyzed": 1,
468
- "std_dev": 0.0,
469
- "monthly_values": [head_base_amount],
470
- }
471
- else:
472
- result[head_category_name]["total"] += head_base_amount
473
- result[head_category_name]["count"] += 1
474
- result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
475
- result[head_category_name]["average_monthly"] = (
476
- result[head_category_name]["total"] / result[head_category_name]["count"]
477
- )
478
- result[head_category_name]["monthly_values"].append(head_base_amount)
479
 
480
- # Also include the main budget as a category (if it has amounts)
481
- budget_name = b.get("name", "Uncategorized")
482
- if not budget_name or budget_name == "Uncategorized":
483
- budget_name = b.get("category") or b.get("title") or "Uncategorized"
 
 
484
 
485
  # Derive a base amount from WalletSync fields
486
  try:
@@ -502,10 +449,10 @@ class SmartBudgetRecommender:
502
  else:
503
  base_amount = 0
504
 
505
- # Only add main budget if it has an amount and we haven't processed categories
506
  if base_amount > 0:
507
- if budget_name not in result:
508
- result[budget_name] = {
509
  "average_monthly": base_amount,
510
  "total": base_amount,
511
  "count": 1,
@@ -514,15 +461,15 @@ class SmartBudgetRecommender:
514
  "monthly_values": [base_amount],
515
  }
516
  else:
517
- result[budget_name]["total"] += base_amount
518
- result[budget_name]["count"] += 1
519
- result[budget_name]["months_analyzed"] = result[budget_name]["count"]
520
- result[budget_name]["average_monthly"] = (
521
- result[budget_name]["total"] / result[budget_name]["count"]
522
  )
523
- result[budget_name]["monthly_values"].append(base_amount)
524
 
525
- print(f"Processed {len(result)} budget categories for recommendations")
526
  return result
527
 
528
  def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
 
40
  # 1) Try to build stats from existing budgets for this user (createdBy)
41
  category_data = self._get_category_stats_from_budgets(user_id, month, year)
42
 
43
+ # 2) Only return recommendations for actual budgets - do NOT use expenses history
44
+ # This ensures we only show recommendations for budgets the user actually created
45
  if not category_data:
46
+ print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
47
+ return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  recommendations: List[BudgetRecommendation] = []
50
 
 
70
  print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
71
 
72
  recommendations.append(BudgetRecommendation(
73
+ budget_name=category,
74
  average_expense=round(avg_expense, 2),
75
  recommended_budget=round(recommended_budget or 0, 2),
76
  reason=reason,
 
246
  return result
247
 
248
  def _get_category_name(self, category_id) -> str:
249
+ """
250
+ Look up category name from headCategories and categories collections.
251
+ Checks headCategories first, then categories collection.
252
+ """
253
  if not category_id:
254
  return "Uncategorized"
255
 
256
  try:
257
+ # Convert to ObjectId if it's a string
258
+ if isinstance(category_id, str):
259
+ try:
260
+ category_id_obj = ObjectId(category_id)
261
+ except (ValueError, TypeError):
262
+ category_id_obj = category_id
263
+ else:
264
+ category_id_obj = category_id
265
+
266
+ # First, try to find in headCategories collection
267
+ if isinstance(category_id_obj, ObjectId):
268
+ head_category_doc = self.db.headcategories.find_one({"_id": category_id_obj})
269
+ else:
270
+ try:
271
+ head_category_doc = self.db.headcategories.find_one({"_id": ObjectId(category_id)})
272
+ except (ValueError, TypeError):
273
+ head_category_doc = self.db.headcategories.find_one({"_id": category_id})
274
+
275
+ if head_category_doc:
276
+ category_name = head_category_doc.get("name") or head_category_doc.get("title")
277
+ if category_name:
278
+ return category_name
279
+
280
+ # If not found in headCategories, try categories collection
281
+ if isinstance(category_id_obj, ObjectId):
282
+ category_doc = self.db.categories.find_one({"_id": category_id_obj})
283
  else:
284
  try:
285
  category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
 
287
  category_doc = self.db.categories.find_one({"_id": category_id})
288
 
289
  if category_doc:
290
+ category_name = category_doc.get("name") or category_doc.get("title")
291
+ if category_name:
292
+ return category_name
293
  except Exception as e:
294
  print(f"Error looking up category name for {category_id}: {e}")
295
  pass
296
 
297
+ # If not found in either collection, return the ID as string
298
  return str(category_id) if category_id else "Uncategorized"
299
 
300
  def _get_category_stats_from_budgets(
 
408
 
409
  result: Dict[str, Dict] = {}
410
  for b in budgets:
411
+ # Extract category ID from budget (could be in the category, categoryId, headCategory, or category_id field)
412
+ category_id = b.get("category") or b.get("categoryId") or b.get("headCategory") or b.get("category_id")
413
 
414
+ # Get category name from headCategories or categories collection using category ID
415
+ if category_id:
416
+ category_name = self._get_category_name(category_id)
417
+ print(f"✅ Found category ID: {category_id} -> Name: '{category_name}'")
418
+ else:
419
+ # Fallback to budget name if no category ID found
420
+ category_name = b.get("name", "Uncategorized")
421
+ if not category_name or category_name == "Uncategorized":
422
+ category_name = b.get("title") or "Uncategorized"
423
+ print(f"⚠️ No category ID found, using budget name: '{category_name}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
+ # Skip if category name is still Uncategorized or empty
426
+ if not category_name or category_name == "Uncategorized" or category_name.strip() == "":
427
+ print(f"⚠️ Skipping budget with invalid category name: {b.get('_id')}")
428
+ continue
429
+
430
+ print(f"✅ Processing budget: '{category_name}' (budget id: {b.get('_id')}, category id: {category_id})")
431
 
432
  # Derive a base amount from WalletSync fields
433
  try:
 
449
  else:
450
  base_amount = 0
451
 
452
+ # Only add budget if it has an amount - use category name as key
453
  if base_amount > 0:
454
+ if category_name not in result:
455
+ result[category_name] = {
456
  "average_monthly": base_amount,
457
  "total": base_amount,
458
  "count": 1,
 
461
  "monthly_values": [base_amount],
462
  }
463
  else:
464
+ result[category_name]["total"] += base_amount
465
+ result[category_name]["count"] += 1
466
+ result[category_name]["months_analyzed"] = result[category_name]["count"]
467
+ result[category_name]["average_monthly"] = (
468
+ result[category_name]["total"] / result[category_name]["count"]
469
  )
470
+ result[category_name]["monthly_values"].append(base_amount)
471
 
472
+ print(f"Processed {len(result)} budget categories for recommendations: {list(result.keys())}")
473
  return result
474
 
475
  def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):