Spaces:

LogicGoInfotechSpaces
/

SMART_BUDGET_RECOMMENDATION

Paused

App Files Files Community

LogicGoInfotechSpaces committed 9 days ago

Commit

b71b4c6

1 Parent(s): 46c4337

Use category ID from budgets and lookup names from headCategories and categories collections

Browse files

Files changed (16) hide show

.history/app/smart_recommendation_20251225155108.py +580 -0
.history/app/smart_recommendation_20251225155112.py +583 -0
.history/app/smart_recommendation_20251225155130.py +583 -0
.history/app/smart_recommendation_20251225160734.py +583 -0
.history/app/smart_recommendation_20251225160759.py +583 -0
.history/app/smart_recommendation_20251225160914.py +503 -0
.history/app/smart_recommendation_20251225161000.py +508 -0
.history/app/smart_recommendation_20251225161022.py +511 -0
.history/app/smart_recommendation_20251225161052.py +491 -0
.history/app/smart_recommendation_20251225161110.py +493 -0
.history/app/smart_recommendation_20251225161134.py +493 -0
.history/app/smart_recommendation_20251225161144.py +493 -0
Smart_Budget_Recommendation_API.postman_collection.json +376 -362
app/main.py +4 -2
app/models.py +1 -1
app/smart_recommendation.py +67 -120

.history/app/smart_recommendation_20251225155108.py ADDED Viewed

	@@ -0,0 +1,580 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                category=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # First, try to extract categories from headCategories array
+            head_categories = b.get("headCategories", [])
+            if head_categories and isinstance(head_categories, list):
+                # Process nested categories from headCategories
+                for head_cat in head_categories:
+                    if not isinstance(head_cat, dict):
+                        continue
+                    # Get headCategory ID and amounts
+                    head_cat_id = head_cat.get("headCategory")
+                    try:
+                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
+                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
+                    except (ValueError, TypeError):
+                        head_cat_max = 0
+                        head_cat_spend = 0
+                    # Process nested categories within headCategory
+                    nested_categories = head_cat.get("categories", [])
+                    if nested_categories and isinstance(nested_categories, list):
+                        for nested_cat in nested_categories:
+                            if not isinstance(nested_cat, dict):
+                                continue
+                            nested_cat_id = nested_cat.get("category")
+                            try:
+                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
+                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
+                            except (ValueError, TypeError):
+                                nested_cat_max = 0
+                                nested_cat_spend = 0
+                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
+                            # Only include categories with limits (must have maxAmount > 0)
+                            if nested_cat_max > 0:
+                                # Look up actual category name
+                                nested_category_name = self._get_category_name(nested_cat_id)
+                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
+                                if nested_category_name not in result:
+                                    result[nested_category_name] = {
+                                        "average_monthly": nested_base_amount,
+                                        "total": nested_base_amount,
+                                        "count": 1,
+                                        "months_analyzed": 1,
+                                        "std_dev": 0.0,
+                                        "monthly_values": [nested_base_amount],
+                                    }
+                                else:
+                                    result[nested_category_name]["total"] += nested_base_amount
+                                    result[nested_category_name]["count"] += 1
+                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
+                                    result[nested_category_name]["average_monthly"] = (
+                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
+                                    )
+                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
+                    # Also include headCategory if it has amounts
+                    if head_cat_max > 0 or head_cat_spend > 0:
+                        head_category_name = self._get_category_name(head_cat_id)
+                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
+                        if head_category_name not in result:
+                            result[head_category_name] = {
+                                "average_monthly": head_base_amount,
+                                "total": head_base_amount,
+                                "count": 1,
+                                "months_analyzed": 1,
+                                "std_dev": 0.0,
+                                "monthly_values": [head_base_amount],
+                            }
+                        else:
+                            result[head_category_name]["total"] += head_base_amount
+                            result[head_category_name]["count"] += 1
+                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
+                            result[head_category_name]["average_monthly"] = (
+                                result[head_category_name]["total"] / result[head_category_name]["count"]
+                            )
+                            result[head_category_name]["monthly_values"].append(head_base_amount)
+            # Also include the main budget as a category (if it has amounts)
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            return None
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225155112.py ADDED Viewed

	@@ -0,0 +1,583 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                category=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # First, try to extract categories from headCategories array
+            head_categories = b.get("headCategories", [])
+            if head_categories and isinstance(head_categories, list):
+                # Process nested categories from headCategories
+                for head_cat in head_categories:
+                    if not isinstance(head_cat, dict):
+                        continue
+                    # Get headCategory ID and amounts
+                    head_cat_id = head_cat.get("headCategory")
+                    try:
+                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
+                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
+                    except (ValueError, TypeError):
+                        head_cat_max = 0
+                        head_cat_spend = 0
+                    # Process nested categories within headCategory
+                    nested_categories = head_cat.get("categories", [])
+                    if nested_categories and isinstance(nested_categories, list):
+                        for nested_cat in nested_categories:
+                            if not isinstance(nested_cat, dict):
+                                continue
+                            nested_cat_id = nested_cat.get("category")
+                            try:
+                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
+                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
+                            except (ValueError, TypeError):
+                                nested_cat_max = 0
+                                nested_cat_spend = 0
+                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
+                            # Only include categories with limits (must have maxAmount > 0)
+                            if nested_cat_max > 0:
+                                # Look up actual category name
+                                nested_category_name = self._get_category_name(nested_cat_id)
+                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
+                                if nested_category_name not in result:
+                                    result[nested_category_name] = {
+                                        "average_monthly": nested_base_amount,
+                                        "total": nested_base_amount,
+                                        "count": 1,
+                                        "months_analyzed": 1,
+                                        "std_dev": 0.0,
+                                        "monthly_values": [nested_base_amount],
+                                    }
+                                else:
+                                    result[nested_category_name]["total"] += nested_base_amount
+                                    result[nested_category_name]["count"] += 1
+                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
+                                    result[nested_category_name]["average_monthly"] = (
+                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
+                                    )
+                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
+                    # Also include headCategory if it has amounts
+                    if head_cat_max > 0 or head_cat_spend > 0:
+                        head_category_name = self._get_category_name(head_cat_id)
+                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
+                        if head_category_name not in result:
+                            result[head_category_name] = {
+                                "average_monthly": head_base_amount,
+                                "total": head_base_amount,
+                                "count": 1,
+                                "months_analyzed": 1,
+                                "std_dev": 0.0,
+                                "monthly_values": [head_base_amount],
+                            }
+                        else:
+                            result[head_category_name]["total"] += head_base_amount
+                            result[head_category_name]["count"] += 1
+                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
+                            result[head_category_name]["average_monthly"] = (
+                                result[head_category_name]["total"] / result[head_category_name]["count"]
+                            )
+                            result[head_category_name]["monthly_values"].append(head_base_amount)
+            # Also include the main budget as a category (if it has amounts)
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225155130.py ADDED Viewed

	@@ -0,0 +1,583 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                category=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # First, try to extract categories from headCategories array
+            head_categories = b.get("headCategories", [])
+            if head_categories and isinstance(head_categories, list):
+                # Process nested categories from headCategories
+                for head_cat in head_categories:
+                    if not isinstance(head_cat, dict):
+                        continue
+                    # Get headCategory ID and amounts
+                    head_cat_id = head_cat.get("headCategory")
+                    try:
+                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
+                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
+                    except (ValueError, TypeError):
+                        head_cat_max = 0
+                        head_cat_spend = 0
+                    # Process nested categories within headCategory
+                    nested_categories = head_cat.get("categories", [])
+                    if nested_categories and isinstance(nested_categories, list):
+                        for nested_cat in nested_categories:
+                            if not isinstance(nested_cat, dict):
+                                continue
+                            nested_cat_id = nested_cat.get("category")
+                            try:
+                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
+                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
+                            except (ValueError, TypeError):
+                                nested_cat_max = 0
+                                nested_cat_spend = 0
+                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
+                            # Only include categories with limits (must have maxAmount > 0)
+                            if nested_cat_max > 0:
+                                # Look up actual category name
+                                nested_category_name = self._get_category_name(nested_cat_id)
+                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
+                                if nested_category_name not in result:
+                                    result[nested_category_name] = {
+                                        "average_monthly": nested_base_amount,
+                                        "total": nested_base_amount,
+                                        "count": 1,
+                                        "months_analyzed": 1,
+                                        "std_dev": 0.0,
+                                        "monthly_values": [nested_base_amount],
+                                    }
+                                else:
+                                    result[nested_category_name]["total"] += nested_base_amount
+                                    result[nested_category_name]["count"] += 1
+                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
+                                    result[nested_category_name]["average_monthly"] = (
+                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
+                                    )
+                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
+                    # Also include headCategory if it has amounts
+                    if head_cat_max > 0 or head_cat_spend > 0:
+                        head_category_name = self._get_category_name(head_cat_id)
+                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
+                        if head_category_name not in result:
+                            result[head_category_name] = {
+                                "average_monthly": head_base_amount,
+                                "total": head_base_amount,
+                                "count": 1,
+                                "months_analyzed": 1,
+                                "std_dev": 0.0,
+                                "monthly_values": [head_base_amount],
+                            }
+                        else:
+                            result[head_category_name]["total"] += head_base_amount
+                            result[head_category_name]["count"] += 1
+                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
+                            result[head_category_name]["average_monthly"] = (
+                                result[head_category_name]["total"] / result[head_category_name]["count"]
+                            )
+                            result[head_category_name]["monthly_values"].append(head_base_amount)
+            # Also include the main budget as a category (if it has amounts)
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225160734.py ADDED Viewed

	@@ -0,0 +1,583 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        # Database handle; other methods read its ``expenses``, ``budgets``
+        # and ``categories`` collections through this attribute.
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # First, try to extract categories from headCategories array
+            head_categories = b.get("headCategories", [])
+            if head_categories and isinstance(head_categories, list):
+                # Process nested categories from headCategories
+                for head_cat in head_categories:
+                    if not isinstance(head_cat, dict):
+                        continue
+                    # Get headCategory ID and amounts
+                    head_cat_id = head_cat.get("headCategory")
+                    try:
+                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
+                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
+                    except (ValueError, TypeError):
+                        head_cat_max = 0
+                        head_cat_spend = 0
+                    # Process nested categories within headCategory
+                    nested_categories = head_cat.get("categories", [])
+                    if nested_categories and isinstance(nested_categories, list):
+                        for nested_cat in nested_categories:
+                            if not isinstance(nested_cat, dict):
+                                continue
+                            nested_cat_id = nested_cat.get("category")
+                            try:
+                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
+                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
+                            except (ValueError, TypeError):
+                                nested_cat_max = 0
+                                nested_cat_spend = 0
+                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
+                            # Only include categories with limits (must have maxAmount > 0)
+                            if nested_cat_max > 0:
+                                # Look up actual category name
+                                nested_category_name = self._get_category_name(nested_cat_id)
+                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
+                                if nested_category_name not in result:
+                                    result[nested_category_name] = {
+                                        "average_monthly": nested_base_amount,
+                                        "total": nested_base_amount,
+                                        "count": 1,
+                                        "months_analyzed": 1,
+                                        "std_dev": 0.0,
+                                        "monthly_values": [nested_base_amount],
+                                    }
+                                else:
+                                    result[nested_category_name]["total"] += nested_base_amount
+                                    result[nested_category_name]["count"] += 1
+                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
+                                    result[nested_category_name]["average_monthly"] = (
+                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
+                                    )
+                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
+                    # Also include headCategory if it has amounts
+                    if head_cat_max > 0 or head_cat_spend > 0:
+                        head_category_name = self._get_category_name(head_cat_id)
+                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
+                        if head_category_name not in result:
+                            result[head_category_name] = {
+                                "average_monthly": head_base_amount,
+                                "total": head_base_amount,
+                                "count": 1,
+                                "months_analyzed": 1,
+                                "std_dev": 0.0,
+                                "monthly_values": [head_base_amount],
+                            }
+                        else:
+                            result[head_category_name]["total"] += head_base_amount
+                            result[head_category_name]["count"] += 1
+                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
+                            result[head_category_name]["average_monthly"] = (
+                                result[head_category_name]["total"] / result[head_category_name]["count"]
+                            )
+                            result[head_category_name]["monthly_values"].append(head_base_amount)
+            # Also include the main budget as a category (if it has amounts)
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225160759.py ADDED Viewed

	@@ -0,0 +1,583 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db) -> None:
+        """Keep a reference to the database handle used by every query in this class."""
+        # Other methods read db.budgets, db.expenses and db.categories;
+        # presumably a pymongo Database - confirm against the caller.
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # First, try to extract categories from headCategories array
+            head_categories = b.get("headCategories", [])
+            if head_categories and isinstance(head_categories, list):
+                # Process nested categories from headCategories
+                for head_cat in head_categories:
+                    if not isinstance(head_cat, dict):
+                        continue
+                    # Get headCategory ID and amounts
+                    head_cat_id = head_cat.get("headCategory")
+                    try:
+                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
+                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
+                    except (ValueError, TypeError):
+                        head_cat_max = 0
+                        head_cat_spend = 0
+                    # Process nested categories within headCategory
+                    nested_categories = head_cat.get("categories", [])
+                    if nested_categories and isinstance(nested_categories, list):
+                        for nested_cat in nested_categories:
+                            if not isinstance(nested_cat, dict):
+                                continue
+                            nested_cat_id = nested_cat.get("category")
+                            try:
+                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
+                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
+                            except (ValueError, TypeError):
+                                nested_cat_max = 0
+                                nested_cat_spend = 0
+                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
+                            # Only include categories with limits (must have maxAmount > 0)
+                            if nested_cat_max > 0:
+                                # Look up actual category name
+                                nested_category_name = self._get_category_name(nested_cat_id)
+                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
+                                if nested_category_name not in result:
+                                    result[nested_category_name] = {
+                                        "average_monthly": nested_base_amount,
+                                        "total": nested_base_amount,
+                                        "count": 1,
+                                        "months_analyzed": 1,
+                                        "std_dev": 0.0,
+                                        "monthly_values": [nested_base_amount],
+                                    }
+                                else:
+                                    result[nested_category_name]["total"] += nested_base_amount
+                                    result[nested_category_name]["count"] += 1
+                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
+                                    result[nested_category_name]["average_monthly"] = (
+                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
+                                    )
+                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
+                    # Also include headCategory if it has amounts
+                    if head_cat_max > 0 or head_cat_spend > 0:
+                        head_category_name = self._get_category_name(head_cat_id)
+                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
+                        if head_category_name not in result:
+                            result[head_category_name] = {
+                                "average_monthly": head_base_amount,
+                                "total": head_base_amount,
+                                "count": 1,
+                                "months_analyzed": 1,
+                                "std_dev": 0.0,
+                                "monthly_values": [head_base_amount],
+                            }
+                        else:
+                            result[head_category_name]["total"] += head_base_amount
+                            result[head_category_name]["count"] += 1
+                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
+                            result[head_category_name]["average_monthly"] = (
+                                result[head_category_name]["total"] / result[head_category_name]["count"]
+                            )
+                            result[head_category_name]["monthly_values"].append(head_base_amount)
+            # Also include the main budget as a category (if it has amounts)
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225160914.py ADDED Viewed

	@@ -0,0 +1,503 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        """Keep a reference to the database handle used by the query methods."""
+        # Collections are read from it as attributes (e.g. self.db.expenses,
+        # self.db.budgets, self.db.categories).
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # Only use the main budget name - don't extract nested categories from headCategories
+            # This ensures we only return recommendations for budgets the user actually created
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225161000.py ADDED Viewed

	@@ -0,0 +1,508 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        """Keep a reference to the database handle used by the query methods."""
+        # Collections are read from it as attributes (e.g. self.db.expenses).
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) If there are no budgets, fall back to expenses history
+        if not category_data:
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # Only use the main budget name - don't extract nested categories from headCategories
+            # This ensures we only return recommendations for budgets the user actually created
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Skip if budget name is still Uncategorized or empty
+            if not budget_name or budget_name == "Uncategorized" or budget_name.strip() == "":
+                print(f"Skipping budget with invalid name: {b.get('_id')}")
+                continue
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225161022.py ADDED Viewed

	@@ -0,0 +1,511 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        """Keep a reference to `db`; other methods read its budgets, expenses and categories collections."""
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) Only return recommendations for actual budgets - do NOT use expenses history
+        # This ensures we only show recommendations for budgets the user actually created
+        if not category_data:
+            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
+            return []
+            end_date = datetime(year, month, 1) - timedelta(days=1)
+            start_date = end_date - timedelta(days=180)  # ~6 months
+            expenses = list(
+                self.db.expenses.find(
+                    {
+                        "user_id": user_id,
+                        "date": {"$gte": start_date, "$lte": end_date},
+                        "type": "expense",
+                    }
+                )
+            )
+            if not expenses:
+                return []
+            # Group expenses by category and calculate monthly averages
+            category_data = self._calculate_category_statistics(
+                expenses, start_date, end_date
+            )
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # Only use the main budget name - don't extract nested categories from headCategories
+            # This ensures we only return recommendations for budgets the user actually created
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Skip if budget name is still Uncategorized or empty
+            if not budget_name or budget_name == "Uncategorized" or budget_name.strip() == "":
+                print(f"Skipping budget with invalid name: {b.get('_id')}")
+                continue
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """
+        Ask the OpenAI chat-completions endpoint for a budget suggestion.
+        Args:
+            category: Budget/category name included in the prompt.
+            data: Stats dict; "monthly_values", "std_dev" and "months_analyzed" are read.
+            avg_expense: Average monthly spend for the category.
+        Returns:
+            The JSON-decoded reply content, or None when no API key is
+            configured or the request/decoding raises.
+        """
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # With no recorded monthly values the average stands in as the history
+        monthly_values = data.get("monthly_values")
+        if monthly_values:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        else:
+            history = f"{avg_expense:.0f}"
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        request_headers = {
+            "Authorization": f"Bearer {OPENAI_API_KEY}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "model": "gpt-4o-mini",
+            "messages": [
+                {"role": "user", "content": prompt}
+            ],
+            "temperature": 0.1,
+            "response_format": {"type": "json_object"},
+        }
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers=request_headers,
+                json=payload,
+                timeout=30,
+            )
+            response.raise_for_status()
+            reply = response.json()["choices"][0]["message"]["content"]
+            return json.loads(reply)
+        except Exception as exc:
+            # Best effort: any failure is logged and reported to the caller as None
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225161052.py ADDED Viewed

	@@ -0,0 +1,491 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        """Store the database handle that the query methods of this class read from."""
+        # Collections are reached as attributes (db.budgets, db.expenses, db.categories);
+        # presumably a pymongo database object - TODO confirm against the caller.
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for every budget the user has created.
+        Stats are built only from the user's existing budgets (expense history
+        is not used). For each budget the OpenAI suggestion is preferred; the
+        rule-based calculation is used when the AI reply is missing or does
+        not carry a usable positive amount.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations sorted by average expense
+            (highest first); empty when no budget stats are found.
+        """
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        if not category_data:
+            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
+            return []
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            # The model's JSON is untrusted: accept the amount only when it is a
+            # finite positive number (numeric strings are converted), otherwise
+            # fall back instead of letting round() fail on a bad value.
+            ai_budget = None
+            if isinstance(ai_result, dict):
+                raw_budget = ai_result.get("recommended_budget")
+                if isinstance(raw_budget, str):
+                    try:
+                        raw_budget = float(raw_budget)
+                    except ValueError:
+                        raw_budget = None
+                if (
+                    isinstance(raw_budget, (int, float))
+                    and not isinstance(raw_budget, bool)
+                    and 0 < raw_budget < math.inf
+                ):
+                    ai_budget = raw_budget
+            if ai_budget is not None:
+                recommended_budget = ai_budget
+                reason = ai_result.get("reason")
+                if not isinstance(reason, str) or not reason.strip():
+                    reason = f"AI recommendation for {category}"
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """
+        Calculate per-category spending statistics.
+        Args:
+            expenses: Expense dicts; "category", "amount" and "date" are read.
+                "date" may be a datetime or an ISO-8601 string.
+            start_date: Accepted for interface compatibility; not used here.
+            end_date: Accepted for interface compatibility; not used here.
+        Returns:
+            Dict mapping category to a dict with "average_monthly", "total",
+            "count", "months_analyzed", "std_dev" (population standard
+            deviation of the monthly totals) and "monthly_values".
+        """
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            date = expense.get("date")
+            # Skip records whose date is missing, unparseable or of another type
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except ValueError:
+                    continue
+            elif not isinstance(date, datetime):
+                continue
+            # A null amount counts as 0 and numeric strings are converted;
+            # anything that cannot be read as a number is skipped instead of
+            # raising TypeError in the additions below.
+            try:
+                amount = float(expense.get("amount") or 0)
+            except (TypeError, ValueError):
+                continue
+            bucket = category_data[category]
+            bucket["total"] += amount
+            bucket["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            bucket["months"].add(month_key)
+            bucket["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": data["total"] / num_months,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Derive a rule-based budget from the average monthly expense.
+        The average is padded by 5%, or by 10% when the standard deviation
+        exceeds 20% of the average. The padded value is rounded to a multiple
+        of 100 with the built-in round(), and raised to 100 when there was
+        any expense but the rounded value fell below 100.
+        """
+        spread = data["std_dev"]
+        # Coefficient of variation; treated as 0 when it cannot be computed
+        variation = spread / avg_expense if spread > 0 and avg_expense > 0 else 0
+        buffer_rate = 0.10 if variation > 0.2 else 0.05
+        padded = avg_expense + avg_expense * buffer_rate
+        rounded = round(padded / 100) * 100
+        if avg_expense > 0 and rounded < 100:
+            return 100
+        return rounded
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Score the reliability of the stats on a 0-1 scale, rounded to 2 places.
+        Weights: months analyzed 40% (saturates at 6), record count 30%
+        (saturates at 10), spending consistency 30% (1 minus the coefficient
+        of variation, clamped to 0-1; fixed at 0.5 when the average is not
+        positive).
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)
+        count_score = min(data["count"] / 10, 1.0)
+        average = data["average_monthly"]
+        consistency_score = (
+            max(0, 1 - min(data["std_dev"] / average, 1.0)) if average > 0 else 0.5
+        )
+        weighted = months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3
+        return round(weighted, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """
+        Build the user-facing explanation for a rule-based recommendation.
+        Amounts are rendered as whole rupees with thousands separators. The
+        buffer percentage is mentioned only when the recommended budget is
+        above the average expense.
+        """
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if not recommended_budget > avg_expense:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+        buffer = recommended_budget - avg_expense
+        buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+        return (
+            f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+            f"We suggest setting your budget to {budget_formatted} for next month "
+            f"(includes a {buffer_pct:.0f}% buffer for variability)."
+        )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """
+        Return average monthly expenses per category for a recent window.
+        The window ends at datetime.now() and spans months * 30 days. Only
+        documents in db.expenses with type "expense" for this user_id are
+        considered. Results are sorted by average monthly expense, highest
+        first; an empty list is returned when nothing matches.
+        """
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        query = {
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }
+        expenses = list(self.db.expenses.find(query))
+        if not expenses:
+            return []
+        stats_by_category = self._calculate_category_statistics(expenses, start_date, end_date)
+        averages = [
+            CategoryExpense(
+                category=category,
+                average_monthly_expense=round(stats["average_monthly"], 2),
+                total_expenses=stats["count"],
+                months_analyzed=stats["months_analyzed"]
+            )
+            for category, stats in stats_by_category.items()
+        ]
+        return sorted(averages, key=lambda item: item.average_monthly_expense, reverse=True)
+    def _get_category_name(self, category_id) -> str:
+        """
+        Look up a category's display name in the categories collection.
+        Args:
+            category_id: An ObjectId, or a value convertible to one; values
+                that are not valid ObjectIds are looked up as a raw _id.
+        Returns:
+            The document's "name" (or "title"); str(category_id) when no
+            document or name is found or the lookup fails; "Uncategorized"
+            when category_id is empty.
+        """
+        if not category_id:
+            return "Uncategorized"
+        try:
+            if isinstance(category_id, ObjectId):
+                lookup_id = category_id
+            elif ObjectId.is_valid(category_id):
+                lookup_id = ObjectId(category_id)
+            else:
+                # ObjectId() raises bson.errors.InvalidId (not ValueError/TypeError)
+                # for malformed strings, so validity is checked up front to keep
+                # the raw-_id lookup reachable.
+                lookup_id = category_id
+            category_doc = self.db.categories.find_one({"_id": lookup_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            # Best effort: a failed lookup degrades to the id string below
+            print(f"Error looking up category name for {category_id}: {e}")
+        return str(category_id)
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        Each budget document (e.g. "Office Maintenance", "LOGICGO") is treated
+        as a spending category keyed by its name. Nested headCategories are
+        deliberately NOT expanded, so only budgets the user created appear.
+        Args:
+            user_id: User identifier; matched against createdBy / user_id as
+                both ObjectId and string, and also tried as a budget _id.
+            month: Accepted for interface compatibility; not used here.
+            year: Accepted for interface compatibility; not used here.
+        Returns:
+            Dict mapping budget name to a stats dict with "average_monthly",
+            "total", "count", "months_analyzed", "std_dev" and
+            "monthly_values"; empty when no budget matches.
+        """
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # ObjectId() raises bson.errors.InvalidId (not ValueError/TypeError) for a
+        # malformed id, so validate once here; otherwise a plain-string user_id
+        # would abort before the string-based patterns are tried.
+        user_oid = ObjectId(user_id) if ObjectId.is_valid(user_id) else None
+        if user_oid is None:
+            print("user_id is not a valid ObjectId, skipping ObjectId patterns 1, 4 and 5")
+        # Patterns 1-4 (no status filter, so both OPEN and CLOSE budgets match).
+        # Each entry: (number, description, query or None to skip, tolerate errors).
+        # String patterns stay best-effort; ObjectId patterns let DB errors surface.
+        lookups = [
+            (1, "createdBy ObjectId", {"createdBy": user_oid} if user_oid is not None else None, False),
+            (2, "createdBy string", {"createdBy": user_id}, True),
+            (3, "user_id string", {"user_id": user_id}, True),
+            (4, "user_id ObjectId", {"user_id": user_oid} if user_oid is not None else None, False),
+        ]
+        budgets = []
+        for number, description, query, tolerate_errors in lookups:
+            if query is None:
+                continue
+            try:
+                found = list(self.db.budgets.find(query))
+            except Exception as e:
+                if not tolerate_errors:
+                    raise
+                print(f"Pattern {number} failed: {e}")
+                continue
+            print(f"Pattern {number} ({description}): Found {len(found)} budgets")
+            budgets.extend(found)
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        if user_oid is not None:
+            budget_by_id = self.db.budgets.find_one({"_id": user_oid})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    budgets_by_creator = list(self.db.budgets.find({"createdBy": created_by}))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    budgets.extend(budgets_by_creator)
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print("Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+        # Remove duplicates based on _id, keeping the first occurrence
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print("Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure (diagnostic output only)
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # Only use the main budget name - don't extract nested categories from headCategories
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Skip names that are missing, blank, non-string or still the placeholder
+            if (
+                not isinstance(budget_name, str)
+                or budget_name == "Uncategorized"
+                or not budget_name.strip()
+            ):
+                print(f"Skipping budget with invalid name: {b.get('_id')}")
+                continue
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spent amount > max amount > budget amount; first positive wins
+            base_amount = next(
+                (value for value in (spend_amount, max_amount, budget_amount) if value > 0),
+                0,
+            )
+            if base_amount <= 0:
+                continue
+            stats = result.get(budget_name)
+            if stats is None:
+                result[budget_name] = {
+                    "average_monthly": base_amount,
+                    "total": base_amount,
+                    "count": 1,
+                    "months_analyzed": 1,
+                    "std_dev": 0.0,
+                    "monthly_values": [base_amount],
+                }
+            else:
+                # Several budgets share this name: fold them into one entry.
+                # NOTE(review): std_dev stays 0.0 here even with several values - confirm intended.
+                stats["total"] += base_amount
+                stats["count"] += 1
+                stats["months_analyzed"] = stats["count"]
+                stats["average_monthly"] = stats["total"] / stats["count"]
+                stats["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """
+        Ask the OpenAI chat-completions endpoint for a budget suggestion.
+        Args:
+            category: Budget/category name included in the prompt.
+            data: Stats dict; "monthly_values", "std_dev" and "months_analyzed" are read.
+            avg_expense: Average monthly spend for the category.
+        Returns:
+            The JSON-decoded reply content, or None when no API key is
+            configured or the request/decoding raises.
+        """
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # With no recorded monthly values the average stands in as the history
+        monthly_values = data.get("monthly_values")
+        if monthly_values:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        else:
+            history = f"{avg_expense:.0f}"
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        request_headers = {
+            "Authorization": f"Bearer {OPENAI_API_KEY}",
+            "Content-Type": "application/json",
+        }
+        payload = {
+            "model": "gpt-4o-mini",
+            "messages": [
+                {"role": "user", "content": prompt}
+            ],
+            "temperature": 0.1,
+            "response_format": {"type": "json_object"},
+        }
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers=request_headers,
+                json=payload,
+                timeout=30,
+            )
+            response.raise_for_status()
+            reply = response.json()["choices"][0]["message"]["content"]
+            return json.loads(reply)
+        except Exception as exc:
+            # Best effort: any failure is logged and reported to the caller as None
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225161110.py ADDED Viewed

	@@ -0,0 +1,493 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        """Store the database handle that the query methods of this class read from."""
+        # Collections are reached as attributes (e.g. db.expenses);
+        # presumably a pymongo database object - TODO confirm against the caller.
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for every budget the user has created.
+        Stats are built only from the user's existing budgets (expense history
+        is not used). For each budget the OpenAI suggestion is preferred; the
+        rule-based calculation is used when the AI reply is missing or does
+        not carry a usable positive amount.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations sorted by average expense
+            (highest first); empty when no budget stats are found.
+        """
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        if not category_data:
+            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
+            return []
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            # The model's JSON is untrusted: accept the amount only when it is a
+            # finite positive number (numeric strings are converted), otherwise
+            # fall back instead of letting round() fail on a bad value.
+            ai_budget = None
+            if isinstance(ai_result, dict):
+                raw_budget = ai_result.get("recommended_budget")
+                if isinstance(raw_budget, str):
+                    try:
+                        raw_budget = float(raw_budget)
+                    except ValueError:
+                        raw_budget = None
+                if (
+                    isinstance(raw_budget, (int, float))
+                    and not isinstance(raw_budget, bool)
+                    and 0 < raw_budget < math.inf
+                ):
+                    ai_budget = raw_budget
+            if ai_budget is not None:
+                recommended_budget = ai_budget
+                reason = ai_result.get("reason")
+                if not isinstance(reason, str) or not reason.strip():
+                    reason = f"AI recommendation for {category}"
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """
+        Calculate per-category spending statistics.
+        Args:
+            expenses: Expense dicts; "category", "amount" and "date" are read.
+                "date" may be a datetime or an ISO-8601 string.
+            start_date: Accepted for interface compatibility; not used here.
+            end_date: Accepted for interface compatibility; not used here.
+        Returns:
+            Dict mapping category to a dict with "average_monthly", "total",
+            "count", "months_analyzed", "std_dev" (population standard
+            deviation of the monthly totals) and "monthly_values".
+        """
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            date = expense.get("date")
+            # Skip records whose date is missing, unparseable or of another type
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except ValueError:
+                    continue
+            elif not isinstance(date, datetime):
+                continue
+            # A null amount counts as 0 and numeric strings are converted;
+            # anything that cannot be read as a number is skipped instead of
+            # raising TypeError in the additions below.
+            try:
+                amount = float(expense.get("amount") or 0)
+            except (TypeError, ValueError):
+                continue
+            bucket = category_data[category]
+            bucket["total"] += amount
+            bucket["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            bucket["months"].add(month_key)
+            bucket["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": data["total"] / num_months,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Derive a rule-based budget from the average monthly expense.
+        The average is padded by 5%, or by 10% when the standard deviation
+        exceeds 20% of the average. The padded value is rounded to a multiple
+        of 100 with the built-in round(), and raised to 100 when there was
+        any expense but the rounded value fell below 100.
+        """
+        spread = data["std_dev"]
+        # Coefficient of variation; treated as 0 when it cannot be computed
+        variation = spread / avg_expense if spread > 0 and avg_expense > 0 else 0
+        buffer_rate = 0.10 if variation > 0.2 else 0.05
+        padded = avg_expense + avg_expense * buffer_rate
+        rounded = round(padded / 100) * 100
+        if avg_expense > 0 and rounded < 100:
+            return 100
+        return rounded
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Score the reliability of the stats on a 0-1 scale, rounded to 2 places.
+        Weights: months analyzed 40% (saturates at 6), record count 30%
+        (saturates at 10), spending consistency 30% (1 minus the coefficient
+        of variation, clamped to 0-1; fixed at 0.5 when the average is not
+        positive).
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)
+        count_score = min(data["count"] / 10, 1.0)
+        average = data["average_monthly"]
+        consistency_score = (
+            max(0, 1 - min(data["std_dev"] / average, 1.0)) if average > 0 else 0.5
+        )
+        weighted = months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3
+        return round(weighted, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # Only use the main budget name - don't extract nested categories from headCategories
+            # This ensures we only return recommendations for budgets the user actually created
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Skip if budget name is still Uncategorized or empty
+            if not budget_name or budget_name == "Uncategorized" or budget_name.strip() == "":
+                print(f"⚠️ Skipping budget with invalid name: {b.get('_id')}")
+                continue
+            print(f"✅ Processing budget: '{budget_name}' (id: {b.get('_id')})")
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225161134.py ADDED Viewed

	@@ -0,0 +1,493 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+# python-dotenv: load variables from a local .env file (if present) into the environment.
+load_dotenv()
+# OpenAI credential read once at import time; when it is unset,
+# _get_ai_recommendation returns None and the rule-based fallback is used.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+class SmartBudgetRecommender:
+    """
+    Smart Budget Recommendation Engine
+    Analyzes past spending behavior and recommends personalized budgets
+    for each category based on historical data.
+    """
+    def __init__(self, db):
+        """Keep a handle to the database whose collections the recommender queries."""
+        self.db = db
+    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
+        """
+        Get budget recommendations for all categories based on past behavior.
+        Args:
+            user_id: User identifier
+            month: Target month (1-12)
+            year: Target year
+        Returns:
+            List of budget recommendations for each category
+        """
+        # 1) Try to build stats from existing budgets for this user (createdBy)
+        category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) Only return recommendations for actual budgets - do NOT use expenses history
+        # This ensures we only show recommendations for budgets the user actually created
+        if not category_data:
+            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
+            return []
+        recommendations: List[BudgetRecommendation] = []
+        for category, data in category_data.items():
+            avg_expense = data["average_monthly"]
+            confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
+            ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
+                recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
+                action = ai_result.get("action")
+                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
+            else:
+                # Fallback to rule-based recommendation if OpenAI fails
+                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
+                reason = self._generate_reason(category, avg_expense, recommended_budget)
+                action = None
+                if not ai_result:
+                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
+            recommendations.append(BudgetRecommendation(
+                budget_name=category,
+                average_expense=round(avg_expense, 2),
+                recommended_budget=round(recommended_budget or 0, 2),
+                reason=reason,
+                confidence=confidence,
+                action=action
+            ))
+        # Sort by average expense (highest first)
+        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
+        return recommendations
+    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
+        """Calculate statistics for each category"""
+        category_data = defaultdict(lambda: {
+            "total": 0,
+            "count": 0,
+            "months": set(),
+            "monthly_totals": defaultdict(float)
+        })
+        for expense in expenses:
+            category = expense.get("category", "Uncategorized")
+            amount = expense.get("amount", 0)
+            date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
+            if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
+            category_data[category]["total"] += amount
+            category_data[category]["count"] += 1
+            # Track monthly totals
+            month_key = (date.year, date.month)
+            category_data[category]["months"].add(month_key)
+            category_data[category]["monthly_totals"][month_key] += amount
+        # Calculate averages
+        result = {}
+        for category, data in category_data.items():
+            num_months = len(data["months"]) or 1
+            avg_monthly = data["total"] / num_months
+            # Calculate standard deviation for variability
+            monthly_values = list(data["monthly_totals"].values())
+            if len(monthly_values) > 1:
+                mean = sum(monthly_values) / len(monthly_values)
+                variance = sum((x - mean) ** 2 for x in monthly_values) / len(monthly_values)
+                std_dev = math.sqrt(variance)
+            else:
+                std_dev = 0
+            result[category] = {
+                "average_monthly": avg_monthly,
+                "total": data["total"],
+                "count": data["count"],
+                "months_analyzed": num_months,
+                "std_dev": std_dev,
+                "monthly_values": monthly_values
+            }
+        return result
+    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
+        """
+        Calculate recommended budget based on average expense.
+        Strategy:
+        - Base: Average monthly expense
+        - Add 5% buffer for variability
+        - Round to nearest 100 for cleaner numbers
+        """
+        # Add 5% buffer to handle variability
+        buffer = avg_expense * 0.05
+        # If there's high variability (std_dev > 20% of mean), add more buffer
+        if data["std_dev"] > 0:
+            coefficient_of_variation = data["std_dev"] / avg_expense if avg_expense > 0 else 0
+            if coefficient_of_variation > 0.2:
+                buffer = avg_expense * 0.10  # 10% buffer for high variability
+        recommended = avg_expense + buffer
+        # Round to nearest 100 for cleaner budget numbers
+        recommended = round(recommended / 100) * 100
+        # Ensure minimum of 100 if there was any expense
+        if recommended < 100 and avg_expense > 0:
+            recommended = 100
+        return recommended
+    def _calculate_confidence(self, data: Dict) -> float:
+        """
+        Calculate confidence score (0-1) based on data quality.
+        Factors:
+        - Number of months analyzed (more = higher confidence)
+        - Number of transactions (more = higher confidence)
+        - Consistency of spending (lower std_dev = higher confidence)
+        """
+        months_score = min(data["months_analyzed"] / 6, 1.0)  # Max at 6 months
+        count_score = min(data["count"] / 10, 1.0)  # Max at 10 transactions
+        # Consistency score (inverse of coefficient of variation)
+        if data["average_monthly"] > 0:
+            cv = data["std_dev"] / data["average_monthly"]
+            consistency_score = max(0, 1 - min(cv, 1.0))  # Lower CV = higher score
+        else:
+            consistency_score = 0.5
+        # Weighted average
+        confidence = (months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3)
+        return round(confidence, 2)
+    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
+        """Generate human-readable reason for the recommendation"""
+        # Format amounts with currency symbol
+        avg_formatted = f"Rs.{avg_expense:,.0f}"
+        budget_formatted = f"Rs.{recommended_budget:,.0f}"
+        if recommended_budget > avg_expense:
+            buffer = recommended_budget - avg_expense
+            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We suggest setting your budget to {budget_formatted} for next month "
+                f"(includes a {buffer_pct:.0f}% buffer for variability)."
+            )
+        else:
+            return (
+                f"Your average monthly {category.lower()} expense is {avg_formatted}. "
+                f"We recommend a budget of {budget_formatted} for next month."
+            )
+    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
+        """Get average expenses by category for the past N months"""
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=months * 30)
+        expenses = list(self.db.expenses.find({
+            "user_id": user_id,
+            "date": {"$gte": start_date, "$lte": end_date},
+            "type": "expense"
+        }))
+        if not expenses:
+            return []
+        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
+        result = []
+        for category, data in category_data.items():
+            result.append(CategoryExpense(
+                category=category,
+                average_monthly_expense=round(data["average_monthly"], 2),
+                total_expenses=data["count"],
+                months_analyzed=data["months_analyzed"]
+            ))
+        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
+        return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
+    def _get_category_stats_from_budgets(
+        self, user_id: str, month: int, year: int
+    ) -> Dict:
+        """
+        Build category stats from existing budgets for this user.
+        We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
+        as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
+        """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
+        try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
+        if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
+            return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
+        result: Dict[str, Dict] = {}
+        for b in budgets:
+            # Only use the main budget name - don't extract nested categories from headCategories
+            # This ensures we only return recommendations for budgets the user actually created
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
+            # Skip if budget name is still Uncategorized or empty
+            if not budget_name or budget_name == "Uncategorized" or budget_name.strip() == "":
+                print(f"⚠️ Skipping budget with invalid name: {b.get('_id')}")
+                continue
+            print(f"✅ Processing budget: '{budget_name}' (id: {b.get('_id')})")
+            # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
+            else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"✅ Processed {len(result)} budget categories for recommendations: {list(result.keys())}")
+        return result
+    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
+        """Use OpenAI to refine the budget recommendation."""
+        if not OPENAI_API_KEY:
+            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
+            return None
+        print(f"🔄 Calling OpenAI API for category: {category}...")
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
+        summary = (
+            f"Category: {category}\n"
+            f"Monthly totals: [{history}]\n"
+            f"Average spend: {avg_expense:.2f}\n"
+            f"Std deviation: {data['std_dev']:.2f}\n"
+            f"Months observed: {data['months_analyzed']}\n"
+        )
+        prompt = (
+            "You are an Indian personal finance coach. "
+            "Given the user's spending history, decide whether to increase, decrease, "
+            "or keep the upcoming month's budget and provide a short explanation. "
+            "Respond strictly as JSON with the following keys:\n"
+            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
+            "Use rupees for all amounts.\n\n"
+            f"{summary}"
+        )
+        try:
+            response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {OPENAI_API_KEY}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
+                    "temperature": 0.1,
+                    "response_format": {"type": "json_object"},
+                },
+                timeout=30,
+            )
+            response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
+            return json.loads(content)
+        except Exception as exc:
+            print(f"OpenAI recommendation error for {category}: {exc}")
+            return None

.history/app/smart_recommendation_20251225161144.py ADDED Viewed

	@@ -0,0 +1,493 @@

+import json
+import math
+import os
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import Dict, List
+import requests
+from dotenv import load_dotenv
+from bson import ObjectId
+from app.models import BudgetRecommendation, CategoryExpense
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
class SmartBudgetRecommender:
    """
    Smart Budget Recommendation Engine

    Analyzes past spending behavior and recommends personalized budgets
    for each category based on historical data.
    """

    def __init__(self, db):
        """Keep the database handle used for every collection lookup.

        Args:
            db: Database object; this class calls ``find`` / ``find_one`` on
                its ``budgets``, ``expenses`` and ``categories`` collections.
        """
        self.db = db

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_object_id(value):
        """Return ``value`` as an ObjectId, or ``None`` if it cannot be one.

        ``ObjectId(...)`` signals a malformed string with
        ``bson.errors.InvalidId``, which is not a ``ValueError`` or
        ``TypeError``, so it must be caught by name.
        """
        # Local import: keeps the file's top-level import block untouched.
        from bson.errors import InvalidId

        if isinstance(value, ObjectId):
            return value
        try:
            return ObjectId(value)
        except (InvalidId, TypeError, ValueError):
            return None

    @staticmethod
    def _to_amount(value):
        """Convert ``value`` to a finite float; return ``None`` when impossible."""
        if isinstance(value, bool):
            # float(True) == 1.0 would silently turn a flag into money.
            return None
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return number if math.isfinite(number) else None

    @classmethod
    def _first_positive_amount(cls, document: Dict, keys) -> float:
        """Return the first strictly positive amount found under ``keys``.

        Each key is parsed independently, so one malformed field does not
        discard the valid ones. Returns ``0.0`` when none qualifies.
        """
        for key in keys:
            number = cls._to_amount(document.get(key))
            if number is not None and number > 0:
                return number
        return 0.0

    @staticmethod
    def _population_std_dev(values) -> float:
        """Population standard deviation of ``values`` (0.0 for fewer than two)."""
        if len(values) < 2:
            return 0.0
        mean = sum(values) / len(values)
        return math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

    @classmethod
    def _coerce_ai_budget(cls, ai_result):
        """Extract a usable budget figure from the AI reply.

        Returns:
            A positive finite float, or ``None`` when the reply is not a dict
            or its ``recommended_budget`` is missing, non-numeric or <= 0.
        """
        if not isinstance(ai_result, dict):
            return None
        number = cls._to_amount(ai_result.get("recommended_budget"))
        if number is None or number <= 0:
            return None
        return number

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_recommendations(self, user_id: str, month: int, year: int) -> List[BudgetRecommendation]:
        """
        Get budget recommendations for all categories based on past behavior.

        Only budgets the user actually created are considered; expense
        history is not used here.

        Args:
            user_id: User identifier
            month: Target month (1-12); forwarded to the budget lookup
            year: Target year; forwarded to the budget lookup

        Returns:
            List of budget recommendations, sorted by average expense
            (highest first). Empty when the user has no usable budgets.
        """
        category_data = self._get_category_stats_from_budgets(user_id, month, year)
        if not category_data:
            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
            return []

        recommendations: List[BudgetRecommendation] = []
        for category, data in category_data.items():
            avg_expense = data["average_monthly"]
            confidence = self._calculate_confidence(data)

            # OpenAI is the primary source; its answer is validated before use.
            ai_result = self._get_ai_recommendation(category, data, avg_expense)
            ai_budget = self._coerce_ai_budget(ai_result)
            if ai_budget is not None:
                recommended_budget = ai_budget
                reason = str(ai_result.get("reason") or f"AI recommendation for {category}")
                raw_action = ai_result.get("action")
                action = raw_action if isinstance(raw_action, str) else None
                print(f"✅ OpenAI recommendation for {category}: {recommended_budget} (action: {action})")
            else:
                # Fallback to rule-based recommendation if OpenAI fails
                recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                reason = self._generate_reason(category, avg_expense, recommended_budget)
                action = None
                if not ai_result:
                    print(f"❌ OpenAI unavailable (no API key or error), using rule-based for {category}: {recommended_budget}")
                else:
                    print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")

            recommendations.append(BudgetRecommendation(
                budget_name=category,
                average_expense=round(avg_expense, 2),
                recommended_budget=round(recommended_budget or 0, 2),
                reason=reason,
                confidence=confidence,
                action=action
            ))

        # Sort by average expense (highest first)
        recommendations.sort(key=lambda x: x.average_expense, reverse=True)
        return recommendations

    def get_category_averages(self, user_id: str, months: int = 3) -> List[CategoryExpense]:
        """Get average expenses by category for the past N months.

        The window is ``months * 30`` days ending at ``datetime.now()``.
        Only documents with ``type == "expense"`` for ``user_id`` are read.

        Returns:
            CategoryExpense entries sorted by average monthly expense
            (highest first); empty when no expenses match.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=months * 30)
        expenses = list(self.db.expenses.find({
            "user_id": user_id,
            "date": {"$gte": start_date, "$lte": end_date},
            "type": "expense"
        }))
        if not expenses:
            return []

        category_data = self._calculate_category_statistics(expenses, start_date, end_date)
        result = [
            CategoryExpense(
                category=category,
                average_monthly_expense=round(data["average_monthly"], 2),
                total_expenses=data["count"],
                months_analyzed=data["months_analyzed"]
            )
            for category, data in category_data.items()
        ]
        result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
        return result

    # ------------------------------------------------------------------
    # Statistics
    # ------------------------------------------------------------------
    def _calculate_category_statistics(self, expenses: List[Dict], start_date: datetime, end_date: datetime) -> Dict:
        """Calculate per-category statistics from expense documents.

        Records are skipped when their ``date`` is missing / unparseable or
        their ``amount`` is not numeric. ``start_date`` and ``end_date`` are
        accepted for interface compatibility and are not used here.

        Returns:
            Mapping of category name to a dict with ``average_monthly``,
            ``total``, ``count``, ``months_analyzed``, ``std_dev`` and
            ``monthly_values``.
        """
        monthly_totals = defaultdict(lambda: defaultdict(float))
        counts = defaultdict(int)

        for expense in expenses:
            when = expense.get("date")
            if isinstance(when, str):
                try:
                    when = datetime.fromisoformat(when.replace('Z', '+00:00'))
                except ValueError:
                    continue
            if not isinstance(when, datetime):
                # None or any other type: no usable month, skip the record.
                continue

            amount = self._to_amount(expense.get("amount", 0))
            if amount is None:
                continue

            category = expense.get("category") or "Uncategorized"
            counts[category] += 1
            monthly_totals[category][(when.year, when.month)] += amount

        result = {}
        for category, per_month in monthly_totals.items():
            monthly_values = list(per_month.values())
            total = sum(monthly_values)
            num_months = len(monthly_values) or 1
            result[category] = {
                "average_monthly": total / num_months,
                "total": total,
                "count": counts[category],
                "months_analyzed": num_months,
                "std_dev": self._population_std_dev(monthly_values),
                "monthly_values": monthly_values
            }
        return result

    def _calculate_recommended_budget(self, avg_expense: float, data: Dict) -> float:
        """
        Calculate recommended budget based on average expense.

        Strategy:
        - Base: Average monthly expense
        - Add 5% buffer (10% when std_dev exceeds 20% of the average)
        - Round to nearest 100, halves rounding up
        - Never return less than the average itself (rounded up to a 100)
        """
        buffer = avg_expense * 0.05
        if data["std_dev"] > 0 and avg_expense > 0:
            if data["std_dev"] / avg_expense > 0.2:
                buffer = avg_expense * 0.10  # 10% buffer for high variability

        # Built-in round() rounds halves to the even neighbour (1050 -> 1000,
        # 1150 -> 1200); floor(x + 0.5) rounds every half upwards instead.
        recommended = math.floor((avg_expense + buffer) / 100 + 0.5) * 100

        # Rounding down must not undercut the observed average. For any
        # positive average this also guarantees a minimum budget of 100.
        if recommended < avg_expense:
            recommended = math.ceil(avg_expense / 100) * 100
        return recommended

    def _calculate_confidence(self, data: Dict) -> float:
        """
        Calculate confidence score (0-1) based on data quality.

        Factors:
        - Number of months analyzed (weight 0.4, saturates at 6 months)
        - Number of transactions (weight 0.3, saturates at 10)
        - Consistency of spending (weight 0.3, lower std_dev = higher score)
        """
        months_score = min(data["months_analyzed"] / 6, 1.0)
        count_score = min(data["count"] / 10, 1.0)

        # Consistency score (inverse of coefficient of variation)
        if data["average_monthly"] > 0:
            cv = data["std_dev"] / data["average_monthly"]
            consistency_score = max(0, 1 - min(cv, 1.0))
        else:
            consistency_score = 0.5

        confidence = months_score * 0.4 + count_score * 0.3 + consistency_score * 0.3
        return round(confidence, 2)

    def _generate_reason(self, category: str, avg_expense: float, recommended_budget: float) -> str:
        """Generate human-readable reason for the recommendation."""
        avg_formatted = f"Rs.{avg_expense:,.0f}"
        budget_formatted = f"Rs.{recommended_budget:,.0f}"
        intro = f"Your average monthly {category.lower()} expense is {avg_formatted}. "

        if recommended_budget > avg_expense:
            buffer = recommended_budget - avg_expense
            buffer_pct = (buffer / avg_expense * 100) if avg_expense > 0 else 0
            return (
                intro
                + f"We suggest setting your budget to {budget_formatted} for next month "
                + f"(includes a {buffer_pct:.0f}% buffer for variability)."
            )
        return intro + f"We recommend a budget of {budget_formatted} for next month."

    # ------------------------------------------------------------------
    # Database lookups
    # ------------------------------------------------------------------
    def _get_category_name(self, category_id) -> str:
        """Look up category name from categories collection.

        The lookup uses the ObjectId form of ``category_id`` when it converts
        cleanly and the raw value otherwise.

        Returns:
            The document's ``name`` (or ``title``), else ``str(category_id)``;
            ``"Uncategorized"`` for an empty id.
        """
        if not category_id:
            return "Uncategorized"
        try:
            object_id = self._to_object_id(category_id)
            lookup_id = object_id if object_id is not None else category_id
            category_doc = self.db.categories.find_one({"_id": lookup_id})
            if category_doc:
                return category_doc.get("name") or category_doc.get("title") or str(category_id)
        except Exception as e:
            # Best-effort: a failed lookup degrades to showing the raw id.
            print(f"Error looking up category name for {category_id}: {e}")
        return str(category_id)

    def _find_user_budgets(self, user_id) -> List[Dict]:
        """Collect budget documents for ``user_id`` using every known id layout.

        Queries that need an ObjectId are skipped (and logged) when
        ``user_id`` is not a valid one, so the string-based queries still
        run. A failing query is logged and does not stop the others.
        The returned list may contain duplicates.
        """
        collection = self.db.budgets
        object_id = self._to_object_id(user_id)
        found: List[Dict] = []

        # (pattern number, log label, field name, needs ObjectId?)
        lookups = (
            (1, "createdBy ObjectId", "createdBy", True),
            (2, "createdBy string", "createdBy", False),
            (3, "user_id string", "user_id", False),
            (4, "user_id ObjectId", "user_id", True),
        )
        for number, label, field, needs_object_id in lookups:
            if needs_object_id and object_id is None:
                print(f"Pattern {number} failed: {user_id!r} is not a valid ObjectId")
                continue
            value = object_id if needs_object_id else user_id
            try:
                matches = list(collection.find({field: value}))
            except Exception as e:
                print(f"Pattern {number} failed: {e}")
                continue
            print(f"Pattern {number} ({label}): Found {len(matches)} budgets")
            found.extend(matches)

        # Pattern 5: user_id may actually be a budget _id; if so, use that
        # budget's createdBy to fetch all budgets of the same creator.
        if object_id is None:
            print(f"Pattern 5 failed: {user_id!r} is not a valid ObjectId")
        else:
            try:
                budget_by_id = collection.find_one({"_id": object_id})
                if budget_by_id:
                    print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
                    created_by = budget_by_id.get("createdBy")
                    if created_by:
                        budgets_by_creator = list(collection.find({"createdBy": created_by}))
                        print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
                        found.extend(budgets_by_creator)
            except Exception as e:
                print(f"Pattern 5 failed: {e}")

        # Pattern 6: budget _id stored as a plain string
        try:
            budget_by_id_str = collection.find_one({"_id": user_id})
            if budget_by_id_str:
                print(f"Pattern 6: Found budget by _id as string")
                found.append(budget_by_id_str)
        except Exception as e:
            print(f"Pattern 6 failed: {e}")

        return found

    def _get_category_stats_from_budgets(
        self, user_id: str, month: int, year: int
    ) -> Dict:
        """
        Build category stats from existing budgets for this user.

        Each budget document (e.g. "Office Maintenance", "LOGICGO") is
        treated as a spending category, keyed by its ``name`` (falling back
        to ``category`` / ``title``). Nested ``headCategories`` entries are
        NOT expanded. Budgets sharing a name are merged and their average and
        standard deviation recomputed. ``month`` and ``year`` are currently
        unused.

        Returns:
            Mapping of budget name to a stats dict with ``average_monthly``,
            ``total``, ``count``, ``months_analyzed``, ``std_dev`` and
            ``monthly_values``; empty when no budget has a usable name and a
            positive amount.
        """
        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
        candidates = self._find_user_budgets(user_id)

        # Remove duplicates based on _id (several patterns can match one doc)
        seen_ids = set()
        budgets = []
        for b in candidates:
            budget_id = str(b.get("_id", ""))
            if budget_id not in seen_ids:
                seen_ids.add(budget_id)
                budgets.append(b)

        if not budgets:
            print(f"No budgets found for user_id: {user_id}")
            print(f"Tried all query patterns. Checking sample budget structure...")
            # Log one arbitrary budget so the stored id layout can be inspected.
            sample = self.db.budgets.find_one()
            if sample:
                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
                print(f"Sample budget has user_id field: {'user_id' in sample}")
            return {}

        print(f"Found {len(budgets)} budgets for user_id: {user_id}")

        amounts_by_name: Dict[str, List[float]] = {}
        for b in budgets:
            raw_name = b.get("name")
            if not raw_name or raw_name == "Uncategorized":
                raw_name = b.get("category") or b.get("title")
            # Names may be stored as non-strings or padded with whitespace.
            budget_name = str(raw_name).strip() if raw_name is not None else ""
            if not budget_name or budget_name == "Uncategorized":
                print(f"⚠️ Skipping budget with invalid name: {b.get('_id')}")
                continue
            print(f"✅ Processing budget: '{budget_name}' (id: {b.get('_id')})")

            # Priority: spent amount > max amount > budget amount.
            spend_amount = self._first_positive_amount(b, ("spendAmount", "spend_amount", "spent"))
            max_amount = self._first_positive_amount(b, ("maxAmount", "max_amount", "amount"))
            budget_amount = self._first_positive_amount(b, ("budget", "budgetAmount"))
            base_amount = spend_amount or max_amount or budget_amount

            if base_amount > 0:
                amounts_by_name.setdefault(budget_name, []).append(base_amount)

        result: Dict[str, Dict] = {}
        for budget_name, values in amounts_by_name.items():
            total = sum(values)
            result[budget_name] = {
                "average_monthly": total / len(values),
                "total": total,
                "count": len(values),
                "months_analyzed": len(values),
                "std_dev": self._population_std_dev(values),
                "monthly_values": values,
            }

        print(f"✅ Processed {len(result)} budget categories for recommendations: {list(result.keys())}")
        return result

    # ------------------------------------------------------------------
    # OpenAI
    # ------------------------------------------------------------------
    def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
        """Use OpenAI to refine the budget recommendation.

        Args:
            category: Budget/category name the recommendation is for.
            data: Stats dict; reads ``monthly_values`` (optional),
                ``std_dev`` and ``months_analyzed``.
            avg_expense: Average monthly amount for the category.

        Returns:
            The decoded JSON object (dict), or ``None`` when no API key is
            configured, the request fails, or the reply is not a JSON object.
        """
        if not OPENAI_API_KEY:
            print(f"⚠️ OpenAI API key not found in environment variables for category: {category}")
            return None
        print(f"🔄 Calling OpenAI API for category: {category}...")

        # With no per-month history, fall back to the average as the only data point.
        monthly_values = data.get("monthly_values")
        if monthly_values:
            history = ", ".join(f"{value:.0f}" for value in monthly_values)
        else:
            history = f"{avg_expense:.0f}"

        summary = (
            f"Category: {category}\n"
            f"Monthly totals: [{history}]\n"
            f"Average spend: {avg_expense:.2f}\n"
            f"Std deviation: {data['std_dev']:.2f}\n"
            f"Months observed: {data['months_analyzed']}\n"
        )
        prompt = (
            "You are an Indian personal finance coach. "
            "Given the user's spending history, decide whether to increase, decrease, "
            "or keep the upcoming month's budget and provide a short explanation. "
            "Respond strictly as JSON with the following keys:\n"
            '{ "recommended_budget": number, "action": "increase|decrease|keep", "reason": "string" }.\n'
            "Use rupees for all amounts.\n\n"
            f"{summary}"
        )

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENAI_API_KEY}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "gpt-4o-mini",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.1,
                    "response_format": {"type": "json_object"},
                },
                timeout=30,
            )
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            parsed = json.loads(content)
        except Exception as exc:
            # Deliberately best-effort: any failure is logged and the caller
            # falls back to the rule-based recommendation.
            print(f"OpenAI recommendation error for {category}: {exc}")
            return None

        # Callers use dict access (.get); anything else counts as a failure.
        if not isinstance(parsed, dict):
            print(
                f"OpenAI recommendation error for {category}: "
                f"expected a JSON object, got {type(parsed).__name__}"
            )
            return None
        return parsed

Smart_Budget_Recommendation_API.postman_collection.json CHANGED Viewed

@@ -1,384 +1,398 @@
 {
 	"info": {
-		"_postman_id": "smart-budget-recommendation-api",
-		"name": "Smart Budget Recommendation API",
-		"description": "API collection for Smart Budget Recommendation service deployed on Hugging Face",
-		"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
 	},
-	"item": [
 		{
-			"name": "Health Check",
-			"request": {
-				"method": "GET",
-				"header": [],
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/health",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"health"
-					]
-				},
-				"description": "Check if the API and database are running"
 			}
-		},
-		{
-			"name": "Root Endpoint",
-			"request": {
-				"method": "GET",
-				"header": [],
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						""
-					]
-				}
 			}
-		},
-		{
-			"name": "Create Expense",
 			"request": {
-				"method": "POST",
-				"header": [
-					{
-						"key": "Content-Type",
-						"value": "application/json"
-					}
 				],
-				"body": {
-					"mode": "raw",
-					"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"amount\": 3800,\n    \"category\": \"Groceries\",\n    \"description\": \"Monthly groceries\",\n    \"date\": \"2025-01-15T00:00:00\",\n    \"type\": \"expense\"\n}"
-				},
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"expenses"
-					]
-				},
-				"description": "Create a new expense record"
 			}
-		},
-		{
-			"name": "Get Expenses",
 			"request": {
-				"method": "GET",
-				"header": [],
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses?user_id=68a834c3f4694b11efedacd2&limit=20",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"expenses"
-					],
-					"query": [
-						{
-							"key": "user_id",
-							"value": "68a834c3f4694b11efedacd2",
-							"description": "User identifier"
-						},
-						{
-							"key": "limit",
-							"value": "20",
-							"description": "Maximum number of expenses to return"
-						}
-					]
-				},
-				"description": "Get expenses for a specific user"
-			}
-		},
-		{
-			"name": "Create Budget",
-			"request": {
-				"method": "POST",
-				"header": [
-					{
-						"key": "Content-Type",
-						"value": "application/json"
-					}
 				],
-				"body": {
-					"mode": "raw",
-					"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"category\": \"Groceries\",\n    \"amount\": 4000,\n    \"period\": \"monthly\",\n    \"start_date\": \"2025-02-01T00:00:00\",\n    \"end_date\": \"2025-02-28T00:00:00\"\n}"
-				},
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"budgets"
-					]
-				},
-				"description": "Create a new budget"
 			}
-		},
-		{
-			"name": "Get Budgets",
 			"request": {
-				"method": "GET",
-				"header": [],
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets?user_id=68a834c3f4694b11efedacd2",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"budgets"
-					],
-					"query": [
-						{
-							"key": "user_id",
-							"value": "68a834c3f4694b11efedacd2"
-						}
-					]
-				},
-				"description": "Get budgets for a specific user"
 			}
-		},
-		{
-			"name": "Get Smart Budget Recommendations",
 			"request": {
-				"method": "GET",
-				"header": [],
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/recommendations/68a834c3f4694b11efedacd2?month=2&year=2025",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"recommendations",
-						"68a834c3f4694b11efedacd2"
-					],
-					"query": [
-						{
-							"key": "month",
-							"value": "2",
-							"description": "Target month (1-12), optional - defaults to next month"
-						},
-						{
-							"key": "year",
-							"value": "2025",
-							"description": "Target year, optional - defaults to next year"
-						}
-					]
-				},
-				"description": "Get smart budget recommendations based on past spending behavior. Requires at least 2-3 months of expense data."
 			}
-		},
-		{
-			"name": "Get Category Expenses",
 			"request": {
-				"method": "GET",
-				"header": [],
-				"url": {
-					"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/category-expenses/68a834c3f4694b11efedacd2?months=3",
-					"protocol": "https",
-					"host": [
-						"logicgoinfotechspaces-smart-budget-recommendation",
-						"hf",
-						"space"
-					],
-					"path": [
-						"category-expenses",
-						"68a834c3f4694b11efedacd2"
-					],
-					"query": [
-						{
-							"key": "months",
-							"value": "3",
-							"description": "Number of months to analyze (default: 3)"
-						}
-					]
-				},
-				"description": "Get average expenses by category for the past N months"
-			}
-		},
-		{
-			"name": "Sample Expenses - Create Multiple",
-			"item": [
-				{
-					"name": "Groceries - Month 1 (Sept 2024)",
-					"request": {
-						"method": "POST",
-						"header": [
-							{
-								"key": "Content-Type",
-								"value": "application/json"
-							}
-						],
-						"body": {
-							"mode": "raw",
-							"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"amount\": 3500,\n    \"category\": \"Groceries\",\n    \"description\": \"Monthly groceries - September 2024\",\n    \"date\": \"2024-09-15T00:00:00\",\n    \"type\": \"expense\"\n}"
-						},
-						"url": {
-							"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
-							"protocol": "https",
-							"host": [
-								"logicgoinfotechspaces-smart-budget-recommendation",
-								"hf",
-								"space"
-							],
-							"path": [
-								"expenses"
-							]
-						}
-					}
-				},
-				{
-					"name": "Groceries - Month 2 (Oct 2024)",
-					"request": {
-						"method": "POST",
-						"header": [
-							{
-								"key": "Content-Type",
-								"value": "application/json"
-							}
-						],
-						"body": {
-							"mode": "raw",
-							"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"amount\": 3800,\n    \"category\": \"Groceries\",\n    \"description\": \"Monthly groceries - October 2024\",\n    \"date\": \"2024-10-15T00:00:00\",\n    \"type\": \"expense\"\n}"
-						},
-						"url": {
-							"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
-							"protocol": "https",
-							"host": [
-								"logicgoinfotechspaces-smart-budget-recommendation",
-								"hf",
-								"space"
-							],
-							"path": [
-								"expenses"
-							]
-						}
-					}
-				},
 				{
-					"name": "Groceries - Month 3 (Nov 2024)",
-					"request": {
-						"method": "POST",
-						"header": [
-							{
-								"key": "Content-Type",
-								"value": "application/json"
-							}
-						],
-						"body": {
-							"mode": "raw",
-							"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"amount\": 4000,\n    \"category\": \"Groceries\",\n    \"description\": \"Monthly groceries - November 2024\",\n    \"date\": \"2024-11-15T00:00:00\",\n    \"type\": \"expense\"\n}"
-						},
-						"url": {
-							"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
-							"protocol": "https",
-							"host": [
-								"logicgoinfotechspaces-smart-budget-recommendation",
-								"hf",
-								"space"
-							],
-							"path": [
-								"expenses"
-							]
-						}
-					}
-				},
-				{
-					"name": "Transport - Month 1 (Sept 2024)",
-					"request": {
-						"method": "POST",
-						"header": [
-							{
-								"key": "Content-Type",
-								"value": "application/json"
-							}
-						],
-						"body": {
-							"mode": "raw",
-							"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"amount\": 2000,\n    \"category\": \"Transport\",\n    \"description\": \"Monthly transport - September 2024\",\n    \"date\": \"2024-09-20T00:00:00\",\n    \"type\": \"expense\"\n}"
-						},
-						"url": {
-							"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
-							"protocol": "https",
-							"host": [
-								"logicgoinfotechspaces-smart-budget-recommendation",
-								"hf",
-								"space"
-							],
-							"path": [
-								"expenses"
-							]
-						}
-					}
-				},
-				{
-					"name": "Transport - Month 2 (Oct 2024)",
-					"request": {
-						"method": "POST",
-						"header": [
-							{
-								"key": "Content-Type",
-								"value": "application/json"
-							}
-						],
-						"body": {
-							"mode": "raw",
-							"raw": "{\n    \"user_id\": \"68a834c3f4694b11efedacd2\",\n    \"amount\": 2200,\n    \"category\": \"Transport\",\n    \"description\": \"Monthly transport - October 2024\",\n    \"date\": \"2024-10-20T00:00:00\",\n    \"type\": \"expense\"\n}"
-						},
-						"url": {
-							"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
-							"protocol": "https",
-							"host": [
-								"logicgoinfotechspaces-smart-budget-recommendation",
-								"hf",
-								"space"
-							],
-							"path": [
-								"expenses"
-							]
-						}
-					}
 				}
-			]
-		}
 	],
 	"variable": [
-		{
-			"key": "base_url",
-			"value": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space",
-			"type": "string"
-		},
-		{
-			"key": "user_id",
-			"value": "68a834c3f4694b11efedacd2",
-			"type": "string"
-		}
 	]
-}

 {
 	"info": {
+	  "_postman_id": "smart-budget-recommendation-api",
+	  "name": "Smart Budget Recommendation API",
+	  "description": "API collection for Smart Budget Recommendation service deployed on Hugging Face",
+	  "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
 	},
+	"auth": {
+	  "type": "bearer",
+	  "bearer": [
 		{
+		  "key": "token",
+		  "value": "{{hf_token}}",
+		  "type": "string"
+		}
+	  ]
+	},
+	"item": [
+	  {
+		"name": "Health Check",
+		"request": {
+		  "method": "GET",
+		  "header": [],
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/health",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "health"
+			]
+		  },
+		  "description": "Check if the API and database are running"
+		}
+	  },
+	  {
+		"name": "Root Endpoint",
+		"request": {
+		  "method": "GET",
+		  "header": [],
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  ""
+			]
+		  }
+		}
+	  },
+	  {
+		"name": "Create Expense",
+		"request": {
+		  "method": "POST",
+		  "header": [
+			{
+			  "key": "Content-Type",
+			  "value": "application/json"
 			}
+		  ],
+		  "body": {
+			"mode": "raw",
+			"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"amount\": 3800,\n  \"category\": \"Groceries\",\n  \"description\": \"Monthly groceries\",\n  \"date\": \"2025-01-15T00:00:00\",\n  \"type\": \"expense\"\n}"
+		  },
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "expenses"
+			]
+		  },
+		  "description": "Create a new expense record"
+		}
+	  },
+	  {
+		"name": "Get Expenses",
+		"request": {
+		  "method": "GET",
+		  "header": [],
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses?user_id={{user_id}}&limit=20",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "expenses"
+			],
+			"query": [
+			  {
+				"key": "user_id",
+				"value": "{{user_id}}",
+				"description": "User identifier"
+			  },
+			  {
+				"key": "limit",
+				"value": "20",
+				"description": "Maximum number of expenses to return"
+			  }
+			]
+		  },
+		  "description": "Get expenses for a specific user"
+		}
+	  },
+	  {
+		"name": "Create Budget",
+		"request": {
+		  "method": "POST",
+		  "header": [
+			{
+			  "key": "Content-Type",
+			  "value": "application/json"
 			}
+		  ],
+		  "body": {
+			"mode": "raw",
+			"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"category\": \"Groceries\",\n  \"amount\": 4000,\n  \"period\": \"monthly\",\n  \"start_date\": \"2025-02-01T00:00:00\",\n  \"end_date\": \"2025-02-28T00:00:00\"\n}"
+		  },
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "budgets"
+			]
+		  },
+		  "description": "Create a new budget"
+		}
+	  },
+	  {
+		"name": "Get Budgets",
+		"request": {
+		  "method": "GET",
+		  "header": [],
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/budgets?user_id={{user_id}}",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "budgets"
+			],
+			"query": [
+			  {
+				"key": "user_id",
+				"value": "{{user_id}}"
+			  }
+			]
+		  },
+		  "description": "Get budgets for a specific user"
+		}
+	  },
+	  {
+		"name": "Get Smart Budget Recommendations",
+		"request": {
+		  "method": "GET",
+		  "header": [],
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/recommendations/{{user_id}}?month=2&year=2025",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "recommendations",
+			  "{{user_id}}"
+			],
+			"query": [
+			  {
+				"key": "month",
+				"value": "2",
+				"description": "Target month (1-12), optional - defaults to next month"
+			  },
+			  {
+				"key": "year",
+				"value": "2025",
+				"description": "Target year, optional - defaults to the year of the next month"
+			  }
+			]
+		  },
+		  "description": "Get smart budget recommendations based on the budgets the user has created. Returns an empty list when the user has no budgets; expense history is not used as a fallback."
+		}
+	  },
+	  {
+		"name": "Get Category Expenses",
+		"request": {
+		  "method": "GET",
+		  "header": [],
+		  "url": {
+			"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/category-expenses/{{user_id}}?months=3",
+			"protocol": "https",
+			"host": [
+			  "logicgoinfotechspaces-smart-budget-recommendation",
+			  "hf",
+			  "space"
+			],
+			"path": [
+			  "category-expenses",
+			  "{{user_id}}"
+			],
+			"query": [
+			  {
+				"key": "months",
+				"value": "3",
+				"description": "Number of months to analyze (default: 3)"
+			  }
+			]
+		  },
+		  "description": "Get average expenses by category for the past N months"
+		}
+	  },
+	  {
+		"name": "Sample Expenses - Create Multiple",
+		"item": [
+		  {
+			"name": "Groceries - Month 1 (Sept 2024)",
 			"request": {
+			  "method": "POST",
+			  "header": [
+				{
+				  "key": "Content-Type",
+				  "value": "application/json"
+				}
+			  ],
+			  "body": {
+				"mode": "raw",
+				"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"amount\": 3500,\n  \"category\": \"Groceries\",\n  \"description\": \"Monthly groceries - September 2024\",\n  \"date\": \"2024-09-15T00:00:00\",\n  \"type\": \"expense\"\n}"
+			  },
+			  "url": {
+				"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
+				"protocol": "https",
+				"host": [
+				  "logicgoinfotechspaces-smart-budget-recommendation",
+				  "hf",
+				  "space"
 				],
+				"path": [
+				  "expenses"
+				]
+			  }
 			}
+		  },
+		  {
+			"name": "Groceries - Month 2 (Oct 2024)",
 			"request": {
+			  "method": "POST",
+			  "header": [
+				{
+				  "key": "Content-Type",
+				  "value": "application/json"
+				}
+			  ],
+			  "body": {
+				"mode": "raw",
+				"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"amount\": 3800,\n  \"category\": \"Groceries\",\n  \"description\": \"Monthly groceries - October 2024\",\n  \"date\": \"2024-10-15T00:00:00\",\n  \"type\": \"expense\"\n}"
+			  },
+			  "url": {
+				"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
+				"protocol": "https",
+				"host": [
+				  "logicgoinfotechspaces-smart-budget-recommendation",
+				  "hf",
+				  "space"
 				],
+				"path": [
+				  "expenses"
+				]
+			  }
 			}
+		  },
+		  {
+			"name": "Groceries - Month 3 (Nov 2024)",
 			"request": {
+			  "method": "POST",
+			  "header": [
+				{
+				  "key": "Content-Type",
+				  "value": "application/json"
+				}
+			  ],
+			  "body": {
+				"mode": "raw",
+				"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"amount\": 4000,\n  \"category\": \"Groceries\",\n  \"description\": \"Monthly groceries - November 2024\",\n  \"date\": \"2024-11-15T00:00:00\",\n  \"type\": \"expense\"\n}"
+			  },
+			  "url": {
+				"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
+				"protocol": "https",
+				"host": [
+				  "logicgoinfotechspaces-smart-budget-recommendation",
+				  "hf",
+				  "space"
+				],
+				"path": [
+				  "expenses"
+				]
+			  }
 			}
+		  },
+		  {
+			"name": "Transport - Month 1 (Sept 2024)",
 			"request": {
+			  "method": "POST",
+			  "header": [
+				{
+				  "key": "Content-Type",
+				  "value": "application/json"
+				}
+			  ],
+			  "body": {
+				"mode": "raw",
+				"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"amount\": 2000,\n  \"category\": \"Transport\",\n  \"description\": \"Monthly transport - September 2024\",\n  \"date\": \"2024-09-20T00:00:00\",\n  \"type\": \"expense\"\n}"
+			  },
+			  "url": {
+				"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
+				"protocol": "https",
+				"host": [
+				  "logicgoinfotechspaces-smart-budget-recommendation",
+				  "hf",
+				  "space"
+				],
+				"path": [
+				  "expenses"
+				]
+			  }
 			}
+		  },
+		  {
+			"name": "Transport - Month 2 (Oct 2024)",
 			"request": {
+			  "method": "POST",
+			  "header": [
 				{
+				  "key": "Content-Type",
+				  "value": "application/json"
 				}
+			  ],
+			  "body": {
+				"mode": "raw",
+				"raw": "{\n  \"user_id\": \"{{user_id}}\",\n  \"amount\": 2200,\n  \"category\": \"Transport\",\n  \"description\": \"Monthly transport - October 2024\",\n  \"date\": \"2024-10-20T00:00:00\",\n  \"type\": \"expense\"\n}"
+			  },
+			  "url": {
+				"raw": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space/expenses",
+				"protocol": "https",
+				"host": [
+				  "logicgoinfotechspaces-smart-budget-recommendation",
+				  "hf",
+				  "space"
+				],
+				"path": [
+				  "expenses"
+				]
+			  }
+			}
+		  }
+		]
+	  }
 	],
 	"variable": [
+	  {
+		"key": "base_url",
+		"value": "https://logicgoinfotechspaces-smart-budget-recommendation.hf.space",
+		"type": "string"
+	  },
+	  {
+		"key": "user_id",
+		"value": "68a834c3f4694b11efedacd2",
+		"type": "string"
+	  },
+	  {
+		"key": "hf_token",
+		"value": "",
+		"type": "string"
+	  }
 	]
+  }

app/main.py CHANGED Viewed

@@ -190,10 +190,12 @@ async def get_budget_recommendations(user_id: str, month: Optional[int] = None,
     Example response:
     {
-        "category": "Groceries",
         "average_expense": 3800,
         "recommended_budget": 4000,
-        "reason": "Your average monthly grocery expense is Rs.3,800. We suggest setting your budget to Rs.4,000 for next month."
     }
     """
     if not month or not year:

     Example response:
     {
+        "budget_name": "Groceries",
         "average_expense": 3800,
         "recommended_budget": 4000,
+        "reason": "Your average monthly grocery expense is Rs.3,800. We suggest setting your budget to Rs.4,000 for next month.",
+        "confidence": 0.85,
+        "action": "increase"
     }
     """
     if not month or not year:

app/models.py CHANGED Viewed

@@ -24,7 +24,7 @@ class Budget(BaseModel):
     end_date: Optional[datetime] = None
 class BudgetRecommendation(BaseModel):
-    category: str
     average_expense: float
     recommended_budget: float
     reason: str

     end_date: Optional[datetime] = None
 class BudgetRecommendation(BaseModel):
+    budget_name: str = Field(..., description="Budget name (e.g., Groceries, Transport)")
     average_expense: float
     recommended_budget: float
     reason: str

app/smart_recommendation.py CHANGED Viewed

@@ -40,28 +40,11 @@ class SmartBudgetRecommender:
         # 1) Try to build stats from existing budgets for this user (createdBy)
         category_data = self._get_category_stats_from_budgets(user_id, month, year)
-        # 2) If there are no budgets, fall back to expenses history
         if not category_data:
-            end_date = datetime(year, month, 1) - timedelta(days=1)
-            start_date = end_date - timedelta(days=180)  # ~6 months
-            expenses = list(
-                self.db.expenses.find(
-                    {
-                        "user_id": user_id,
-                        "date": {"$gte": start_date, "$lte": end_date},
-                        "type": "expense",
-                    }
-                )
-            )
-            if not expenses:
-                return []
-            # Group expenses by category and calculate monthly averages
-            category_data = self._calculate_category_statistics(
-                expenses, start_date, end_date
-            )
         recommendations: List[BudgetRecommendation] = []
@@ -87,7 +70,7 @@ class SmartBudgetRecommender:
                     print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
             recommendations.append(BudgetRecommendation(
-                category=category,
                 average_expense=round(avg_expense, 2),
                 recommended_budget=round(recommended_budget or 0, 2),
                 reason=reason,
@@ -263,14 +246,40 @@ class SmartBudgetRecommender:
         return result
     def _get_category_name(self, category_id) -> str:
-        """Look up category name from categories collection"""
         if not category_id:
             return "Uncategorized"
         try:
-            # Try to find category in categories collection
-            if isinstance(category_id, ObjectId):
-                category_doc = self.db.categories.find_one({"_id": category_id})
             else:
                 try:
                     category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
@@ -278,11 +287,14 @@ class SmartBudgetRecommender:
                     category_doc = self.db.categories.find_one({"_id": category_id})
             if category_doc:
-                return category_doc.get("name") or category_doc.get("title") or str(category_id)
         except Exception as e:
             print(f"Error looking up category name for {category_id}: {e}")
             pass
         return str(category_id) if category_id else "Uncategorized"
     def _get_category_stats_from_budgets(
@@ -396,91 +408,26 @@ class SmartBudgetRecommender:
         result: Dict[str, Dict] = {}
         for b in budgets:
-            # First, try to extract categories from headCategories array
-            head_categories = b.get("headCategories", [])
-            if head_categories and isinstance(head_categories, list):
-                # Process nested categories from headCategories
-                for head_cat in head_categories:
-                    if not isinstance(head_cat, dict):
-                        continue
-                    # Get headCategory ID and amounts
-                    head_cat_id = head_cat.get("headCategory")
-                    try:
-                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
-                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
-                    except (ValueError, TypeError):
-                        head_cat_max = 0
-                        head_cat_spend = 0
-                    # Process nested categories within headCategory
-                    nested_categories = head_cat.get("categories", [])
-                    if nested_categories and isinstance(nested_categories, list):
-                        for nested_cat in nested_categories:
-                            if not isinstance(nested_cat, dict):
-                                continue
-                            nested_cat_id = nested_cat.get("category")
-                            try:
-                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
-                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
-                            except (ValueError, TypeError):
-                                nested_cat_max = 0
-                                nested_cat_spend = 0
-                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
-                            # Only include categories with limits (must have maxAmount > 0)
-                            if nested_cat_max > 0:
-                                # Look up actual category name
-                                nested_category_name = self._get_category_name(nested_cat_id)
-                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
-                                if nested_category_name not in result:
-                                    result[nested_category_name] = {
-                                        "average_monthly": nested_base_amount,
-                                        "total": nested_base_amount,
-                                        "count": 1,
-                                        "months_analyzed": 1,
-                                        "std_dev": 0.0,
-                                        "monthly_values": [nested_base_amount],
-                                    }
-                                else:
-                                    result[nested_category_name]["total"] += nested_base_amount
-                                    result[nested_category_name]["count"] += 1
-                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
-                                    result[nested_category_name]["average_monthly"] = (
-                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
-                                    )
-                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
-                    # Also include headCategory if it has amounts
-                    if head_cat_max > 0 or head_cat_spend > 0:
-                        head_category_name = self._get_category_name(head_cat_id)
-                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
-                        if head_category_name not in result:
-                            result[head_category_name] = {
-                                "average_monthly": head_base_amount,
-                                "total": head_base_amount,
-                                "count": 1,
-                                "months_analyzed": 1,
-                                "std_dev": 0.0,
-                                "monthly_values": [head_base_amount],
-                            }
-                        else:
-                            result[head_category_name]["total"] += head_base_amount
-                            result[head_category_name]["count"] += 1
-                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
-                            result[head_category_name]["average_monthly"] = (
-                                result[head_category_name]["total"] / result[head_category_name]["count"]
-                            )
-                            result[head_category_name]["monthly_values"].append(head_base_amount)
-            # Also include the main budget as a category (if it has amounts)
-            budget_name = b.get("name", "Uncategorized")
-            if not budget_name or budget_name == "Uncategorized":
-                budget_name = b.get("category") or b.get("title") or "Uncategorized"
             # Derive a base amount from WalletSync fields
             try:
@@ -502,10 +449,10 @@ class SmartBudgetRecommender:
             else:
                 base_amount = 0
-            # Only add main budget if it has an amount and we haven't processed categories
             if base_amount > 0:
-                if budget_name not in result:
-                    result[budget_name] = {
                         "average_monthly": base_amount,
                         "total": base_amount,
                         "count": 1,
@@ -514,15 +461,15 @@ class SmartBudgetRecommender:
                         "monthly_values": [base_amount],
                     }
                 else:
-                    result[budget_name]["total"] += base_amount
-                    result[budget_name]["count"] += 1
-                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
-                    result[budget_name]["average_monthly"] = (
-                        result[budget_name]["total"] / result[budget_name]["count"]
                     )
-                    result[budget_name]["monthly_values"].append(base_amount)
-        print(f"Processed {len(result)} budget categories for recommendations")
         return result
     def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):

         # 1) Try to build stats from existing budgets for this user (createdBy)
         category_data = self._get_category_stats_from_budgets(user_id, month, year)
+        # 2) Only return recommendations for actual budgets - do NOT use expenses history
+        # This ensures we only show recommendations for budgets the user actually created
         if not category_data:
+            print(f"No budgets found for user_id: {user_id}, returning empty recommendations")
+            return []
         recommendations: List[BudgetRecommendation] = []
                     print(f"⚠️ OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
             recommendations.append(BudgetRecommendation(
+                budget_name=category,
                 average_expense=round(avg_expense, 2),
                 recommended_budget=round(recommended_budget or 0, 2),
                 reason=reason,
         return result
     def _get_category_name(self, category_id) -> str:
+        """
+        Look up category name from headCategories and categories collections.
+        Checks headCategories first, then categories collection.
+        """
         if not category_id:
             return "Uncategorized"
         try:
+            # Convert to ObjectId if it's a string
+            if isinstance(category_id, str):
+                try:
+                    category_id_obj = ObjectId(category_id)
+                except (ValueError, TypeError):
+                    category_id_obj = category_id
+            else:
+                category_id_obj = category_id
+            # First, try to find in headCategories collection
+            if isinstance(category_id_obj, ObjectId):
+                head_category_doc = self.db.headcategories.find_one({"_id": category_id_obj})
+            else:
+                try:
+                    head_category_doc = self.db.headcategories.find_one({"_id": ObjectId(category_id)})
+                except (ValueError, TypeError):
+                    head_category_doc = self.db.headcategories.find_one({"_id": category_id})
+            if head_category_doc:
+                category_name = head_category_doc.get("name") or head_category_doc.get("title")
+                if category_name:
+                    return category_name
+            # If not found in headCategories, try categories collection
+            if isinstance(category_id_obj, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id_obj})
             else:
                 try:
                     category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
                     category_doc = self.db.categories.find_one({"_id": category_id})
             if category_doc:
+                category_name = category_doc.get("name") or category_doc.get("title")
+                if category_name:
+                    return category_name
         except Exception as e:
             print(f"Error looking up category name for {category_id}: {e}")
             pass
+        # If not found in either collection, return the ID as string
         return str(category_id) if category_id else "Uncategorized"
     def _get_category_stats_from_budgets(
         result: Dict[str, Dict] = {}
         for b in budgets:
+            # Extract category ID from budget (could be in category, categoryId, headCategory fields)
+            category_id = b.get("category") or b.get("categoryId") or b.get("headCategory") or b.get("category_id")
+            # Get category name from headCategories or categories collection using category ID
+            if category_id:
+                category_name = self._get_category_name(category_id)
+                print(f"✅ Found category ID: {category_id} -> Name: '{category_name}'")
+            else:
+                # Fallback to budget name if no category ID found
+                category_name = b.get("name", "Uncategorized")
+                if not category_name or category_name == "Uncategorized":
+                    category_name = b.get("title") or "Uncategorized"
+                print(f"⚠️ No category ID found, using budget name: '{category_name}'")
+            # Skip if category name is still Uncategorized or empty
+            if not category_name or category_name == "Uncategorized" or category_name.strip() == "":
+                print(f"⚠️ Skipping budget with invalid category name: {b.get('_id')}")
+                continue
+            print(f"✅ Processing budget: '{category_name}' (budget id: {b.get('_id')}, category id: {category_id})")
             # Derive a base amount from WalletSync fields
             try:
             else:
                 base_amount = 0
+            # Only add budget if it has an amount - use category name as key
             if base_amount > 0:
+                if category_name not in result:
+                    result[category_name] = {
                         "average_monthly": base_amount,
                         "total": base_amount,
                         "count": 1,
                         "monthly_values": [base_amount],
                     }
                 else:
+                    result[category_name]["total"] += base_amount
+                    result[category_name]["count"] += 1
+                    result[category_name]["months_analyzed"] = result[category_name]["count"]
+                    result[category_name]["average_monthly"] = (
+                        result[category_name]["total"] / result[category_name]["count"]
                     )
+                    result[category_name]["monthly_values"].append(base_amount)
+        print(f"✅ Processed {len(result)} budget categories for recommendations: {list(result.keys())}")
         return result
     def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):