Spaces:

LogicGoInfotechSpaces
/

SMART_BUDGET_RECOMMENDATION

Paused

App Files Files Community

LogicGoInfotechSpaces committed on 15 days ago

Commit

a600a52

1 Parent(s): fa10656

Fix bugs and improve budget recommendation system: Add API logging, fix OpenAI integration, enhance budget query patterns, add category extraction from headCategories, improve error handling

Browse files

Files changed (2) hide show

app/main.py +92 -2
app/smart_recommendation.py +273 -45

app/main.py CHANGED Viewed

@@ -1,10 +1,11 @@
-from fastapi import FastAPI, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
 from pymongo import MongoClient
 from pymongo.errors import ConnectionFailure
 import os
 from typing import List, Optional
-from datetime import datetime, timedelta
 from app.models import BudgetRecommendation, Expense, Budget, CategoryExpense
 from app.smart_recommendation import SmartBudgetRecommender
@@ -37,6 +38,95 @@ except ConnectionFailure as e:
 # Initialize Smart Budget Recommender
 recommender = SmartBudgetRecommender(db)
 @app.get("/")
 async def root():
     return {"message": "Smart Budget Recommendation API", "status": "running"}

+from fastapi import FastAPI, HTTPException, Depends, Request
 from fastapi.middleware.cors import CORSMiddleware
 from pymongo import MongoClient
 from pymongo.errors import ConnectionFailure
 import os
+import time
 from typing import List, Optional
+from datetime import datetime, timedelta, timezone
 from app.models import BudgetRecommendation, Expense, Budget, CategoryExpense
 from app.smart_recommendation import SmartBudgetRecommender
 # Initialize Smart Budget Recommender
 recommender = SmartBudgetRecommender(db)
+# IST timezone (UTC+5:30)
+IST = timezone(timedelta(hours=5, minutes=30))
+def log_api_call(db, name: str, status: str, response_time: float, endpoint: str = None, error: str = None):
+    """
+    Record one API invocation as a document in the ``api_logs`` collection.
+    Logging is best-effort: any exception raised while building or inserting
+    the entry is caught and printed, so it never propagates to the caller.
+    Args:
+        db: Database handle exposing an ``api_logs`` collection
+        name: Label stored under the "name" key
+        status: Outcome label stored under "status" (callers in this file pass "success" or "fail")
+        response_time: Elapsed time in seconds; stored rounded to 3 decimal places
+        endpoint: Optional request path; only its second segment is kept, as "user_id"
+        error: Optional error text; stored under "error" only when truthy
+    """
+    try:
+        # Timestamp in IST, rendered as DD-MM-YYYY HH:MM:SS:IST
+        stamp = datetime.now(IST).strftime("%d-%m-%Y %H:%M:%S:IST")
+        # The second path segment is taken as the user id,
+        # e.g. "/recommendations/user123" -> "user123"; otherwise it stays None
+        segments = endpoint.strip("/").split("/") if endpoint else []
+        user_id = segments[1] if len(segments) >= 2 else None
+        record = {
+            "name": name,
+            "status": status,
+            "date": stamp,
+            "response_time": round(response_time, 3),
+            "user_id": user_id,
+        }
+        if error:
+            record["error"] = error
+        db.api_logs.insert_one(record)
+    except Exception as e:
+        # Swallowed on purpose: a logging failure must not fail the API call
+        print(f"Failed to log API call: {e}")
+@app.middleware("http")
+async def log_requests(request: Request, call_next):
+    """Middleware to log API requests and track response time"""
+    start_time = time.time()
+    # Only log specific endpoints
+    endpoint = request.url.path
+    should_log = endpoint in ["/recommendations", "/category-expenses"] or endpoint.startswith("/recommendations/") or endpoint.startswith("/category-expenses/")
+    if should_log:
+        try:
+            response = await call_next(request)
+            process_time = time.time() - start_time
+            # Determine status
+            status = "success" if response.status_code < 400 else "fail"
+            # Log the API call
+            log_api_call(
+                db=db,
+                name="smart budget recommendation",
+                status=status,
+                response_time=process_time,
+                endpoint=endpoint
+            )
+            return response
+        except Exception as e:
+            process_time = time.time() - start_time
+            # Log failure
+            log_api_call(
+                db=db,
+                name="smart budget recommendation",
+                status="fail",
+                response_time=process_time,
+                endpoint=endpoint,
+                error=str(e)
+            )
+            raise
+    else:
+        # For other endpoints, just pass through
+        return await call_next(request)
 @app.get("/")
 async def root():
     return {"message": "Smart Budget Recommendation API", "status": "running"}

app/smart_recommendation.py CHANGED Viewed

@@ -69,17 +69,22 @@ class SmartBudgetRecommender:
             avg_expense = data["average_monthly"]
             confidence = self._calculate_confidence(data)
-            # 1) Try OpenAI first (primary source of recommendation)
             ai_result = self._get_ai_recommendation(category, data, avg_expense)
-            if ai_result:
                 recommended_budget = ai_result.get("recommended_budget")
-                reason = ai_result.get("reason")
                 action = ai_result.get("action")
             else:
-                # 2) Fallback to rule-based recommendation
                 recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                 reason = self._generate_reason(category, avg_expense, recommended_budget)
                 action = None
             recommendations.append(BudgetRecommendation(
                 category=category,
@@ -109,8 +114,18 @@ class SmartBudgetRecommender:
             amount = expense.get("amount", 0)
             date = expense.get("date")
             if isinstance(date, str):
-                date = datetime.fromisoformat(date.replace('Z', '+00:00'))
             category_data[category]["total"] += amount
             category_data[category]["count"] += 1
@@ -247,6 +262,29 @@ class SmartBudgetRecommender:
         result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
         return result
     def _get_category_stats_from_budgets(
         self, user_id: str, month: int, year: int
     ) -> Dict:
@@ -255,53 +293,236 @@ class SmartBudgetRecommender:
         We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
         as a spending category and derive an \"average\" from its amounts.
         """
-        # createdBy is stored as ObjectId in WalletSync, while user_id is a string.
-        # Try to cast to ObjectId; if it fails, fall back to matching the raw string.
-        query: Dict = {"status": "OPEN"}
         try:
-            query["createdBy"] = ObjectId(user_id)
-        except Exception:
-            query["createdBy"] = user_id
-        budgets = list(self.db.budgets.find(query))
         if not budgets:
             return {}
         result: Dict[str, Dict] = {}
         for b in budgets:
-            # Use budget \"name\" as category label
-            category = b.get("name", "Uncategorized")
             # Derive a base amount from WalletSync fields
-            max_amount = float(b.get("maxAmount", 0) or 0)
-            spend_amount = float(b.get("spendAmount", 0) or 0)
-            # If there is recorded spend, use that as \"average\"; otherwise maxAmount
-            base_amount = spend_amount if spend_amount > 0 else max_amount
-            if base_amount <= 0:
-                continue
-            if category not in result:
-                result[category] = {
-                    "average_monthly": base_amount,
-                    "total": base_amount,
-                    "count": 1,
-                    "months_analyzed": 1,
-                    "std_dev": 0.0,
-                    "monthly_values": [base_amount],
-                }
             else:
-                # If multiple budgets per category, average them
-                result[category]["total"] += base_amount
-                result[category]["count"] += 1
-                result[category]["months_analyzed"] = result[category]["count"]
-                result[category]["average_monthly"] = (
-                    result[category]["total"] / result[category]["count"]
-                )
-                result[category]["monthly_values"].append(base_amount)
         return result
     def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
@@ -309,7 +530,12 @@ class SmartBudgetRecommender:
         if not OPENAI_API_KEY:
             return None
-        history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
         summary = (
             f"Category: {category}\n"
             f"Monthly totals: [{history}]\n"
@@ -330,22 +556,24 @@ class SmartBudgetRecommender:
         try:
             response = requests.post(
-                "https://api.openai.com/v1/responses",
                 headers={
                     "Authorization": f"Bearer {OPENAI_API_KEY}",
                     "Content-Type": "application/json",
                 },
                 json={
-                    "model": "gpt-4.1-mini",
-                    "input": prompt,
                     "temperature": 0.1,
                     "response_format": {"type": "json_object"},
                 },
                 timeout=30,
             )
             response.raise_for_status()
-            data = response.json()
-            content = data["output"][0]["content"][0]["text"]
             return json.loads(content)
         except Exception as exc:
             print(f"OpenAI recommendation error for {category}: {exc}")

             avg_expense = data["average_monthly"]
             confidence = self._calculate_confidence(data)
+            # Always try OpenAI first (primary source of recommendation)
             ai_result = self._get_ai_recommendation(category, data, avg_expense)
+            if ai_result and ai_result.get("recommended_budget"):
                 recommended_budget = ai_result.get("recommended_budget")
+                reason = ai_result.get("reason", f"AI recommendation for {category}")
                 action = ai_result.get("action")
+                print(f"OpenAI recommendation for {category}: {recommended_budget}")
             else:
+                # Fallback to rule-based recommendation if OpenAI fails
                 recommended_budget = self._calculate_recommended_budget(avg_expense, data)
                 reason = self._generate_reason(category, avg_expense, recommended_budget)
                 action = None
+                if not ai_result:
+                    print(f"OpenAI unavailable, using rule-based for {category}: {recommended_budget}")
+                else:
+                    print(f"OpenAI returned invalid data, using rule-based for {category}: {recommended_budget}")
             recommendations.append(BudgetRecommendation(
                 category=category,
             amount = expense.get("amount", 0)
             date = expense.get("date")
+            # Handle date conversion - skip if date is None or invalid
+            if date is None:
+                continue
             if isinstance(date, str):
+                try:
+                    date = datetime.fromisoformat(date.replace('Z', '+00:00'))
+                except (ValueError, AttributeError):
+                    continue
+            elif not isinstance(date, datetime):
+                # If date is not a string or datetime, skip this expense
+                continue
             category_data[category]["total"] += amount
             category_data[category]["count"] += 1
         result.sort(key=lambda x: x.average_monthly_expense, reverse=True)
         return result
+    def _get_category_name(self, category_id) -> str:
+        """Look up category name from categories collection"""
+        if not category_id:
+            return "Uncategorized"
+        try:
+            # Try to find category in categories collection
+            if isinstance(category_id, ObjectId):
+                category_doc = self.db.categories.find_one({"_id": category_id})
+            else:
+                try:
+                    category_doc = self.db.categories.find_one({"_id": ObjectId(category_id)})
+                except:
+                    category_doc = self.db.categories.find_one({"_id": category_id})
+            if category_doc:
+                return category_doc.get("name") or category_doc.get("title") or str(category_id)
+        except Exception as e:
+            print(f"Error looking up category name for {category_id}: {e}")
+            pass
+        return str(category_id) if category_id else "Uncategorized"
     def _get_category_stats_from_budgets(
         self, user_id: str, month: int, year: int
     ) -> Dict:
         We treat each budget document (e.g. \"Office Maintenance\", \"LOGICGO\")
         as a spending category and derive an \"average\" from its amounts.
+        Also extracts categories from headCategories array.
         """
+        budgets = []
+        print(f"Searching for budgets with user_id: {user_id} (type: {type(user_id).__name__})")
+        # Try multiple query patterns to find budgets (include both OPEN and CLOSE status)
+        # Pattern 1: Try with ObjectId (most common in WalletSync) - no status filter
         try:
+            query_objid = {"createdBy": ObjectId(user_id)}
+            budgets_objid = list(self.db.budgets.find(query_objid))
+            print(f"Pattern 1 (createdBy ObjectId): Found {len(budgets_objid)} budgets")
+            if budgets_objid:
+                budgets.extend(budgets_objid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 1 failed: {e}")
+            pass
+        # Pattern 2: Try with string user_id - no status filter
+        try:
+            query_str = {"createdBy": user_id}
+            budgets_str = list(self.db.budgets.find(query_str))
+            print(f"Pattern 2 (createdBy string): Found {len(budgets_str)} budgets")
+            if budgets_str:
+                budgets.extend(budgets_str)
+        except Exception as e:
+            print(f"Pattern 2 failed: {e}")
+            pass
+        # Pattern 3: Try with user_id field (alternative field name) - no status filter
+        try:
+            query_userid = {"user_id": user_id}
+            budgets_userid = list(self.db.budgets.find(query_userid))
+            print(f"Pattern 3 (user_id string): Found {len(budgets_userid)} budgets")
+            if budgets_userid:
+                budgets.extend(budgets_userid)
+        except Exception as e:
+            print(f"Pattern 3 failed: {e}")
+            pass
+        # Pattern 4: Try ObjectId with user_id field - no status filter
+        try:
+            query_objid_userid = {"user_id": ObjectId(user_id)}
+            budgets_objid_userid = list(self.db.budgets.find(query_objid_userid))
+            print(f"Pattern 4 (user_id ObjectId): Found {len(budgets_objid_userid)} budgets")
+            if budgets_objid_userid:
+                budgets.extend(budgets_objid_userid)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 4 failed: {e}")
+            pass
+        # Pattern 5: Check if user_id is actually a budget _id, then get createdBy from it
+        try:
+            budget_by_id = self.db.budgets.find_one({"_id": ObjectId(user_id)})
+            if budget_by_id:
+                print(f"Pattern 5: user_id is a budget _id, found budget: {budget_by_id.get('name', 'Unknown')}")
+                created_by = budget_by_id.get("createdBy")
+                if created_by:
+                    # Now find all budgets for this createdBy
+                    query_by_creator = {"createdBy": created_by}
+                    budgets_by_creator = list(self.db.budgets.find(query_by_creator))
+                    print(f"Pattern 5: Found {len(budgets_by_creator)} budgets for createdBy: {created_by}")
+                    if budgets_by_creator:
+                        budgets.extend(budgets_by_creator)
+        except (ValueError, TypeError) as e:
+            print(f"Pattern 5 failed: {e}")
+            pass
+        # Pattern 6: Try finding by budget _id as string
+        try:
+            budget_by_id_str = self.db.budgets.find_one({"_id": user_id})
+            if budget_by_id_str:
+                print(f"Pattern 6: Found budget by _id as string")
+                budgets.append(budget_by_id_str)
+        except Exception as e:
+            print(f"Pattern 6 failed: {e}")
+            pass
+        # Remove duplicates based on _id
+        seen_ids = set()
+        unique_budgets = []
+        for b in budgets:
+            budget_id = str(b.get("_id", ""))
+            if budget_id not in seen_ids:
+                seen_ids.add(budget_id)
+                unique_budgets.append(b)
+        budgets = unique_budgets
         if not budgets:
+            print(f"No budgets found for user_id: {user_id}")
+            print(f"Tried all query patterns. Checking sample budget structure...")
+            # Get a sample budget to see the structure
+            sample = self.db.budgets.find_one()
+            if sample:
+                print(f"Sample budget structure - createdBy type: {type(sample.get('createdBy')).__name__}, value: {sample.get('createdBy')}")
+                print(f"Sample budget has user_id field: {'user_id' in sample}")
             return {}
+        print(f"Found {len(budgets)} budgets for user_id: {user_id}")
         result: Dict[str, Dict] = {}
         for b in budgets:
+            # First, try to extract categories from headCategories array
+            head_categories = b.get("headCategories", [])
+            if head_categories and isinstance(head_categories, list):
+                # Process nested categories from headCategories
+                for head_cat in head_categories:
+                    if not isinstance(head_cat, dict):
+                        continue
+                    # Get headCategory ID and amounts
+                    head_cat_id = head_cat.get("headCategory")
+                    try:
+                        head_cat_max = float(head_cat.get("maxAmount", 0) or 0)
+                        head_cat_spend = float(head_cat.get("spendAmount", 0) or 0)
+                    except (ValueError, TypeError):
+                        head_cat_max = 0
+                        head_cat_spend = 0
+                    # Process nested categories within headCategory
+                    nested_categories = head_cat.get("categories", [])
+                    if nested_categories and isinstance(nested_categories, list):
+                        for nested_cat in nested_categories:
+                            if not isinstance(nested_cat, dict):
+                                continue
+                            nested_cat_id = nested_cat.get("category")
+                            try:
+                                nested_cat_max = float(nested_cat.get("maxAmount", 0) or 0)
+                                nested_cat_spend = float(nested_cat.get("spendAmount", 0) or 0)
+                            except (ValueError, TypeError):
+                                nested_cat_max = 0
+                                nested_cat_spend = 0
+                            spend_limit_type = nested_cat.get("spendLimitType", "NO_LIMIT")
+                            # Only include categories with limits (must have maxAmount > 0)
+                            if nested_cat_max > 0:
+                                # Look up actual category name
+                                nested_category_name = self._get_category_name(nested_cat_id)
+                                nested_base_amount = nested_cat_spend if nested_cat_spend > 0 else nested_cat_max
+                                if nested_category_name not in result:
+                                    result[nested_category_name] = {
+                                        "average_monthly": nested_base_amount,
+                                        "total": nested_base_amount,
+                                        "count": 1,
+                                        "months_analyzed": 1,
+                                        "std_dev": 0.0,
+                                        "monthly_values": [nested_base_amount],
+                                    }
+                                else:
+                                    result[nested_category_name]["total"] += nested_base_amount
+                                    result[nested_category_name]["count"] += 1
+                                    result[nested_category_name]["months_analyzed"] = result[nested_category_name]["count"]
+                                    result[nested_category_name]["average_monthly"] = (
+                                        result[nested_category_name]["total"] / result[nested_category_name]["count"]
+                                    )
+                                    result[nested_category_name]["monthly_values"].append(nested_base_amount)
+                    # Also include headCategory if it has amounts
+                    if head_cat_max > 0 or head_cat_spend > 0:
+                        head_category_name = self._get_category_name(head_cat_id)
+                        head_base_amount = head_cat_spend if head_cat_spend > 0 else head_cat_max
+                        if head_category_name not in result:
+                            result[head_category_name] = {
+                                "average_monthly": head_base_amount,
+                                "total": head_base_amount,
+                                "count": 1,
+                                "months_analyzed": 1,
+                                "std_dev": 0.0,
+                                "monthly_values": [head_base_amount],
+                            }
+                        else:
+                            result[head_category_name]["total"] += head_base_amount
+                            result[head_category_name]["count"] += 1
+                            result[head_category_name]["months_analyzed"] = result[head_category_name]["count"]
+                            result[head_category_name]["average_monthly"] = (
+                                result[head_category_name]["total"] / result[head_category_name]["count"]
+                            )
+                            result[head_category_name]["monthly_values"].append(head_base_amount)
+            # Also include the main budget as a category (if it has amounts)
+            budget_name = b.get("name", "Uncategorized")
+            if not budget_name or budget_name == "Uncategorized":
+                budget_name = b.get("category") or b.get("title") or "Uncategorized"
             # Derive a base amount from WalletSync fields
+            try:
+                max_amount = float(b.get("maxAmount", 0) or b.get("max_amount", 0) or b.get("amount", 0) or 0)
+                spend_amount = float(b.get("spendAmount", 0) or b.get("spend_amount", 0) or b.get("spent", 0) or 0)
+                budget_amount = float(b.get("budget", 0) or b.get("budgetAmount", 0) or 0)
+            except (ValueError, TypeError):
+                max_amount = 0
+                spend_amount = 0
+                budget_amount = 0
+            # Priority: spendAmount > maxAmount > budgetAmount > budget
+            if spend_amount > 0:
+                base_amount = spend_amount
+            elif max_amount > 0:
+                base_amount = max_amount
+            elif budget_amount > 0:
+                base_amount = budget_amount
             else:
+                base_amount = 0
+            # Only add main budget if it has an amount and we haven't processed categories
+            if base_amount > 0:
+                if budget_name not in result:
+                    result[budget_name] = {
+                        "average_monthly": base_amount,
+                        "total": base_amount,
+                        "count": 1,
+                        "months_analyzed": 1,
+                        "std_dev": 0.0,
+                        "monthly_values": [base_amount],
+                    }
+                else:
+                    result[budget_name]["total"] += base_amount
+                    result[budget_name]["count"] += 1
+                    result[budget_name]["months_analyzed"] = result[budget_name]["count"]
+                    result[budget_name]["average_monthly"] = (
+                        result[budget_name]["total"] / result[budget_name]["count"]
+                    )
+                    result[budget_name]["monthly_values"].append(base_amount)
+        print(f"Processed {len(result)} budget categories for recommendations")
         return result
     def _get_ai_recommendation(self, category: str, data: Dict, avg_expense: float):
         if not OPENAI_API_KEY:
             return None
+        # Handle empty monthly_values
+        if not data.get("monthly_values") or len(data["monthly_values"]) == 0:
+            history = f"{avg_expense:.0f}"
+        else:
+            history = ", ".join(f"{value:.0f}" for value in data["monthly_values"])
         summary = (
             f"Category: {category}\n"
             f"Monthly totals: [{history}]\n"
         try:
             response = requests.post(
+                "https://api.openai.com/v1/chat/completions",
                 headers={
                     "Authorization": f"Bearer {OPENAI_API_KEY}",
                     "Content-Type": "application/json",
                 },
                 json={
+                    "model": "gpt-4o-mini",
+                    "messages": [
+                        {"role": "user", "content": prompt}
+                    ],
                     "temperature": 0.1,
                     "response_format": {"type": "json_object"},
                 },
                 timeout=30,
             )
             response.raise_for_status()
+            response_data = response.json()
+            content = response_data["choices"][0]["message"]["content"]
             return json.loads(content)
         except Exception as exc:
             print(f"OpenAI recommendation error for {category}: {exc}")