rairo committed on
Commit
6c80eb1
·
verified ·
1 Parent(s): bac8902

Update utility.py

Browse files
Files changed (1) hide show
  1. utility.py +55 -57
utility.py CHANGED
@@ -367,34 +367,34 @@ class ReportEngine:
367
  }
368
  return json.dumps(self.results, indent=2)
369
 
370
- def generate_forecast_data(self) -> str:
371
- sales_df = self.dfs.get('sales')
372
- if sales_df is None or sales_df.empty:
373
- return json.dumps({"error": "Not enough sales data to generate a forecast."})
374
-
375
- sales_df_copy = sales_df.copy()
376
- sales_df_copy.set_index('timestamp', inplace=True)
377
- weekly_sales = sales_df_copy['sale_total'].resample('W').sum()
378
-
379
- if len(weekly_sales) < 2:
380
- return json.dumps({"error": "I need at least two weeks of sales data to make a forecast."})
381
 
382
- last_week_sales = weekly_sales.iloc[-1]
383
- previous_week_sales = weekly_sales.iloc[-2] if len(weekly_sales) > 1 else 0
384
-
385
- growth_rate = 0
386
- if previous_week_sales > 0:
387
- growth_rate = ((last_week_sales - previous_week_sales) / previous_week_sales) * 100
388
 
389
- historical_avg = weekly_sales.head(-1).mean()
 
 
 
 
 
 
 
 
 
 
390
 
391
- self.results = {
392
- "last_period_sales": f"${last_week_sales:.2f}",
393
- "previous_period_sales": f"${previous_week_sales:.2f}",
394
- "period_over_period_growth": f"{growth_rate:.2f}%",
395
- "historical_average": f"${historical_avg:.2f}"
396
- }
397
- return json.dumps(self.results, indent=2)
398
 
399
  def generateResponse(prompt: str) -> str:
400
  """Generate structured JSON response from user input using Generative AI."""
@@ -743,6 +743,18 @@ def create_liability(user_phone: str, transaction_data: List[Dict]) -> tuple[boo
743
  logger.error(f"Liability batch commit failed for user {user_phone}: {e}", exc_info=True)
744
  return False, f"Failed to record liabilities. An error occurred: {e}"
745
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  def _fetch_all_collections_as_dfs(user_phone: str) -> List[Tuple[str, pd.DataFrame]]:
747
  """Fetches all user data, splits/validates DataFrames, and engineers features."""
748
  all_dfs_with_names = []
@@ -785,45 +797,24 @@ def _get_relative_date_context() -> str:
785
  """
786
  today = datetime.now(timezone.utc)
787
 
788
- # Helper to format dates
789
  def fmt(d):
790
  return d.strftime('%Y-%m-%d')
791
 
792
- # Basic dates
793
  yesterday = today - timedelta(days=1)
794
-
795
- # Week calculations
796
  start_of_this_week = today - timedelta(days=today.weekday())
797
  end_of_this_week = start_of_this_week + timedelta(days=6)
798
  start_of_last_week = start_of_this_week - timedelta(days=7)
799
  end_of_last_week = start_of_last_week + timedelta(days=6)
800
 
801
- # Last weekday calculations
802
- last_monday = start_of_this_week if today.weekday() != 0 else start_of_last_week
803
- if last_monday >= today:
804
- last_monday -= timedelta(days=7)
805
- last_tuesday = last_monday + timedelta(days=1)
806
- last_wednesday = last_monday + timedelta(days=2)
807
- last_thursday = last_monday + timedelta(days=3)
808
- last_friday = last_monday + timedelta(days=4)
809
- last_saturday = last_monday + timedelta(days=5)
810
- last_sunday = last_monday + timedelta(days=6)
811
-
812
  context = [
813
  f"Here are some pre-calculated dates to help you understand the user's request:",
814
  f"- Today is: {fmt(today)}",
815
  f"- Yesterday was: {fmt(yesterday)}",
816
  f"- The start of this week was: {fmt(start_of_this_week)}",
817
- f"- The end of this week is: {fmt(end_of_this_week)}",
818
  f"- The start of last week was: {fmt(start_of_last_week)}",
819
- f"- The end of last week was: {fmt(end_of_last_week)}",
820
  f"- Last Monday was on: {fmt(last_monday)}",
821
- f"- Last Tuesday was on: {fmt(last_tuesday)}",
822
- f"- Last Wednesday was on: {fmt(last_wednesday)}",
823
- f"- Last Thursday was on: {fmt(last_thursday)}",
824
- f"- Last Friday was on: {fmt(last_friday)}",
825
- f"- Last Saturday was on: {fmt(last_saturday)}",
826
- f"- Last Sunday was on: {fmt(last_sunday)}"
827
  ]
828
 
829
  return "\n".join(context)
@@ -859,8 +850,8 @@ def read_datalake(user_phone: str, query: str) -> str:
859
 
860
  # --- Tier 1.5: Specific KPI Report Router ---
861
  item_report_match = re.search(r"(?:report on|how did) ([\w\s]+)", query_lower)
 
862
 
863
- # Check for specific KPI queries first
864
  if "profit" in query_lower:
865
  logger.info(f"Handling '{query}' with the Profit Report Path.")
866
  report_json = engine.generate_profit_report()
@@ -871,8 +862,6 @@ def read_datalake(user_phone: str, query: str) -> str:
871
  item_name = item_report_match.group(1).strip()
872
  logger.info(f"Handling '{query}' with the Item Report Path for item: '{item_name}'.")
873
  report_json = engine.generate_item_report(item_name)
874
- else:
875
- report_json = None
876
 
877
  if report_json:
878
  report_data = json.loads(report_json)
@@ -895,21 +884,29 @@ def read_datalake(user_phone: str, query: str) -> str:
895
  return response.content
896
 
897
  # --- Tier 2: General Intelligent Router ---
898
- predictive_keywords = ["expect", "forecast", "predict", "next month", "next week"]
899
  historical_report_keywords = ["report", "summary", "performance", "how did i do", "overview", "month", "year", "week", "today"]
900
 
901
  if any(keyword in query_lower for keyword in predictive_keywords):
902
  logger.info(f"Handling '{query}' with the Forecasting Path.")
903
  forecast_json = engine.generate_forecast_data()
904
- # ... (forecast synthesis logic)
905
- return "Forecast data"
 
 
 
 
906
 
907
  elif any(keyword in query_lower for keyword in historical_report_keywords):
908
  logger.info(f"Handling '{query}' with the General Reporting Path (Sales/Expense).")
909
  report_json = engine.generate_report()
910
- # ... (general report synthesis logic)
911
- return "General report"
912
 
 
 
 
 
913
  else:
914
  # --- Path C: Fortified PandasAI for Q&A and Plotting ---
915
  logger.info(f"Handling '{query}' with the Fortified PandasAI Path.")
@@ -950,11 +947,12 @@ def read_datalake(user_phone: str, query: str) -> str:
950
 
951
  except (NoCodeFoundError, MaliciousQueryError) as e:
952
  logger.error(f"PandasAI failed for query '{query}': {e}")
953
- return f"Unfortunately, I was not able to answer your question, because of the following error:\n\n{e}"
954
  except Exception as e:
955
  logger.error(f"Data query failed for user {user_phone}, query '{query}': {e}", exc_info=True)
956
  return "Sorry, I encountered an error while analyzing your data."
957
 
 
958
  def _find_document_by_details(user_phone: str, collection_name: str, details: Dict) -> Optional[Any]:
959
  col_ref = db.collection("users").document(user_phone).collection(collection_name)
960
  if 'transaction_id' in details and details['transaction_id']:
 
367
  }
368
  return json.dumps(self.results, indent=2)
369
 
370
def generate_forecast_data(self) -> str:
    """Build a simple week-over-week sales forecast summary.

    Resamples the 'sales' DataFrame into weekly totals and reports the
    last two weekly totals, their percentage growth, and the historical
    weekly average (every week except the most recent one).

    Returns:
        A JSON string with the formatted forecast figures, or a JSON
        object with an "error" key when there is not enough data.
    """
    sales_df = self.dfs.get('sales')
    if sales_df is None or sales_df.empty:
        return json.dumps({"error": "Not enough sales data to generate a forecast."})

    # Guard against malformed frames so a missing column yields a clean
    # error payload instead of raising KeyError to the caller.
    if 'timestamp' not in sales_df.columns or 'sale_total' not in sales_df.columns:
        return json.dumps({"error": "Not enough sales data to generate a forecast."})

    # Work on a copy so the shared DataFrame keeps its original index.
    sales_df_copy = sales_df.copy()
    sales_df_copy.set_index('timestamp', inplace=True)
    weekly_sales = sales_df_copy['sale_total'].resample('W').sum()

    if len(weekly_sales) < 2:
        return json.dumps({"error": "I need at least two weeks of sales data to make a forecast."})

    # The guard above guarantees at least two weekly buckets, so both
    # positions exist (the original `... if len > 1 else 0` was dead code).
    last_week_sales = weekly_sales.iloc[-1]
    previous_week_sales = weekly_sales.iloc[-2]

    # Percentage growth is only meaningful with a non-zero baseline.
    growth_rate = 0.0
    if previous_week_sales > 0:
        growth_rate = ((last_week_sales - previous_week_sales) / previous_week_sales) * 100

    # Average of every week except the most recent one.
    historical_avg = weekly_sales.head(-1).mean()

    self.results = {
        "last_period_sales": f"${last_week_sales:.2f}",
        "previous_period_sales": f"${previous_week_sales:.2f}",
        "period_over_period_growth": f"{growth_rate:.2f}%",
        "historical_average": f"${historical_avg:.2f}"
    }
    return json.dumps(self.results, indent=2)
398
 
399
  def generateResponse(prompt: str) -> str:
400
  """Generate structured JSON response from user input using Generative AI."""
 
743
  logger.error(f"Liability batch commit failed for user {user_phone}: {e}", exc_info=True)
744
  return False, f"Failed to record liabilities. An error occurred: {e}"
745
 
746
+ def _validate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
747
+ """Proactively cleans and validates a DataFrame to ensure data integrity."""
748
+ if df.empty: return df
749
+ for col in ['timestamp', 'created_at', 'last_updated', 'acquisition_date', 'due_date']:
750
+ if col in df.columns: df[col] = pd.to_datetime(df[col], errors='coerce', utc=True)
751
+ numeric_cols = ['price', 'unit_price', 'quantity', 'amount', 'value', 'cost', 'hours', 'units_available']
752
+ for col in numeric_cols:
753
+ if col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
754
+ for col in df.select_dtypes(include=['object']).columns:
755
+ df[col] = df[col].fillna('Unknown')
756
+ return df
757
+
758
  def _fetch_all_collections_as_dfs(user_phone: str) -> List[Tuple[str, pd.DataFrame]]:
759
  """Fetches all user data, splits/validates DataFrames, and engineers features."""
760
  all_dfs_with_names = []
 
797
  """
798
  today = datetime.now(timezone.utc)
799
 
 
800
  def fmt(d):
801
  return d.strftime('%Y-%m-%d')
802
 
 
803
  yesterday = today - timedelta(days=1)
 
 
804
  start_of_this_week = today - timedelta(days=today.weekday())
805
  end_of_this_week = start_of_this_week + timedelta(days=6)
806
  start_of_last_week = start_of_this_week - timedelta(days=7)
807
  end_of_last_week = start_of_last_week + timedelta(days=6)
808
 
809
+ last_monday = start_of_this_week - timedelta(days=7)
810
+
 
 
 
 
 
 
 
 
 
811
  context = [
812
  f"Here are some pre-calculated dates to help you understand the user's request:",
813
  f"- Today is: {fmt(today)}",
814
  f"- Yesterday was: {fmt(yesterday)}",
815
  f"- The start of this week was: {fmt(start_of_this_week)}",
 
816
  f"- The start of last week was: {fmt(start_of_last_week)}",
 
817
  f"- Last Monday was on: {fmt(last_monday)}",
 
 
 
 
 
 
818
  ]
819
 
820
  return "\n".join(context)
 
850
 
851
  # --- Tier 1.5: Specific KPI Report Router ---
852
  item_report_match = re.search(r"(?:report on|how did) ([\w\s]+)", query_lower)
853
+ report_json = None
854
 
 
855
  if "profit" in query_lower:
856
  logger.info(f"Handling '{query}' with the Profit Report Path.")
857
  report_json = engine.generate_profit_report()
 
862
  item_name = item_report_match.group(1).strip()
863
  logger.info(f"Handling '{query}' with the Item Report Path for item: '{item_name}'.")
864
  report_json = engine.generate_item_report(item_name)
 
 
865
 
866
  if report_json:
867
  report_data = json.loads(report_json)
 
884
  return response.content
885
 
886
  # --- Tier 2: General Intelligent Router ---
887
+ predictive_keywords = ["expect", "forecast", "predict"]
888
  historical_report_keywords = ["report", "summary", "performance", "how did i do", "overview", "month", "year", "week", "today"]
889
 
890
  if any(keyword in query_lower for keyword in predictive_keywords):
891
  logger.info(f"Handling '{query}' with the Forecasting Path.")
892
  forecast_json = engine.generate_forecast_data()
893
+ forecast_data = json.loads(forecast_json)
894
+ if "error" in forecast_data: return forecast_data["error"]
895
+
896
+ synthesis_prompt = f"You are a business analyst... Based on the following data, provide a friendly sales forecast: {forecast_json}"
897
+ response = llm.invoke(synthesis_prompt)
898
+ return response.content
899
 
900
  elif any(keyword in query_lower for keyword in historical_report_keywords):
901
  logger.info(f"Handling '{query}' with the General Reporting Path (Sales/Expense).")
902
  report_json = engine.generate_report()
903
+ report_data = json.loads(report_json)
904
+ if "error" in report_data: return report_data["error"]
905
 
906
+ synthesis_prompt = f"You are a helpful business assistant... Based on the summary, create a report. For sales, suggest insights. Data: {report_json}"
907
+ response = llm.invoke(synthesis_prompt)
908
+ return response.content
909
+
910
  else:
911
  # --- Path C: Fortified PandasAI for Q&A and Plotting ---
912
  logger.info(f"Handling '{query}' with the Fortified PandasAI Path.")
 
947
 
948
  except (NoCodeFoundError, MaliciousQueryError) as e:
949
  logger.error(f"PandasAI failed for query '{query}': {e}")
950
+ return f"Unfortunately, I was not able to answer your question: {e}"
951
  except Exception as e:
952
  logger.error(f"Data query failed for user {user_phone}, query '{query}': {e}", exc_info=True)
953
  return "Sorry, I encountered an error while analyzing your data."
954
 
955
+
956
  def _find_document_by_details(user_phone: str, collection_name: str, details: Dict) -> Optional[Any]:
957
  col_ref = db.collection("users").document(user_phone).collection(collection_name)
958
  if 'transaction_id' in details and details['transaction_id']: