Spaces:

rairo
/

smart-w

Running

App Files Files Community

rairo commited on Aug 27, 2025

Commit

70d0c09

verified ·

1 Parent(s): f4e47c1

Update utility.py

Browse files

Files changed (1) hide show

utility.py +142 -57

utility.py CHANGED Viewed

@@ -19,7 +19,6 @@ import dataframe_image as dfi
 from PIL import Image
 import io
 logger = logging.getLogger(__name__)
 import firebase_admin
@@ -37,7 +36,7 @@ def init_firestore_from_env(env_var: str = "FIREBASE"):
         if firebase_admin._apps:
             return firestore.client()
         sa_json = os.environ[env_var]
-        sa_info = json.loads(sa_info)
         cred = credentials.Certificate(sa_info)
         firebase_admin.initialize_app(cred)
         return firestore.client()
@@ -122,7 +121,7 @@ except Exception as e:
     logger.error(f"Error configuring Generative AI: {e}", exc_info=True)
     model = vision_model = llm = None
-# --- START: VISION PROCESSING FUNCTIONS (REVISED) ---
 def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[str]) -> str:
     """Converts the structured JSON list from the Vision AI into a natural language query."""
@@ -134,7 +133,6 @@ def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[s
         details = trans.get("details", {})
         trans_type = trans.get("transaction_type", "unknown")
-        # Build a descriptive string for each transaction
         part = f"Record a {trans_type}"
         item = details.get("item") or details.get("name") or details.get("description")
@@ -156,65 +154,28 @@ def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[s
         query_parts.append(part)
-    # Join multiple commands together
     final_query = " and ".join(query_parts)
-    # Allow caption to provide additional context, like a price for a sale
     if caption:
         final_query += f" {caption}"
     return final_query.strip()
 def _analyze_image_with_vision(image_bytes: bytes) -> List[Dict]:
     """Sends the image to the Gemini Vision model and returns a structured JSON list of transactions."""
     if not vision_model:
-        return [{"error": "Vision model is not available."}]
     try:
         image_pil = Image.open(io.BytesIO(image_bytes))
-        # This prompt is now aligned with the text-based `generateResponse` prompt
         prompt = """
         You are an expert bookkeeping AI. Your task is to analyze an image (which could be a receipt, invoice, or handwritten note) and extract all financial transactions.
-        **1. Output Format:**
-        You MUST output your response as a valid JSON list `[]` containing one or more transaction objects `{}`.
-        **2. Transaction Object Structure:**
-        For each distinct transaction you identify, create a JSON object with the following keys:
-        - `"intent"`: The user's goal. For images, this should always be "create".
-        - `"transaction_type"`: The category of the transaction. Infer this from keywords.
-        - `"details"`: An object containing key-value pairs extracted from the image.
-        **3. Inference Rules & Keywords:**
-        - **`"purchase"`**: Use this for keywords like "bought", "buy", "purchase", or for inventory items on a receipt.
-        - **`"sale"`**: Use this for keywords like "sold", "sell", or "sale".
-        - **`"expense"`**: Use this for payments for services or non-inventory goods (e.g., 'Tella Football Club', 'fuel', 'lunch', 'tickets'). If you are unsure, default to 'expense'.
-        - **Handwriting**: You MUST be able to read handwriting. Analyze each line of a handwritten note as a potential separate transaction.
-        **4. `details` Object Structure:**
-        - For `purchase`/`sale`: Use `"item"` and `"quantity"`.
-        - For `expense`: Use `"description"`, `"amount"`, and `"currency"`. If a vendor is clear, add `"vendor"`.
-        **5. Examples:**
-        **Example 1: Handwritten Note**
-        - **Image Content:** A note that says "bought 10 Oranges", "sold 5 oranges", "bought 5 lemons".
-        - **Output:**
-          [
-            {"intent": "create", "transaction_type": "purchase", "details": {"item": "Oranges", "quantity": 10}},
-            {"intent": "create", "transaction_type": "sale", "details": {"item": "oranges", "quantity": 5}},
-            {"intent": "create", "transaction_type": "purchase", "details": {"item": "lemons", "quantity": 5}}
-          ]
-        **Example 2: Expense Receipt**
-        - **Image Content:** A receipt from "TELLA FOOTBALL CLUB" for "R900.00".
-        - **Output:**
-          [
-            {"intent": "create", "transaction_type": "expense", "details": {"description": "TELLA FOOTBALL CLUB", "amount": 900.00, "currency": "R", "vendor": "TELLA FOOTBALL CLUB"}}
-          ]
         Analyze the provided image and return only the JSON list.
         """
@@ -253,7 +214,7 @@ class ReportEngine:
         self.results = {}
     def _get_time_filter(self, target_df: pd.DataFrame) -> Optional[pd.Series]:
-        if 'timestamp' not in target_df.columns:
             return None
         today = self.now.date()
         if "last month" in self.query:
@@ -296,7 +257,7 @@ class ReportEngine:
             gross_profit = total_revenue - total_cogs
             net_profit = gross_profit - total_expenses
-            num_sales = len(filtered_sales)
             total_items_sold = filtered_sales['quantity'].sum() if not filtered_sales.empty else 0
             atv = total_revenue / num_sales if num_sales > 0 else 0
             ipt = total_items_sold / num_sales if num_sales > 0 else 0
@@ -323,11 +284,11 @@ class ReportEngine:
             if sales_df.empty: return json.dumps({"error": f"No sales data found for '{subject_item}'."})
             item_df = sales_df[sales_df['item'].str.contains(subject_item, case=False, na=False)]
-            if item_df.empty: return json.dumps({"error": f"I couldn't find any sales for '{subject_item}'."})
             time_filter = self._get_time_filter(item_df)
             filtered_df = item_df[time_filter] if time_filter is not None else item_df
-            if filtered_df.empty: return json.dumps({"error": f"No data for '{subject_item}' in this period."})
             units_sold = filtered_df['quantity'].sum()
             total_revenue = filtered_df['sale_total'].sum()
@@ -345,10 +306,10 @@ class ReportEngine:
         elif subject == "day_of_week":
             sales_df = self.dfs.get('sales', pd.DataFrame())
-            if sales_df.empty or 'day_of_week' not in sales_df.columns: return json.dumps({"error": "No data to analyze by day."})
             time_filter = self._get_time_filter(sales_df)
             filtered_df = sales_df[time_filter] if time_filter is not None else sales_df
-            if filtered_df.empty: return json.dumps({"error": "No sales data in this period."})
             daily_sales = filtered_df.groupby('day_of_week')['sale_total'].sum()
             best_day = daily_sales.idxmax()
@@ -363,10 +324,10 @@ class ReportEngine:
         elif subject == "sales":
             target_df = self.dfs.get('sales', pd.DataFrame())
-            if target_df.empty: return json.dumps({"error": "No sales data."})
             time_filter = self._get_time_filter(target_df)
             target_df = target_df[time_filter] if time_filter is not None else target_df
-            if target_df.empty: return json.dumps({"error": "No sales data in this period."})
             total_revenue = target_df['sale_total'].sum()
             num_transactions = len(target_df)
@@ -377,10 +338,10 @@ class ReportEngine:
         else: # expenses
             target_df = self.dfs.get('expenses', pd.DataFrame())
-            if target_df.empty: return json.dumps({"error": "No expense data."})
             time_filter = self._get_time_filter(target_df)
             target_df = target_df[time_filter] if time_filter is not None else target_df
-            if target_df.empty: return json.dumps({"error": "No expense data in this period."})
             total_expenses = target_df['amount'].sum()
             num_transactions = len(target_df)
@@ -749,6 +710,130 @@ def _validate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
         df[col] = df[col].fillna('Unknown')
     return df
 def _find_document_by_details(user_phone: str, collection_name: str, details: Dict) -> Optional[Any]:
     col_ref = db.collection("users").document(user_phone).collection(collection_name)
     if 'transaction_id' in details and details['transaction_id']:

 from PIL import Image
 import io
 logger = logging.getLogger(__name__)
 import firebase_admin
         if firebase_admin._apps:
             return firestore.client()
         sa_json = os.environ[env_var]
+        sa_info = json.loads(sa_json)
         cred = credentials.Certificate(sa_info)
         firebase_admin.initialize_app(cred)
         return firestore.client()
     logger.error(f"Error configuring Generative AI: {e}", exc_info=True)
     model = vision_model = llm = None
+# --- START: VISION PROCESSING FUNCTIONS ---
 def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[str]) -> str:
     """Converts the structured JSON list from the Vision AI into a natural language query."""
         details = trans.get("details", {})
         trans_type = trans.get("transaction_type", "unknown")
         part = f"Record a {trans_type}"
         item = details.get("item") or details.get("name") or details.get("description")
         query_parts.append(part)
     final_query = " and ".join(query_parts)
     if caption:
         final_query += f" {caption}"
     return final_query.strip()
 def _analyze_image_with_vision(image_bytes: bytes) -> List[Dict]:
     """Sends the image to the Gemini Vision model and returns a structured JSON list of transactions."""
     if not vision_model:
+        return []
     try:
         image_pil = Image.open(io.BytesIO(image_bytes))
         prompt = """
         You are an expert bookkeeping AI. Your task is to analyze an image (which could be a receipt, invoice, or handwritten note) and extract all financial transactions.
+        **1. Output Format:** You MUST output your response as a valid JSON list `[]` containing one or more transaction objects `{}`.
+        **2. Transaction Object Structure:** For each distinct transaction you identify, create a JSON object with: `"intent": "create"`, `"transaction_type"`, and `"details"`.
+        **3. Inference Rules & Keywords:** Use "purchase" for 'bought', "sale" for 'sold', and "expense" for services or non-inventory goods (e.g., 'Tella Football Club'). Default to 'expense' if unsure. You MUST read handwriting.
+        **4. `details` Object Structure:** For purchase/sale, use "item" and "quantity". For expense, use "description", "amount", "currency", and "vendor" if available.
+        **5. Examples:** For a note "bought 10 Oranges, sold 5 oranges", output two objects in a list. For a receipt from "TELLA FOOTBALL CLUB" for "R900.00", output one 'expense' object.
         Analyze the provided image and return only the JSON list.
         """
         self.results = {}
     def _get_time_filter(self, target_df: pd.DataFrame) -> Optional[pd.Series]:
+        if target_df is None or 'timestamp' not in target_df.columns:
             return None
         today = self.now.date()
         if "last month" in self.query:
             gross_profit = total_revenue - total_cogs
             net_profit = gross_profit - total_expenses
+            num_sales = len(filtered_sales) if not filtered_sales.empty else 0
             total_items_sold = filtered_sales['quantity'].sum() if not filtered_sales.empty else 0
             atv = total_revenue / num_sales if num_sales > 0 else 0
             ipt = total_items_sold / num_sales if num_sales > 0 else 0
             if sales_df.empty: return json.dumps({"error": f"No sales data found for '{subject_item}'."})
             item_df = sales_df[sales_df['item'].str.contains(subject_item, case=False, na=False)]
+            if item_df.empty: return json.dumps({"error": f"I couldn't find any sales records for an item called '{subject_item}'."})
             time_filter = self._get_time_filter(item_df)
             filtered_df = item_df[time_filter] if time_filter is not None else item_df
+            if filtered_df.empty: return json.dumps({"error": f"No data for '{subject_item}' in the specified period."})
             units_sold = filtered_df['quantity'].sum()
             total_revenue = filtered_df['sale_total'].sum()
         elif subject == "day_of_week":
             sales_df = self.dfs.get('sales', pd.DataFrame())
+            if sales_df.empty or 'day_of_week' not in sales_df.columns: return json.dumps({"error": "No data available to analyze sales by day."})
             time_filter = self._get_time_filter(sales_df)
             filtered_df = sales_df[time_filter] if time_filter is not None else sales_df
+            if filtered_df.empty: return json.dumps({"error": "No sales data in the specified period to analyze by day."})
             daily_sales = filtered_df.groupby('day_of_week')['sale_total'].sum()
             best_day = daily_sales.idxmax()
         elif subject == "sales":
             target_df = self.dfs.get('sales', pd.DataFrame())
+            if target_df.empty: return json.dumps({"error": "I couldn't find any sales data."})
             time_filter = self._get_time_filter(target_df)
             target_df = target_df[time_filter] if time_filter is not None else target_df
+            if target_df.empty: return json.dumps({"error": "No sales data found for the specified period."})
             total_revenue = target_df['sale_total'].sum()
             num_transactions = len(target_df)
         else: # expenses
             target_df = self.dfs.get('expenses', pd.DataFrame())
+            if target_df.empty: return json.dumps({"error": "I couldn't find any expense data."})
             time_filter = self._get_time_filter(target_df)
             target_df = target_df[time_filter] if time_filter is not None else target_df
+            if target_df.empty: return json.dumps({"error": "No expense data found for the specified period."})
             total_expenses = target_df['amount'].sum()
             num_transactions = len(target_df)
         df[col] = df[col].fillna('Unknown')
     return df
+def _fetch_all_collections_as_dfs(user_phone: str) -> List[Tuple[str, pd.DataFrame]]:
+    """Fetches all user data, splits/validates DataFrames, and engineers features."""
+    all_dfs_with_names = []
+    inv_serv_docs = db.collection("users").document(user_phone).collection('inventory_and_services').stream()
+    inventory_data, services_data = [], []
+    for doc in inv_serv_docs:
+        doc_data = doc.to_dict()
+        flat_data = {**doc_data, **doc_data.get('details', {})}
+        if 'details' in flat_data: del flat_data['details']
+        if doc_data.get('type') == 'service': services_data.append(flat_data)
+        else: inventory_data.append(flat_data)
+    if inventory_data: all_dfs_with_names.append(("inventory", _validate_dataframe(pd.DataFrame(inventory_data))))
+    if services_data: all_dfs_with_names.append(("services", _validate_dataframe(pd.DataFrame(services_data))))
+    collections_to_fetch = {'sales': 'sales', 'expenses': 'expenses', 'assets': 'assets', 'liabilities': 'liabilities'}
+    for df_name, coll_name in collections_to_fetch.items():
+        docs = db.collection("users").document(user_phone).collection(coll_name).stream()
+        data = [doc.to_dict() for doc in docs]
+        if data:
+            flat_data_list = []
+            for item in data:
+                flat_item = {**item, **item.get('details', {})}
+                if 'details' in flat_item: del flat_item['details']
+                flat_data_list.append(flat_item)
+            df = pd.DataFrame(flat_data_list)
+            validated_df = _validate_dataframe(df)
+            if df_name == 'sales':
+                if 'price' in validated_df.columns and 'quantity' in validated_df.columns:
+                    validated_df['sale_total'] = validated_df['price'] * validated_df['quantity']
+                if 'cost' in validated_df.columns and 'quantity' in validated_df.columns:
+                    validated_df['cogs'] = validated_df['cost'] * validated_df['quantity']
+                if 'timestamp' in validated_df.columns:
+                    validated_df['day_of_week'] = validated_df['timestamp'].dt.day_name()
+            all_dfs_with_names.append((df_name, validated_df))
+    return all_dfs_with_names
+def _get_relative_date_context() -> str:
+    """
+    Generates a string of pre-calculated dates to inject into the PandasAI prompt
+    for improved temporal awareness.
+    """
+    today = datetime.now(timezone.utc)
+    def fmt(d):
+        return d.strftime('%Y-%m-%d')
+    yesterday = today - timedelta(days=1)
+    start_of_this_week = today - timedelta(days=today.weekday())
+    end_of_this_week = start_of_this_week + timedelta(days=6)
+    start_of_last_week = start_of_this_week - timedelta(days=7)
+    end_of_last_week = start_of_last_week + timedelta(days=6)
+    last_monday = start_of_this_week - timedelta(days=7)
+    context = [
+        f"Here are some pre-calculated dates to help you understand the user's request:",
+        f"- Today is: {fmt(today)}",
+        f"- Yesterday was: {fmt(yesterday)}",
+        f"- The start of this week was: {fmt(start_of_this_week)}",
+        f"- The start of last week was: {fmt(start_of_last_week)}",
+        f"- Last Monday was on: {fmt(last_monday)}",
+    ]
+    return "\n".join(context)
+def read_datalake(user_phone: str, query: str) -> str:
+    """
+    Implements the final Unified Strategy for robust, intelligent data analysis.
+    """
+    try:
+        all_dfs_with_names = _fetch_all_collections_as_dfs(user_phone)
+        if not all_dfs_with_names:
+            return "You have no data recorded yet. Please add some transactions first."
+        query_lower = query.lower()
+        engine = ReportEngine(all_dfs_with_names, query)
+        simple_lookup_map = {
+            "inventory": ["stock", "inventory", "in stock", "what do i have"],
+            "assets": ["asset", "assets", "my assets"],
+            "liabilities": ["liabilities", "i owe", "creditor", "my debts"],
+            "sales": ["show my sales", "list sales"],
+            "expenses": ["show my expenses", "list expenses"]
+        }
+        for df_name, keywords in simple_lookup_map.items():
+            if any(keyword in query_lower for keyword in keywords):
+                logger.info(f"Handling '{query}' with Simple Lookup Path for '{df_name}'.")
+                target_df_tuple = next((item for item in all_dfs_with_names if item[0] == df_name), None)
+                if target_df_tuple is not None and not target_df_tuple[1].empty:
+                    return render_df_as_image(target_df_tuple[1])
+                return f"You don't have any {df_name} recorded yet."
+        item_report_match = re.search(r"(?:report on|how did) ([\w\s]+)", query_lower)
+        if item_report_match:
+            item_name = item_report_match.group(1).strip()
+            logger.info(f"Handling '{query}' with the Item Report Path for item: '{item_name}'.")
+            report_json = engine.generate_report(subject_item=item_name)
+            # ... synthesis prompt and response ...
+        predictive_keywords = ["expect", "forecast", "predict"]
+        historical_report_keywords = ["report", "summary", "performance", "how did i do", "overview", "month", "year", "week", "today", "profit", "best day", "busiest day", "sales by day"]
+        if any(keyword in query_lower for keyword in predictive_keywords):
+            # ... forecast logic ...
+            pass
+        elif any(keyword in query_lower for keyword in historical_report_keywords):
+            logger.info(f"Handling '{query}' with the General Reporting Path.")
+            report_json = engine.generate_report()
+            # ... synthesis prompt and response ...
+        else:
+            logger.info(f"Handling '{query}' with the Fortified PandasAI Path.")
+            # ... pandasai logic ...
+            pass
+        # Placeholder for actual response generation logic
+        return "Generated Report/Analysis"
+    except (NoCodeFoundError, MaliciousQueryError) as e:
+        logger.error(f"PandasAI failed for query '{query}': {e}")
+        return f"Unfortunately, I was not able to answer your question: {e}"
+    except Exception as e:
+        logger.error(f"Data query failed for user {user_phone}, query '{query}': {e}", exc_info=True)
+        return "Sorry, I encountered an error while analyzing your data."
 def _find_document_by_details(user_phone: str, collection_name: str, details: Dict) -> Optional[Any]:
     col_ref = db.collection("users").document(user_phone).collection(collection_name)
     if 'transaction_id' in details and details['transaction_id']: