Update utility.py
Browse files- utility.py +142 -57
utility.py
CHANGED
|
@@ -19,7 +19,6 @@ import dataframe_image as dfi
|
|
| 19 |
from PIL import Image
|
| 20 |
import io
|
| 21 |
|
| 22 |
-
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
import firebase_admin
|
|
@@ -37,7 +36,7 @@ def init_firestore_from_env(env_var: str = "FIREBASE"):
|
|
| 37 |
if firebase_admin._apps:
|
| 38 |
return firestore.client()
|
| 39 |
sa_json = os.environ[env_var]
|
| 40 |
-
sa_info = json.loads(
|
| 41 |
cred = credentials.Certificate(sa_info)
|
| 42 |
firebase_admin.initialize_app(cred)
|
| 43 |
return firestore.client()
|
|
@@ -122,7 +121,7 @@ except Exception as e:
|
|
| 122 |
logger.error(f"Error configuring Generative AI: {e}", exc_info=True)
|
| 123 |
model = vision_model = llm = None
|
| 124 |
|
| 125 |
-
# --- START: VISION PROCESSING FUNCTIONS
|
| 126 |
|
| 127 |
def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[str]) -> str:
|
| 128 |
"""Converts the structured JSON list from the Vision AI into a natural language query."""
|
|
@@ -134,7 +133,6 @@ def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[s
|
|
| 134 |
details = trans.get("details", {})
|
| 135 |
trans_type = trans.get("transaction_type", "unknown")
|
| 136 |
|
| 137 |
-
# Build a descriptive string for each transaction
|
| 138 |
part = f"Record a {trans_type}"
|
| 139 |
|
| 140 |
item = details.get("item") or details.get("name") or details.get("description")
|
|
@@ -156,65 +154,28 @@ def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[s
|
|
| 156 |
|
| 157 |
query_parts.append(part)
|
| 158 |
|
| 159 |
-
# Join multiple commands together
|
| 160 |
final_query = " and ".join(query_parts)
|
| 161 |
|
| 162 |
-
# Allow caption to provide additional context, like a price for a sale
|
| 163 |
if caption:
|
| 164 |
final_query += f" {caption}"
|
| 165 |
|
| 166 |
return final_query.strip()
|
| 167 |
|
| 168 |
-
|
| 169 |
def _analyze_image_with_vision(image_bytes: bytes) -> List[Dict]:
|
| 170 |
"""Sends the image to the Gemini Vision model and returns a structured JSON list of transactions."""
|
| 171 |
if not vision_model:
|
| 172 |
-
return [
|
| 173 |
|
| 174 |
try:
|
| 175 |
image_pil = Image.open(io.BytesIO(image_bytes))
|
| 176 |
|
| 177 |
-
# This prompt is now aligned with the text-based `generateResponse` prompt
|
| 178 |
prompt = """
|
| 179 |
You are an expert bookkeeping AI. Your task is to analyze an image (which could be a receipt, invoice, or handwritten note) and extract all financial transactions.
|
| 180 |
-
|
| 181 |
-
**
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
**
|
| 185 |
-
For each distinct transaction you identify, create a JSON object with the following keys:
|
| 186 |
-
- `"intent"`: The user's goal. For images, this should always be "create".
|
| 187 |
-
- `"transaction_type"`: The category of the transaction. Infer this from keywords.
|
| 188 |
-
- `"details"`: An object containing key-value pairs extracted from the image.
|
| 189 |
-
|
| 190 |
-
**3. Inference Rules & Keywords:**
|
| 191 |
-
- **`"purchase"`**: Use this for keywords like "bought", "buy", "purchase", or for inventory items on a receipt.
|
| 192 |
-
- **`"sale"`**: Use this for keywords like "sold", "sell", or "sale".
|
| 193 |
-
- **`"expense"`**: Use this for payments for services or non-inventory goods (e.g., 'Tella Football Club', 'fuel', 'lunch', 'tickets'). If you are unsure, default to 'expense'.
|
| 194 |
-
- **Handwriting**: You MUST be able to read handwriting. Analyze each line of a handwritten note as a potential separate transaction.
|
| 195 |
-
|
| 196 |
-
**4. `details` Object Structure:**
|
| 197 |
-
- For `purchase`/`sale`: Use `"item"` and `"quantity"`.
|
| 198 |
-
- For `expense`: Use `"description"`, `"amount"`, and `"currency"`. If a vendor is clear, add `"vendor"`.
|
| 199 |
-
|
| 200 |
-
**5. Examples:**
|
| 201 |
-
|
| 202 |
-
**Example 1: Handwritten Note**
|
| 203 |
-
- **Image Content:** A note that says "bought 10 Oranges", "sold 5 oranges", "bought 5 lemons".
|
| 204 |
-
- **Output:**
|
| 205 |
-
[
|
| 206 |
-
{"intent": "create", "transaction_type": "purchase", "details": {"item": "Oranges", "quantity": 10}},
|
| 207 |
-
{"intent": "create", "transaction_type": "sale", "details": {"item": "oranges", "quantity": 5}},
|
| 208 |
-
{"intent": "create", "transaction_type": "purchase", "details": {"item": "lemons", "quantity": 5}}
|
| 209 |
-
]
|
| 210 |
-
|
| 211 |
-
**Example 2: Expense Receipt**
|
| 212 |
-
- **Image Content:** A receipt from "TELLA FOOTBALL CLUB" for "R900.00".
|
| 213 |
-
- **Output:**
|
| 214 |
-
[
|
| 215 |
-
{"intent": "create", "transaction_type": "expense", "details": {"description": "TELLA FOOTBALL CLUB", "amount": 900.00, "currency": "R", "vendor": "TELLA FOOTBALL CLUB"}}
|
| 216 |
-
]
|
| 217 |
-
|
| 218 |
Analyze the provided image and return only the JSON list.
|
| 219 |
"""
|
| 220 |
|
|
@@ -253,7 +214,7 @@ class ReportEngine:
|
|
| 253 |
self.results = {}
|
| 254 |
|
| 255 |
def _get_time_filter(self, target_df: pd.DataFrame) -> Optional[pd.Series]:
|
| 256 |
-
if 'timestamp' not in target_df.columns:
|
| 257 |
return None
|
| 258 |
today = self.now.date()
|
| 259 |
if "last month" in self.query:
|
|
@@ -296,7 +257,7 @@ class ReportEngine:
|
|
| 296 |
gross_profit = total_revenue - total_cogs
|
| 297 |
net_profit = gross_profit - total_expenses
|
| 298 |
|
| 299 |
-
num_sales = len(filtered_sales)
|
| 300 |
total_items_sold = filtered_sales['quantity'].sum() if not filtered_sales.empty else 0
|
| 301 |
atv = total_revenue / num_sales if num_sales > 0 else 0
|
| 302 |
ipt = total_items_sold / num_sales if num_sales > 0 else 0
|
|
@@ -323,11 +284,11 @@ class ReportEngine:
|
|
| 323 |
if sales_df.empty: return json.dumps({"error": f"No sales data found for '{subject_item}'."})
|
| 324 |
|
| 325 |
item_df = sales_df[sales_df['item'].str.contains(subject_item, case=False, na=False)]
|
| 326 |
-
if item_df.empty: return json.dumps({"error": f"I couldn't find any sales for '{subject_item}'."})
|
| 327 |
|
| 328 |
time_filter = self._get_time_filter(item_df)
|
| 329 |
filtered_df = item_df[time_filter] if time_filter is not None else item_df
|
| 330 |
-
if filtered_df.empty: return json.dumps({"error": f"No data for '{subject_item}' in
|
| 331 |
|
| 332 |
units_sold = filtered_df['quantity'].sum()
|
| 333 |
total_revenue = filtered_df['sale_total'].sum()
|
|
@@ -345,10 +306,10 @@ class ReportEngine:
|
|
| 345 |
|
| 346 |
elif subject == "day_of_week":
|
| 347 |
sales_df = self.dfs.get('sales', pd.DataFrame())
|
| 348 |
-
if sales_df.empty or 'day_of_week' not in sales_df.columns: return json.dumps({"error": "No data to analyze by day."})
|
| 349 |
time_filter = self._get_time_filter(sales_df)
|
| 350 |
filtered_df = sales_df[time_filter] if time_filter is not None else sales_df
|
| 351 |
-
if filtered_df.empty: return json.dumps({"error": "No sales data in
|
| 352 |
|
| 353 |
daily_sales = filtered_df.groupby('day_of_week')['sale_total'].sum()
|
| 354 |
best_day = daily_sales.idxmax()
|
|
@@ -363,10 +324,10 @@ class ReportEngine:
|
|
| 363 |
|
| 364 |
elif subject == "sales":
|
| 365 |
target_df = self.dfs.get('sales', pd.DataFrame())
|
| 366 |
-
if target_df.empty: return json.dumps({"error": "
|
| 367 |
time_filter = self._get_time_filter(target_df)
|
| 368 |
target_df = target_df[time_filter] if time_filter is not None else target_df
|
| 369 |
-
if target_df.empty: return json.dumps({"error": "No sales data
|
| 370 |
|
| 371 |
total_revenue = target_df['sale_total'].sum()
|
| 372 |
num_transactions = len(target_df)
|
|
@@ -377,10 +338,10 @@ class ReportEngine:
|
|
| 377 |
|
| 378 |
else: # expenses
|
| 379 |
target_df = self.dfs.get('expenses', pd.DataFrame())
|
| 380 |
-
if target_df.empty: return json.dumps({"error": "
|
| 381 |
time_filter = self._get_time_filter(target_df)
|
| 382 |
target_df = target_df[time_filter] if time_filter is not None else target_df
|
| 383 |
-
if target_df.empty: return json.dumps({"error": "No expense data
|
| 384 |
|
| 385 |
total_expenses = target_df['amount'].sum()
|
| 386 |
num_transactions = len(target_df)
|
|
@@ -749,6 +710,130 @@ def _validate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 749 |
df[col] = df[col].fillna('Unknown')
|
| 750 |
return df
|
| 751 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 752 |
def _find_document_by_details(user_phone: str, collection_name: str, details: Dict) -> Optional[Any]:
|
| 753 |
col_ref = db.collection("users").document(user_phone).collection(collection_name)
|
| 754 |
if 'transaction_id' in details and details['transaction_id']:
|
|
|
|
| 19 |
from PIL import Image
|
| 20 |
import io
|
| 21 |
|
|
|
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
import firebase_admin
|
|
|
|
| 36 |
if firebase_admin._apps:
|
| 37 |
return firestore.client()
|
| 38 |
sa_json = os.environ[env_var]
|
| 39 |
+
sa_info = json.loads(sa_json)
|
| 40 |
cred = credentials.Certificate(sa_info)
|
| 41 |
firebase_admin.initialize_app(cred)
|
| 42 |
return firestore.client()
|
|
|
|
| 121 |
logger.error(f"Error configuring Generative AI: {e}", exc_info=True)
|
| 122 |
model = vision_model = llm = None
|
| 123 |
|
| 124 |
+
# --- START: VISION PROCESSING FUNCTIONS ---
|
| 125 |
|
| 126 |
def _transpile_vision_json_to_query(vision_json: List[Dict], caption: Optional[str]) -> str:
|
| 127 |
"""Converts the structured JSON list from the Vision AI into a natural language query."""
|
|
|
|
| 133 |
details = trans.get("details", {})
|
| 134 |
trans_type = trans.get("transaction_type", "unknown")
|
| 135 |
|
|
|
|
| 136 |
part = f"Record a {trans_type}"
|
| 137 |
|
| 138 |
item = details.get("item") or details.get("name") or details.get("description")
|
|
|
|
| 154 |
|
| 155 |
query_parts.append(part)
|
| 156 |
|
|
|
|
| 157 |
final_query = " and ".join(query_parts)
|
| 158 |
|
|
|
|
| 159 |
if caption:
|
| 160 |
final_query += f" {caption}"
|
| 161 |
|
| 162 |
return final_query.strip()
|
| 163 |
|
|
|
|
| 164 |
def _analyze_image_with_vision(image_bytes: bytes) -> List[Dict]:
|
| 165 |
"""Sends the image to the Gemini Vision model and returns a structured JSON list of transactions."""
|
| 166 |
if not vision_model:
|
| 167 |
+
return []
|
| 168 |
|
| 169 |
try:
|
| 170 |
image_pil = Image.open(io.BytesIO(image_bytes))
|
| 171 |
|
|
|
|
| 172 |
prompt = """
|
| 173 |
You are an expert bookkeeping AI. Your task is to analyze an image (which could be a receipt, invoice, or handwritten note) and extract all financial transactions.
|
| 174 |
+
**1. Output Format:** You MUST output your response as a valid JSON list `[]` containing one or more transaction objects `{}`.
|
| 175 |
+
**2. Transaction Object Structure:** For each distinct transaction you identify, create a JSON object with: `"intent": "create"`, `"transaction_type"`, and `"details"`.
|
| 176 |
+
**3. Inference Rules & Keywords:** Use "purchase" for 'bought', "sale" for 'sold', and "expense" for services or non-inventory goods (e.g., 'Tella Football Club'). Default to 'expense' if unsure. You MUST read handwriting.
|
| 177 |
+
**4. `details` Object Structure:** For purchase/sale, use "item" and "quantity". For expense, use "description", "amount", "currency", and "vendor" if available.
|
| 178 |
+
**5. Examples:** For a note "bought 10 Oranges, sold 5 oranges", output two objects in a list. For a receipt from "TELLA FOOTBALL CLUB" for "R900.00", output one 'expense' object.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
Analyze the provided image and return only the JSON list.
|
| 180 |
"""
|
| 181 |
|
|
|
|
| 214 |
self.results = {}
|
| 215 |
|
| 216 |
def _get_time_filter(self, target_df: pd.DataFrame) -> Optional[pd.Series]:
|
| 217 |
+
if target_df is None or 'timestamp' not in target_df.columns:
|
| 218 |
return None
|
| 219 |
today = self.now.date()
|
| 220 |
if "last month" in self.query:
|
|
|
|
| 257 |
gross_profit = total_revenue - total_cogs
|
| 258 |
net_profit = gross_profit - total_expenses
|
| 259 |
|
| 260 |
+
num_sales = len(filtered_sales) if not filtered_sales.empty else 0
|
| 261 |
total_items_sold = filtered_sales['quantity'].sum() if not filtered_sales.empty else 0
|
| 262 |
atv = total_revenue / num_sales if num_sales > 0 else 0
|
| 263 |
ipt = total_items_sold / num_sales if num_sales > 0 else 0
|
|
|
|
| 284 |
if sales_df.empty: return json.dumps({"error": f"No sales data found for '{subject_item}'."})
|
| 285 |
|
| 286 |
item_df = sales_df[sales_df['item'].str.contains(subject_item, case=False, na=False)]
|
| 287 |
+
if item_df.empty: return json.dumps({"error": f"I couldn't find any sales records for an item called '{subject_item}'."})
|
| 288 |
|
| 289 |
time_filter = self._get_time_filter(item_df)
|
| 290 |
filtered_df = item_df[time_filter] if time_filter is not None else item_df
|
| 291 |
+
if filtered_df.empty: return json.dumps({"error": f"No data for '{subject_item}' in the specified period."})
|
| 292 |
|
| 293 |
units_sold = filtered_df['quantity'].sum()
|
| 294 |
total_revenue = filtered_df['sale_total'].sum()
|
|
|
|
| 306 |
|
| 307 |
elif subject == "day_of_week":
|
| 308 |
sales_df = self.dfs.get('sales', pd.DataFrame())
|
| 309 |
+
if sales_df.empty or 'day_of_week' not in sales_df.columns: return json.dumps({"error": "No data available to analyze sales by day."})
|
| 310 |
time_filter = self._get_time_filter(sales_df)
|
| 311 |
filtered_df = sales_df[time_filter] if time_filter is not None else sales_df
|
| 312 |
+
if filtered_df.empty: return json.dumps({"error": "No sales data in the specified period to analyze by day."})
|
| 313 |
|
| 314 |
daily_sales = filtered_df.groupby('day_of_week')['sale_total'].sum()
|
| 315 |
best_day = daily_sales.idxmax()
|
|
|
|
| 324 |
|
| 325 |
elif subject == "sales":
|
| 326 |
target_df = self.dfs.get('sales', pd.DataFrame())
|
| 327 |
+
if target_df.empty: return json.dumps({"error": "I couldn't find any sales data."})
|
| 328 |
time_filter = self._get_time_filter(target_df)
|
| 329 |
target_df = target_df[time_filter] if time_filter is not None else target_df
|
| 330 |
+
if target_df.empty: return json.dumps({"error": "No sales data found for the specified period."})
|
| 331 |
|
| 332 |
total_revenue = target_df['sale_total'].sum()
|
| 333 |
num_transactions = len(target_df)
|
|
|
|
| 338 |
|
| 339 |
else: # expenses
|
| 340 |
target_df = self.dfs.get('expenses', pd.DataFrame())
|
| 341 |
+
if target_df.empty: return json.dumps({"error": "I couldn't find any expense data."})
|
| 342 |
time_filter = self._get_time_filter(target_df)
|
| 343 |
target_df = target_df[time_filter] if time_filter is not None else target_df
|
| 344 |
+
if target_df.empty: return json.dumps({"error": "No expense data found for the specified period."})
|
| 345 |
|
| 346 |
total_expenses = target_df['amount'].sum()
|
| 347 |
num_transactions = len(target_df)
|
|
|
|
| 710 |
df[col] = df[col].fillna('Unknown')
|
| 711 |
return df
|
| 712 |
|
| 713 |
+
def _flatten_details(record: Dict) -> Dict:
    """Return a copy of *record* with its nested ``details`` dict merged into the top level.

    Keys from ``details`` override top-level keys of the same name and the
    ``details`` key itself is dropped. A missing or ``None`` ``details`` is
    treated as empty (previously ``None`` raised ``TypeError`` on unpacking);
    any other non-dict value is kept as-is rather than silently discarded.
    """
    merged = dict(record)
    details = merged.pop('details', None)
    if isinstance(details, dict):
        merged.update(details)
        # A 'details' key nested inside details is discarded too, as before.
        merged.pop('details', None)
    elif details is not None:
        merged['details'] = details
    return merged


def _fetch_all_collections_as_dfs(user_phone: str) -> List[Tuple[str, pd.DataFrame]]:
    """Fetches all user data, splits/validates DataFrames, and engineers features.

    Reads the user's sub-collections under ``users/<user_phone>`` from the
    module-level ``db`` client, flattens each document's ``details`` mapping
    into the top level, and passes every resulting frame through
    ``_validate_dataframe``.

    Args:
        user_phone: Document id of the user under the ``users`` collection.

    Returns:
        ``(name, DataFrame)`` pairs, in this order when data exists:
        ``inventory``, ``services`` (both split out of
        ``inventory_and_services`` by the document's top-level ``type``),
        then ``sales``, ``expenses``, ``assets``, ``liabilities``.
        Collections with no documents are omitted entirely.
    """
    user_ref = db.collection("users").document(user_phone)
    all_dfs_with_names = []

    # Inventory and services share one collection; the top-level 'type'
    # field (read before flattening) decides which frame a document joins.
    inventory_data, services_data = [], []
    for doc in user_ref.collection('inventory_and_services').stream():
        doc_data = doc.to_dict()
        bucket = services_data if doc_data.get('type') == 'service' else inventory_data
        bucket.append(_flatten_details(doc_data))
    if inventory_data:
        all_dfs_with_names.append(("inventory", _validate_dataframe(pd.DataFrame(inventory_data))))
    if services_data:
        all_dfs_with_names.append(("services", _validate_dataframe(pd.DataFrame(services_data))))

    for df_name in ('sales', 'expenses', 'assets', 'liabilities'):
        flat_rows = [
            _flatten_details(doc.to_dict())
            for doc in user_ref.collection(df_name).stream()
        ]
        if not flat_rows:
            continue
        validated_df = _validate_dataframe(pd.DataFrame(flat_rows))
        if df_name == 'sales':
            # Derived columns are only added when their inputs are present.
            if 'price' in validated_df.columns and 'quantity' in validated_df.columns:
                validated_df['sale_total'] = validated_df['price'] * validated_df['quantity']
            if 'cost' in validated_df.columns and 'quantity' in validated_df.columns:
                validated_df['cogs'] = validated_df['cost'] * validated_df['quantity']
            if 'timestamp' in validated_df.columns:
                # NOTE(review): .dt requires a datetime dtype; presumably
                # _validate_dataframe converts 'timestamp' - confirm.
                validated_df['day_of_week'] = validated_df['timestamp'].dt.day_name()
        all_dfs_with_names.append((df_name, validated_df))
    return all_dfs_with_names
|
| 747 |
+
|
| 748 |
+
def _get_relative_date_context(now: Optional[datetime] = None) -> str:
    """
    Generates a string of pre-calculated dates to inject into the PandasAI prompt
    for improved temporal awareness.

    Args:
        now: Reference moment for all calculations. Defaults to the current
            time in UTC; pass a fixed value to get reproducible output.

    Returns:
        A newline-joined block: one header line followed by five
        ``- label: YYYY-MM-DD`` lines (today, yesterday, start of this week,
        start of last week, last Monday). Weeks start on Monday.
    """
    today = datetime.now(timezone.utc) if now is None else now

    def fmt(d):
        return d.strftime('%Y-%m-%d')

    yesterday = today - timedelta(days=1)
    # weekday() is 0 for Monday, so this steps back to the current week's Monday.
    start_of_this_week = today - timedelta(days=today.weekday())
    start_of_last_week = start_of_this_week - timedelta(days=7)
    # "Last Monday" is reported as the Monday of the previous week, i.e. the
    # same date as the start of last week.
    last_monday = start_of_last_week

    context = [
        "Here are some pre-calculated dates to help you understand the user's request:",
        f"- Today is: {fmt(today)}",
        f"- Yesterday was: {fmt(yesterday)}",
        f"- The start of this week was: {fmt(start_of_this_week)}",
        f"- The start of last week was: {fmt(start_of_last_week)}",
        f"- Last Monday was on: {fmt(last_monday)}",
    ]

    return "\n".join(context)
|
| 776 |
+
|
| 777 |
+
def read_datalake(user_phone: str, query: str) -> str:
    """
    Route a natural-language data question for one user to a handling path.

    The user's collections are loaded first; with no data at all a fixed
    message is returned. Otherwise the lower-cased query is matched, in order,
    against: simple lookup phrases (returns a rendered table or a "nothing
    recorded" message), an item-report pattern, forecast keywords, historical
    report keywords, and finally the PandasAI fallback.

    NOTE: only the simple-lookup path returns a real result here. The item
    report is computed and then falls through; the forecast, general-report
    synthesis and PandasAI steps are placeholders, and every such path ends
    in the same fixed placeholder string.

    Args:
        user_phone: Identifier of the user whose data is queried.
        query: The user's question, as typed.

    Returns:
        A user-facing string. Errors are logged and converted to an apology
        message; nothing is raised to the caller.
    """
    try:
        named_frames = _fetch_all_collections_as_dfs(user_phone)
        if not named_frames:
            return "You have no data recorded yet. Please add some transactions first."

        lowered = query.lower()
        engine = ReportEngine(named_frames, query)

        def mentions_any(phrases):
            # Plain substring match against the lower-cased query.
            return any(phrase in lowered for phrase in phrases)

        # --- Simple lookup: first frame name whose phrases match wins. ---
        simple_lookup_map = {
            "inventory": ["stock", "inventory", "in stock", "what do i have"],
            "assets": ["asset", "assets", "my assets"],
            "liabilities": ["liabilities", "i owe", "creditor", "my debts"],
            "sales": ["show my sales", "list sales"],
            "expenses": ["show my expenses", "list expenses"]
        }
        for df_name, phrases in simple_lookup_map.items():
            if not mentions_any(phrases):
                continue
            logger.info(f"Handling '{query}' with Simple Lookup Path for '{df_name}'.")
            found = None
            for entry in named_frames:
                if entry[0] == df_name:
                    found = entry
                    break
            if found is None or found[1].empty:
                return f"You don't have any {df_name} recorded yet."
            return render_df_as_image(found[1])

        # --- Item report: "report on <item>" / "how did <item>". ---
        item_hit = re.search(r"(?:report on|how did) ([\w\s]+)", lowered)
        if item_hit:
            item_name = item_hit.group(1).strip()
            logger.info(f"Handling '{query}' with the Item Report Path for item: '{item_name}'.")
            report_json = engine.generate_report(subject_item=item_name)
            # ... synthesis prompt and response ...

        predictive_keywords = ["expect", "forecast", "predict"]
        historical_report_keywords = ["report", "summary", "performance", "how did i do", "overview", "month", "year", "week", "today", "profit", "best day", "busiest day", "sales by day"]

        if mentions_any(predictive_keywords):
            # ... forecast logic ...
            pass
        elif mentions_any(historical_report_keywords):
            logger.info(f"Handling '{query}' with the General Reporting Path.")
            report_json = engine.generate_report()
            # ... synthesis prompt and response ...
        else:
            logger.info(f"Handling '{query}' with the Fortified PandasAI Path.")
            # ... pandasai logic ...
            pass

        # Placeholder for actual response generation logic
        return "Generated Report/Analysis"

    except (NoCodeFoundError, MaliciousQueryError) as e:
        logger.error(f"PandasAI failed for query '{query}': {e}")
        return f"Unfortunately, I was not able to answer your question: {e}"
    except Exception as e:
        # Top-level boundary: log with traceback, answer with a generic apology.
        logger.error(f"Data query failed for user {user_phone}, query '{query}': {e}", exc_info=True)
        return "Sorry, I encountered an error while analyzing your data."
|
| 836 |
+
|
| 837 |
def _find_document_by_details(user_phone: str, collection_name: str, details: Dict) -> Optional[Any]:
|
| 838 |
col_ref = db.collection("users").document(user_phone).collection(collection_name)
|
| 839 |
if 'transaction_id' in details and details['transaction_id']:
|