Spaces:

Em4e
/

seo-b2b-saas-forecasting-tool

Sleeping

App Files Community

Em4e committed on Jun 8, 2025

Commit

64ee1b1

verified ·

1 Parent(s): 8c118e9

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -79

app.py CHANGED Viewed

@@ -18,42 +18,27 @@ class DataLoader:
         self.sample_file_url = sample_file_url
     @st.cache_data
-    def load_csv(self, uploaded_file_obj: st.runtime.uploaded_file_manager.UploadedFile | None) -> pd.DataFrame | None:
-    # Changed back to 'self' from '_self' as per the initial correction, but the error
-    # message is explicitly asking for '_self'. Let's follow the error's advice.
-    # The initial attempt to correct by changing `_self` back to `self` was incorrect
-    # for the specific error you're getting.
-    # Streamlit's error message is authoritative here.
-    # So, we revert to what the error message advises: `_self` for cached methods.
-        """
-        Loads the GSC data from an uploaded CSV or a sample URL,
-        normalizes column names, and ensures a 'cpc' column exists.
-        Args:
-            _self: The instance of the DataLoader class (ignored by Streamlit caching).
-            uploaded_file_obj (streamlit.runtime.uploaded_file_manager.UploadedFile): The file object
-                                                                             uploaded by the user, or None.
-        Returns:
-            pd.DataFrame: The loaded and processed DataFrame, or None if an error occurs.
-        """
-        try:
-            # We must use `self.sample_file_url` within the method
-            # because `_self` is a positional argument that Streamlit special-handles
-            # for caching, but the actual instance is still `self`.
-            # This is a bit counter-intuitive but necessary for Streamlit's caching with methods.
-            if uploaded_file_obj:
-                df = pd.read_csv(uploaded_file_obj)
-            else:
-                df = pd.read_csv(self.sample_file_url) # Use self here, not _self
-        except Exception as e:
-            st.error(f"Error loading file: {e}")
-            return None
-        df.columns = [col.lower() for col in df.columns]
-        if "cpc" not in df.columns:
-            st.warning("No `cpc` column found—simulating CPC values between 0.50–3.00 USD (for testing purposes only!)")
-            df["cpc"] = np.round(np.random.uniform(0.5, 3.0, size=len(df)), 2)
-        return df
 # --- 2. Core Calculation Logic (Single Responsibility Principle) ---
 class SeoCalculator:
@@ -91,52 +76,63 @@ class SeoCalculator:
         return df.rename(columns={found_columns[k]: k for k in found_columns})
     @st.cache_data
-    def calculate_metrics(
-        self, # Changed to self for the instance reference
-        df: pd.DataFrame,
-        target_position: float,
-        conversion_rate: float,
-        close_rate: float,
-        mrr_per_customer: int,
-        seo_cost: int,
-        add_spend: int,
-    ) -> tuple[dict, pd.DataFrame] | tuple[None, pd.DataFrame]:
-    # Again, the error specifically asks for `_self` for cached methods.
-    # Let's adhere to Streamlit's recommendation for cached methods to prevent hashing `self`.
-    # So, we change it back to `_self` for `calculate_metrics` as well.
-        """
-        Performs core calculations for SEO forecasting based on GSC data and user inputs.
-        Returns:
-            tuple: A dictionary of calculated metrics and a DataFrame with detailed results.
-                   Returns (None, pd.DataFrame()) if required columns are missing.
-        """
-        # Within the method, you continue to use `self` to access instance attributes.
-        df_processed = self._validate_and_rename_columns(df.copy())
-        if df_processed is None:
-            return None, pd.DataFrame()
-        df_processed["current_ctr"] = df_processed["position"].apply(self._get_ctr)
-        target_ctr_value = self._get_ctr(target_position)
-        df_processed["target_ctr"] = target_ctr_value
-        df_processed["current_clicks"] = df_processed["impressions"] * df_processed["current_ctr"]
-        df_processed["projected_clicks"] = df_processed["impressions"] * df_processed["target_ctr"]
-        df_processed["incremental_clicks"] = df_processed["projected_clicks"] - df_processed["current_clicks"]
-        df_processed["avoided_paid_spend"] = df_processed["incremental_clicks"] * df_processed["cpc"]
-        # --- Financial calculations ---
-        total_avoided_paid_spend = df_processed["avoided_paid_spend"].sum()
-        net_savings_vs_paid = total_avoided_paid_spend - seo_cost
-        total_incremental_conversions = df_processed["incremental_clicks"].sum() * (
-            conversion_rate / 100
-        )
-        total_incremental_customers = total_incremental_conversions * (close_rate / 100)
-        incremental_mrr = total_incremental_customers * mrr_per_customer
-        if seo_cost > 0:
-            seo_roi = (incremental_mrr - seo_cost) / seo_cost
         else:
-            seo_roi = np.inf
         # Categorize impact for each query
         def categorize_impact(row):

         self.sample_file_url = sample_file_url
     @st.cache_data
+def load_csv(_self, uploaded_file_obj: st.runtime.uploaded_file_manager.UploadedFile | None) -> pd.DataFrame | None:
+    """
+    Loads the GSC data from an uploaded CSV or a sample URL,
+    normalizes column names, and ensures a 'cpc' column exists.
+    """
+    try:
+        if uploaded_file_obj:
+            df = pd.read_csv(uploaded_file_obj)
+        else:
+            df = pd.read_csv(_self.sample_file_url)  # use _self here
+    except Exception as e:
+        st.error(f"Error loading file: {e}")
+        return None
+    df.columns = [col.strip().lower() for col in df.columns]
+    if "cpc" not in df.columns:
+        st.warning("No `cpc` column found—simulating CPC values between 0.50–3.00 USD (for testing purposes only!)")
+        df["cpc"] = np.round(np.random.uniform(0.5, 3.0, size=len(df)), 2)
+    return df
 # --- 2. Core Calculation Logic (Single Responsibility Principle) ---
 class SeoCalculator:
         return df.rename(columns={found_columns[k]: k for k in found_columns})
     @st.cache_data
+def calculate_metrics(
+    _self,
+    df: pd.DataFrame,
+    target_position: float,
+    conversion_rate: float,
+    close_rate: float,
+    mrr_per_customer: int,
+    seo_cost: int,
+    add_spend: int,
+) -> tuple[dict, pd.DataFrame] | tuple[None, pd.DataFrame]:
+    """
+    Performs core calculations for SEO forecasting based on GSC data and user inputs.
+    """
+    df_processed = _self._validate_and_rename_columns(df.copy())
+    if df_processed is None:
+        return None, pd.DataFrame()
+    df_processed["current_ctr"] = df_processed["position"].apply(_self._get_ctr)
+    target_ctr_value = _self._get_ctr(target_position)
+    df_processed["target_ctr"] = target_ctr_value
+    df_processed["current_clicks"] = df_processed["impressions"] * df_processed["current_ctr"]
+    df_processed["projected_clicks"] = df_processed["impressions"] * df_processed["target_ctr"]
+    df_processed["incremental_clicks"] = df_processed["projected_clicks"] - df_processed["current_clicks"]
+    df_processed["avoided_paid_spend"] = df_processed["incremental_clicks"] * df_processed["cpc"]
+    # Financial logic
+    total_avoided_paid_spend = df_processed["avoided_paid_spend"].sum()
+    net_savings_vs_paid = total_avoided_paid_spend - seo_cost
+    total_incremental_conversions = df_processed["incremental_clicks"].sum() * (conversion_rate / 100)
+    total_incremental_customers = total_incremental_conversions * (close_rate / 100)
+    incremental_mrr = total_incremental_customers * mrr_per_customer
+    if seo_cost > 0:
+        seo_roi = (incremental_mrr - seo_cost) / seo_cost
+    else:
+        seo_roi = np.inf
+    def categorize_impact(row):
+        if row["position"] > target_position:
+            return "🚀 Improvement"
+        elif row["position"] <= target_position and row["incremental_clicks"] > 0:
+            return "✅ Maintain & Grow"
         else:
+            return "🎯 Reached Target"
+    df_processed["impact_category"] = df_processed.apply(categorize_impact, axis=1)
+    metrics = {
+        "total_avoided_paid_spend": total_avoided_paid_spend,
+        "net_savings_vs_paid": net_savings_vs_paid,
+        "total_incremental_conversions": total_incremental_conversions,
+        "total_incremental_customers": total_incremental_customers,
+        "incremental_mrr": incremental_mrr,
+        "seo_roi": seo_roi,
+    }
+    return metrics, df_processed
         # Categorize impact for each query
         def categorize_impact(row):