Em4e committed on
Commit
64ee1b1
·
verified ·
1 Parent(s): 8c118e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -79
app.py CHANGED
@@ -18,42 +18,27 @@ class DataLoader:
18
  self.sample_file_url = sample_file_url
19
 
20
  @st.cache_data
21
- def load_csv(self, uploaded_file_obj: st.runtime.uploaded_file_manager.UploadedFile | None) -> pd.DataFrame | None:
22
- # Changed back to 'self' from '_self' as per the initial correction, but the error
23
- # message is explicitly asking for '_self'. Let's follow the error's advice.
24
- # The initial attempt to correct by changing `_self` back to `self` was incorrect
25
- # for the specific error you're getting.
26
- # Streamlit's error message is authoritative here.
27
- # So, we revert to what the error message advises: `_self` for cached methods.
28
- """
29
- Loads the GSC data from an uploaded CSV or a sample URL,
30
- normalizes column names, and ensures a 'cpc' column exists.
31
- Args:
32
- _self: The instance of the DataLoader class (ignored by Streamlit caching).
33
- uploaded_file_obj (streamlit.runtime.uploaded_file_manager.UploadedFile): The file object
34
- uploaded by the user, or None.
35
- Returns:
36
- pd.DataFrame: The loaded and processed DataFrame, or None if an error occurs.
37
- """
38
- try:
39
- # We must use `self.sample_file_url` within the method
40
- # because `_self` is a positional argument that Streamlit special-handles
41
- # for caching, but the actual instance is still `self`.
42
- # This is a bit counter-intuitive but necessary for Streamlit's caching with methods.
43
- if uploaded_file_obj:
44
- df = pd.read_csv(uploaded_file_obj)
45
- else:
46
- df = pd.read_csv(self.sample_file_url) # Use self here, not _self
47
- except Exception as e:
48
- st.error(f"Error loading file: {e}")
49
- return None
50
 
51
- df.columns = [col.lower() for col in df.columns]
 
 
 
52
 
53
- if "cpc" not in df.columns:
54
- st.warning("No `cpc` column found—simulating CPC values between 0.50–3.00 USD (for testing purposes only!)")
55
- df["cpc"] = np.round(np.random.uniform(0.5, 3.0, size=len(df)), 2)
56
- return df
57
 
58
  # --- 2. Core Calculation Logic (Single Responsibility Principle) ---
59
  class SeoCalculator:
@@ -91,52 +76,63 @@ class SeoCalculator:
91
  return df.rename(columns={found_columns[k]: k for k in found_columns})
92
 
93
  @st.cache_data
94
- def calculate_metrics(
95
- self, # Changed to self for the instance reference
96
- df: pd.DataFrame,
97
- target_position: float,
98
- conversion_rate: float,
99
- close_rate: float,
100
- mrr_per_customer: int,
101
- seo_cost: int,
102
- add_spend: int,
103
- ) -> tuple[dict, pd.DataFrame] | tuple[None, pd.DataFrame]:
104
- # Again, the error specifically asks for `_self` for cached methods.
105
- # Let's adhere to Streamlit's recommendation for cached methods to prevent hashing `self`.
106
- # So, we change it back to `_self` for `calculate_metrics` as well.
107
- """
108
- Performs core calculations for SEO forecasting based on GSC data and user inputs.
109
- Returns:
110
- tuple: A dictionary of calculated metrics and a DataFrame with detailed results.
111
- Returns (None, pd.DataFrame()) if required columns are missing.
112
- """
113
- # Within the method, you continue to use `self` to access instance attributes.
114
- df_processed = self._validate_and_rename_columns(df.copy())
115
- if df_processed is None:
116
- return None, pd.DataFrame()
117
-
118
- df_processed["current_ctr"] = df_processed["position"].apply(self._get_ctr)
119
- target_ctr_value = self._get_ctr(target_position)
120
- df_processed["target_ctr"] = target_ctr_value
121
-
122
- df_processed["current_clicks"] = df_processed["impressions"] * df_processed["current_ctr"]
123
- df_processed["projected_clicks"] = df_processed["impressions"] * df_processed["target_ctr"]
124
- df_processed["incremental_clicks"] = df_processed["projected_clicks"] - df_processed["current_clicks"]
125
- df_processed["avoided_paid_spend"] = df_processed["incremental_clicks"] * df_processed["cpc"]
126
-
127
- # --- Financial calculations ---
128
- total_avoided_paid_spend = df_processed["avoided_paid_spend"].sum()
129
- net_savings_vs_paid = total_avoided_paid_spend - seo_cost
130
- total_incremental_conversions = df_processed["incremental_clicks"].sum() * (
131
- conversion_rate / 100
132
- )
133
- total_incremental_customers = total_incremental_conversions * (close_rate / 100)
134
- incremental_mrr = total_incremental_customers * mrr_per_customer
135
-
136
- if seo_cost > 0:
137
- seo_roi = (incremental_mrr - seo_cost) / seo_cost
138
  else:
139
- seo_roi = np.inf
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  # Categorize impact for each query
142
  def categorize_impact(row):
 
18
  self.sample_file_url = sample_file_url
19
 
20
  @st.cache_data
21
+ def load_csv(_self, uploaded_file_obj: st.runtime.uploaded_file_manager.UploadedFile | None) -> pd.DataFrame | None:
22
+ """
23
+ Loads the GSC data from an uploaded CSV or a sample URL,
24
+ normalizes column names, and ensures a 'cpc' column exists.
25
+ """
26
+ try:
27
+ if uploaded_file_obj:
28
+ df = pd.read_csv(uploaded_file_obj)
29
+ else:
30
+ df = pd.read_csv(_self.sample_file_url) # use _self here
31
+ except Exception as e:
32
+ st.error(f"Error loading file: {e}")
33
+ return None
34
+
35
+ df.columns = [col.strip().lower() for col in df.columns]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ if "cpc" not in df.columns:
38
+ st.warning("No `cpc` column found—simulating CPC values between 0.50–3.00 USD (for testing purposes only!)")
39
+ df["cpc"] = np.round(np.random.uniform(0.5, 3.0, size=len(df)), 2)
40
+ return df
41
 
 
 
 
 
42
 
43
  # --- 2. Core Calculation Logic (Single Responsibility Principle) ---
44
  class SeoCalculator:
 
76
  return df.rename(columns={found_columns[k]: k for k in found_columns})
77
 
78
  @st.cache_data
79
+ def calculate_metrics(
80
+ _self,
81
+ df: pd.DataFrame,
82
+ target_position: float,
83
+ conversion_rate: float,
84
+ close_rate: float,
85
+ mrr_per_customer: int,
86
+ seo_cost: int,
87
+ add_spend: int,
88
+ ) -> tuple[dict, pd.DataFrame] | tuple[None, pd.DataFrame]:
89
+ """
90
+ Performs core calculations for SEO forecasting based on GSC data and user inputs.
91
+ """
92
+ df_processed = _self._validate_and_rename_columns(df.copy())
93
+ if df_processed is None:
94
+ return None, pd.DataFrame()
95
+
96
+ df_processed["current_ctr"] = df_processed["position"].apply(_self._get_ctr)
97
+ target_ctr_value = _self._get_ctr(target_position)
98
+ df_processed["target_ctr"] = target_ctr_value
99
+
100
+ df_processed["current_clicks"] = df_processed["impressions"] * df_processed["current_ctr"]
101
+ df_processed["projected_clicks"] = df_processed["impressions"] * df_processed["target_ctr"]
102
+ df_processed["incremental_clicks"] = df_processed["projected_clicks"] - df_processed["current_clicks"]
103
+ df_processed["avoided_paid_spend"] = df_processed["incremental_clicks"] * df_processed["cpc"]
104
+
105
+ # Financial logic
106
+ total_avoided_paid_spend = df_processed["avoided_paid_spend"].sum()
107
+ net_savings_vs_paid = total_avoided_paid_spend - seo_cost
108
+ total_incremental_conversions = df_processed["incremental_clicks"].sum() * (conversion_rate / 100)
109
+ total_incremental_customers = total_incremental_conversions * (close_rate / 100)
110
+ incremental_mrr = total_incremental_customers * mrr_per_customer
111
+
112
+ if seo_cost > 0:
113
+ seo_roi = (incremental_mrr - seo_cost) / seo_cost
114
+ else:
115
+ seo_roi = np.inf
116
+
117
+ def categorize_impact(row):
118
+ if row["position"] > target_position:
119
+ return "🚀 Improvement"
120
+ elif row["position"] <= target_position and row["incremental_clicks"] > 0:
121
+ return "✅ Maintain & Grow"
 
122
  else:
123
+ return "🎯 Reached Target"
124
+ df_processed["impact_category"] = df_processed.apply(categorize_impact, axis=1)
125
+
126
+ metrics = {
127
+ "total_avoided_paid_spend": total_avoided_paid_spend,
128
+ "net_savings_vs_paid": net_savings_vs_paid,
129
+ "total_incremental_conversions": total_incremental_conversions,
130
+ "total_incremental_customers": total_incremental_customers,
131
+ "incremental_mrr": incremental_mrr,
132
+ "seo_roi": seo_roi,
133
+ }
134
+ return metrics, df_processed
135
+
136
 
137
  # Categorize impact for each query
138
  def categorize_impact(row):