import pandas as pd import numpy as np # Fixed non-click columns NON_CLICK_COLS = {"date", "month", "year", "blogs", "h1", "meta title", "meta description", "total clicks", "total_clicks"} TIER_CONFIG = { "takedown_zero_max": 0, "takedown_low_max": 5, "takedown_low_monthly": 2, "monitor_min": 6, "monitor_max": 20, "performing_min": 21, "performing_max": 100, "strong_min": 101, "strong_max": 500, "top_min": 501, } def detect_columns(df: pd.DataFrame) -> dict: """ Auto-detect URL, title, month, and total_clicks columns from any CSV with the expected structure. Returns a dict with keys: url_col, title_col, month_cols, total_col """ cols_lower = {c.lower(): c for c in df.columns} # URL column — 'Blogs' or any col containing http values url_col = cols_lower.get("blogs") or cols_lower.get("url") or cols_lower.get("urls") if not url_col: for col in df.columns: sample = df[col].dropna().astype(str).head(5) if sample.str.startswith("http").any(): url_col = col break # Title column — 'H1' or 'Title' title_col = cols_lower.get("h1") or cols_lower.get("title") or cols_lower.get("meta title") # Total clicks column total_col = cols_lower.get("total clicks") or cols_lower.get("total_clicks") # Month columns — numeric columns not in the known set month_cols = [] for col in df.columns: if col.lower() in NON_CLICK_COLS: continue if pd.api.types.is_numeric_dtype(df[col]) or _is_mostly_numeric(df[col]): month_cols.append(col) # Remove total_col from month_cols if accidentally included if total_col and total_col in month_cols: month_cols.remove(total_col) return { "url_col": url_col, "title_col": title_col, "month_cols": month_cols, "total_col": total_col, } def _is_mostly_numeric(series: pd.Series, threshold=0.7) -> bool: converted = pd.to_numeric(series, errors="coerce") valid = converted.notna().sum() return valid / max(len(series), 1) >= threshold def clean_and_tier(df: pd.DataFrame, col_map: dict) -> pd.DataFrame: """ Build a clean analysis dataframe with tier, trend, and slug columns. """ url_col = col_map["url_col"] title_col = col_map["title_col"] month_cols = col_map["month_cols"] total_col = col_map["total_col"] out = pd.DataFrame() out["url"] = df[url_col].astype(str).str.strip() out["title"] = df[title_col].astype(str).str.strip() if title_col else out["url"] out["slug"] = out["url"].apply( lambda x: x.split("/blog/")[-1] if "/blog/" in x else x.rstrip("/").split("/")[-1] ) for col in month_cols: out[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int) if total_col: out["total_clicks"] = pd.to_numeric(df[total_col], errors="coerce").fillna(0).astype(int) else: out["total_clicks"] = out[month_cols].sum(axis=1) # Trend out["trend"] = out.apply(lambda r: _get_trend(r, month_cols), axis=1) # Tier out["tier"] = out.apply(lambda r: _assign_tier(r, month_cols), axis=1) # Recommended action out["action"] = out.apply(lambda r: _get_action(r["tier"], r["total_clicks"]), axis=1) return out def _get_trend(row, month_cols): if len(month_cols) < 2: return "➡️ Stable" vals = [row[c] for c in month_cols] if vals[-1] > vals[0]: return "📈 Growing" elif vals[-1] < vals[0]: return "📉 Declining" return "➡️ Stable" def _assign_tier(row, month_cols): c = row["total_clicks"] max_m = max([row[m] for m in month_cols]) if month_cols else 0 if c == 0: return "TAKEDOWN_ZERO" elif c <= TIER_CONFIG["takedown_low_max"] and max_m <= TIER_CONFIG["takedown_low_monthly"]: return "TAKEDOWN_LOW" elif TIER_CONFIG["monitor_min"] <= c <= TIER_CONFIG["monitor_max"]: return "MONITOR" elif TIER_CONFIG["performing_min"] <= c <= TIER_CONFIG["performing_max"]: return "PERFORMING" elif TIER_CONFIG["strong_min"] <= c <= TIER_CONFIG["strong_max"]: return "STRONG" elif c >= TIER_CONFIG["top_min"]: return "TOP" return "PERFORMING" def _get_action(tier, clicks): actions = { "TAKEDOWN_ZERO": "Remove immediately. Zero organic traction across all months. Set up 410 or 301 redirect.", "TAKEDOWN_LOW": "Merge into a stronger related article or remove. Implement 301 redirect.", "MONITOR": "Optimize meta title, description & keywords. Review in 90 days. Merge if no improvement.", "PERFORMING": "Refresh content, strengthen internal links, add FAQ schema. Push for top 50 clicks.", "STRONG": "Update statistics & examples. Add lead gen CTA. Build backlinks to reach 500+ clicks.", "TOP": "Priority asset. Add lead magnets, improve CTAs, build backlinks. Protect rankings.", } if tier == "MONITOR" and clicks >= 15: return "Good potential. Optimize meta description and add 2–3 internal links to improve CTR." return actions.get(tier, "Review manually.") def get_tier_summary(df: pd.DataFrame) -> dict: counts = df["tier"].value_counts().to_dict() return { "TAKEDOWN_ZERO": counts.get("TAKEDOWN_ZERO", 0), "TAKEDOWN_LOW": counts.get("TAKEDOWN_LOW", 0), "MONITOR": counts.get("MONITOR", 0), "PERFORMING": counts.get("PERFORMING", 0), "STRONG": counts.get("STRONG", 0), "TOP": counts.get("TOP", 0), } TIER_META = { "TAKEDOWN_ZERO": {"label": "🔴 Take Down (0 clicks)", "color": "#FF4B4B"}, "TAKEDOWN_LOW": {"label": "🟠 Take Down (1–5 clicks)", "color": "#FF8C00"}, "MONITOR": {"label": "🟡 Monitor (6–20 clicks)", "color": "#FFC300"}, "PERFORMING": {"label": "✅ Performing (21–100 clicks)", "color": "#2ECC71"}, "STRONG": {"label": "💪 Strong (101–500 clicks)", "color": "#1ABC9C"}, "TOP": {"label": "🏆 Top Performers (500+ clicks)","color": "#9B59B6"}, }