Spaces:
Sleeping
Sleeping
File size: 6,332 Bytes
6d28094 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 | import pandas as pd
import numpy as np
# Fixed non-click columns
NON_CLICK_COLS = {"date", "month", "year", "blogs", "h1", "meta title",
"meta description", "total clicks", "total_clicks"}
TIER_CONFIG = {
"takedown_zero_max": 0,
"takedown_low_max": 5,
"takedown_low_monthly": 2,
"monitor_min": 6,
"monitor_max": 20,
"performing_min": 21,
"performing_max": 100,
"strong_min": 101,
"strong_max": 500,
"top_min": 501,
}
def detect_columns(df: pd.DataFrame) -> dict:
"""
Auto-detect URL, title, month, and total_clicks columns
from any CSV with the expected structure.
Returns a dict with keys: url_col, title_col, month_cols, total_col
"""
cols_lower = {c.lower(): c for c in df.columns}
# URL column β 'Blogs' or any col containing http values
url_col = cols_lower.get("blogs") or cols_lower.get("url") or cols_lower.get("urls")
if not url_col:
for col in df.columns:
sample = df[col].dropna().astype(str).head(5)
if sample.str.startswith("http").any():
url_col = col
break
# Title column β 'H1' or 'Title'
title_col = cols_lower.get("h1") or cols_lower.get("title") or cols_lower.get("meta title")
# Total clicks column
total_col = cols_lower.get("total clicks") or cols_lower.get("total_clicks")
# Month columns β numeric columns not in the known set
month_cols = []
for col in df.columns:
if col.lower() in NON_CLICK_COLS:
continue
if pd.api.types.is_numeric_dtype(df[col]) or _is_mostly_numeric(df[col]):
month_cols.append(col)
# Remove total_col from month_cols if accidentally included
if total_col and total_col in month_cols:
month_cols.remove(total_col)
return {
"url_col": url_col,
"title_col": title_col,
"month_cols": month_cols,
"total_col": total_col,
}
def _is_mostly_numeric(series: pd.Series, threshold=0.7) -> bool:
converted = pd.to_numeric(series, errors="coerce")
valid = converted.notna().sum()
return valid / max(len(series), 1) >= threshold
def clean_and_tier(df: pd.DataFrame, col_map: dict) -> pd.DataFrame:
"""
Build a clean analysis dataframe with tier, trend, and slug columns.
"""
url_col = col_map["url_col"]
title_col = col_map["title_col"]
month_cols = col_map["month_cols"]
total_col = col_map["total_col"]
out = pd.DataFrame()
out["url"] = df[url_col].astype(str).str.strip()
out["title"] = df[title_col].astype(str).str.strip() if title_col else out["url"]
out["slug"] = out["url"].apply(
lambda x: x.split("/blog/")[-1] if "/blog/" in x else x.rstrip("/").split("/")[-1]
)
for col in month_cols:
out[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)
if total_col:
out["total_clicks"] = pd.to_numeric(df[total_col], errors="coerce").fillna(0).astype(int)
else:
out["total_clicks"] = out[month_cols].sum(axis=1)
# Trend
out["trend"] = out.apply(lambda r: _get_trend(r, month_cols), axis=1)
# Tier
out["tier"] = out.apply(lambda r: _assign_tier(r, month_cols), axis=1)
# Recommended action
out["action"] = out.apply(lambda r: _get_action(r["tier"], r["total_clicks"]), axis=1)
return out
def _get_trend(row, month_cols):
if len(month_cols) < 2:
return "β‘οΈ Stable"
vals = [row[c] for c in month_cols]
if vals[-1] > vals[0]:
return "π Growing"
elif vals[-1] < vals[0]:
return "π Declining"
return "β‘οΈ Stable"
def _assign_tier(row, month_cols):
c = row["total_clicks"]
max_m = max([row[m] for m in month_cols]) if month_cols else 0
if c == 0:
return "TAKEDOWN_ZERO"
elif c <= TIER_CONFIG["takedown_low_max"] and max_m <= TIER_CONFIG["takedown_low_monthly"]:
return "TAKEDOWN_LOW"
elif TIER_CONFIG["monitor_min"] <= c <= TIER_CONFIG["monitor_max"]:
return "MONITOR"
elif TIER_CONFIG["performing_min"] <= c <= TIER_CONFIG["performing_max"]:
return "PERFORMING"
elif TIER_CONFIG["strong_min"] <= c <= TIER_CONFIG["strong_max"]:
return "STRONG"
elif c >= TIER_CONFIG["top_min"]:
return "TOP"
return "PERFORMING"
def _get_action(tier, clicks):
actions = {
"TAKEDOWN_ZERO": "Remove immediately. Zero organic traction across all months. Set up 410 or 301 redirect.",
"TAKEDOWN_LOW": "Merge into a stronger related article or remove. Implement 301 redirect.",
"MONITOR": "Optimize meta title, description & keywords. Review in 90 days. Merge if no improvement.",
"PERFORMING": "Refresh content, strengthen internal links, add FAQ schema. Push for top 50 clicks.",
"STRONG": "Update statistics & examples. Add lead gen CTA. Build backlinks to reach 500+ clicks.",
"TOP": "Priority asset. Add lead magnets, improve CTAs, build backlinks. Protect rankings.",
}
if tier == "MONITOR" and clicks >= 15:
return "Good potential. Optimize meta description and add 2β3 internal links to improve CTR."
return actions.get(tier, "Review manually.")
def get_tier_summary(df: pd.DataFrame) -> dict:
counts = df["tier"].value_counts().to_dict()
return {
"TAKEDOWN_ZERO": counts.get("TAKEDOWN_ZERO", 0),
"TAKEDOWN_LOW": counts.get("TAKEDOWN_LOW", 0),
"MONITOR": counts.get("MONITOR", 0),
"PERFORMING": counts.get("PERFORMING", 0),
"STRONG": counts.get("STRONG", 0),
"TOP": counts.get("TOP", 0),
}
TIER_META = {
"TAKEDOWN_ZERO": {"label": "π΄ Take Down (0 clicks)", "color": "#FF4B4B"},
"TAKEDOWN_LOW": {"label": "π Take Down (1β5 clicks)", "color": "#FF8C00"},
"MONITOR": {"label": "π‘ Monitor (6β20 clicks)", "color": "#FFC300"},
"PERFORMING": {"label": "β
Performing (21β100 clicks)", "color": "#2ECC71"},
"STRONG": {"label": "πͺ Strong (101β500 clicks)", "color": "#1ABC9C"},
"TOP": {"label": "π Top Performers (500+ clicks)","color": "#9B59B6"},
}
|