File size: 6,332 Bytes
6d28094
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import pandas as pd
import numpy as np

# Fixed non-click columns
NON_CLICK_COLS = {"date", "month", "year", "blogs", "h1", "meta title",
                  "meta description", "total clicks", "total_clicks"}

TIER_CONFIG = {
    "takedown_zero_max":    0,
    "takedown_low_max":     5,
    "takedown_low_monthly": 2,
    "monitor_min":          6,
    "monitor_max":          20,
    "performing_min":       21,
    "performing_max":       100,
    "strong_min":           101,
    "strong_max":           500,
    "top_min":              501,
}


def detect_columns(df: pd.DataFrame) -> dict:
    """
    Auto-detect URL, title, month, and total_clicks columns
    from any CSV with the expected structure.
    Returns a dict with keys: url_col, title_col, month_cols, total_col
    """
    cols_lower = {c.lower(): c for c in df.columns}

    # URL column β€” 'Blogs' or any col containing http values
    url_col = cols_lower.get("blogs") or cols_lower.get("url") or cols_lower.get("urls")
    if not url_col:
        for col in df.columns:
            sample = df[col].dropna().astype(str).head(5)
            if sample.str.startswith("http").any():
                url_col = col
                break

    # Title column β€” 'H1' or 'Title'
    title_col = cols_lower.get("h1") or cols_lower.get("title") or cols_lower.get("meta title")

    # Total clicks column
    total_col = cols_lower.get("total clicks") or cols_lower.get("total_clicks")

    # Month columns β€” numeric columns not in the known set
    month_cols = []
    for col in df.columns:
        if col.lower() in NON_CLICK_COLS:
            continue
        if pd.api.types.is_numeric_dtype(df[col]) or _is_mostly_numeric(df[col]):
            month_cols.append(col)

    # Remove total_col from month_cols if accidentally included
    if total_col and total_col in month_cols:
        month_cols.remove(total_col)

    return {
        "url_col":    url_col,
        "title_col":  title_col,
        "month_cols": month_cols,
        "total_col":  total_col,
    }


def _is_mostly_numeric(series: pd.Series, threshold=0.7) -> bool:
    converted = pd.to_numeric(series, errors="coerce")
    valid = converted.notna().sum()
    return valid / max(len(series), 1) >= threshold


def clean_and_tier(df: pd.DataFrame, col_map: dict) -> pd.DataFrame:
    """
    Build a clean analysis dataframe with tier, trend, and slug columns.
    """
    url_col   = col_map["url_col"]
    title_col = col_map["title_col"]
    month_cols = col_map["month_cols"]
    total_col  = col_map["total_col"]

    out = pd.DataFrame()
    out["url"]   = df[url_col].astype(str).str.strip()
    out["title"] = df[title_col].astype(str).str.strip() if title_col else out["url"]
    out["slug"]  = out["url"].apply(
        lambda x: x.split("/blog/")[-1] if "/blog/" in x else x.rstrip("/").split("/")[-1]
    )

    for col in month_cols:
        out[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int)

    if total_col:
        out["total_clicks"] = pd.to_numeric(df[total_col], errors="coerce").fillna(0).astype(int)
    else:
        out["total_clicks"] = out[month_cols].sum(axis=1)

    # Trend
    out["trend"] = out.apply(lambda r: _get_trend(r, month_cols), axis=1)

    # Tier
    out["tier"] = out.apply(lambda r: _assign_tier(r, month_cols), axis=1)

    # Recommended action
    out["action"] = out.apply(lambda r: _get_action(r["tier"], r["total_clicks"]), axis=1)

    return out


def _get_trend(row, month_cols):
    if len(month_cols) < 2:
        return "➑️ Stable"
    vals = [row[c] for c in month_cols]
    if vals[-1] > vals[0]:
        return "πŸ“ˆ Growing"
    elif vals[-1] < vals[0]:
        return "πŸ“‰ Declining"
    return "➑️ Stable"


def _assign_tier(row, month_cols):
    c = row["total_clicks"]
    max_m = max([row[m] for m in month_cols]) if month_cols else 0
    if c == 0:
        return "TAKEDOWN_ZERO"
    elif c <= TIER_CONFIG["takedown_low_max"] and max_m <= TIER_CONFIG["takedown_low_monthly"]:
        return "TAKEDOWN_LOW"
    elif TIER_CONFIG["monitor_min"] <= c <= TIER_CONFIG["monitor_max"]:
        return "MONITOR"
    elif TIER_CONFIG["performing_min"] <= c <= TIER_CONFIG["performing_max"]:
        return "PERFORMING"
    elif TIER_CONFIG["strong_min"] <= c <= TIER_CONFIG["strong_max"]:
        return "STRONG"
    elif c >= TIER_CONFIG["top_min"]:
        return "TOP"
    return "PERFORMING"


def _get_action(tier, clicks):
    actions = {
        "TAKEDOWN_ZERO": "Remove immediately. Zero organic traction across all months. Set up 410 or 301 redirect.",
        "TAKEDOWN_LOW":  "Merge into a stronger related article or remove. Implement 301 redirect.",
        "MONITOR":       "Optimize meta title, description & keywords. Review in 90 days. Merge if no improvement.",
        "PERFORMING":    "Refresh content, strengthen internal links, add FAQ schema. Push for top 50 clicks.",
        "STRONG":        "Update statistics & examples. Add lead gen CTA. Build backlinks to reach 500+ clicks.",
        "TOP":           "Priority asset. Add lead magnets, improve CTAs, build backlinks. Protect rankings.",
    }
    if tier == "MONITOR" and clicks >= 15:
        return "Good potential. Optimize meta description and add 2–3 internal links to improve CTR."
    return actions.get(tier, "Review manually.")


def get_tier_summary(df: pd.DataFrame) -> dict:
    counts = df["tier"].value_counts().to_dict()
    return {
        "TAKEDOWN_ZERO": counts.get("TAKEDOWN_ZERO", 0),
        "TAKEDOWN_LOW":  counts.get("TAKEDOWN_LOW", 0),
        "MONITOR":       counts.get("MONITOR", 0),
        "PERFORMING":    counts.get("PERFORMING", 0),
        "STRONG":        counts.get("STRONG", 0),
        "TOP":           counts.get("TOP", 0),
    }


TIER_META = {
    "TAKEDOWN_ZERO": {"label": "πŸ”΄ Take Down (0 clicks)",       "color": "#FF4B4B"},
    "TAKEDOWN_LOW":  {"label": "🟠 Take Down (1–5 clicks)",      "color": "#FF8C00"},
    "MONITOR":       {"label": "🟑 Monitor (6–20 clicks)",       "color": "#FFC300"},
    "PERFORMING":    {"label": "βœ… Performing (21–100 clicks)",  "color": "#2ECC71"},
    "STRONG":        {"label": "πŸ’ͺ Strong (101–500 clicks)",     "color": "#1ABC9C"},
    "TOP":           {"label": "πŸ† Top Performers (500+ clicks)","color": "#9B59B6"},
}