vijaykumaredstellar committed on
Commit
6d28094
Β·
verified Β·
1 Parent(s): 1a9abc2

Upload 3 files

Browse files
utils/data_processor.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
# Fixed non-click columns
# Lower-cased header names that must never be treated as monthly click
# columns when detect_columns() scans the uploaded CSV.
NON_CLICK_COLS = {"date", "month", "year", "blogs", "h1", "meta title",
                  "meta description", "total clicks", "total_clicks"}

# Inclusive click thresholds that define the performance tiers used by
# _assign_tier(). Ranges are contiguous: 0 / 1-5 / 6-20 / 21-100 / 101-500 / 501+.
TIER_CONFIG = {
    "takedown_zero_max": 0,     # exactly zero total clicks -> TAKEDOWN_ZERO
    "takedown_low_max": 5,      # total clicks <= 5 is a TAKEDOWN_LOW candidate...
    "takedown_low_monthly": 2,  # ...but only if no single month exceeds 2 clicks
    "monitor_min": 6,
    "monitor_max": 20,
    "performing_min": 21,
    "performing_max": 100,
    "strong_min": 101,
    "strong_max": 500,
    "top_min": 501,
}
20
+
21
+
22
def detect_columns(df: pd.DataFrame) -> dict:
    """
    Auto-detect the URL, title, monthly-click, and total-clicks columns of an
    uploaded CSV with the expected structure.

    Returns a dict with keys: url_col, title_col, month_cols, total_col.
    Any of url_col/title_col/total_col may be None when no match is found.
    """
    lower_to_orig = {name.lower(): name for name in df.columns}

    # URL column: prefer the well-known header names, otherwise probe the
    # first few values of each column for an http(s) prefix.
    url_col = (lower_to_orig.get("blogs")
               or lower_to_orig.get("url")
               or lower_to_orig.get("urls"))
    if not url_col:
        for candidate in df.columns:
            head = df[candidate].dropna().astype(str).head(5)
            if head.str.startswith("http").any():
                url_col = candidate
                break

    # Title column: 'H1', 'Title', or 'Meta Title' (first match wins).
    title_col = (lower_to_orig.get("h1")
                 or lower_to_orig.get("title")
                 or lower_to_orig.get("meta title"))

    # Pre-computed total clicks column, if the sheet provides one.
    total_col = lower_to_orig.get("total clicks") or lower_to_orig.get("total_clicks")

    # Month columns: numeric-looking columns outside the known metadata set.
    month_cols = [
        name for name in df.columns
        if name.lower() not in NON_CLICK_COLS
        and (pd.api.types.is_numeric_dtype(df[name]) or _is_mostly_numeric(df[name]))
    ]

    # The totals column may also look numeric; keep it out of the month list.
    if total_col in month_cols:
        month_cols.remove(total_col)

    return {
        "url_col": url_col,
        "title_col": title_col,
        "month_cols": month_cols,
        "total_col": total_col,
    }
63
+
64
+
65
+ def _is_mostly_numeric(series: pd.Series, threshold=0.7) -> bool:
66
+ converted = pd.to_numeric(series, errors="coerce")
67
+ valid = converted.notna().sum()
68
+ return valid / max(len(series), 1) >= threshold
69
+
70
+
71
def clean_and_tier(df: pd.DataFrame, col_map: dict) -> pd.DataFrame:
    """
    Build a clean analysis dataframe from the raw CSV.

    Adds normalized url/title/slug columns, integer monthly clicks, a
    total_clicks column (computed when the sheet lacks one), plus derived
    trend, tier, and action columns.
    """
    url_col = col_map["url_col"]
    title_col = col_map["title_col"]
    month_cols = col_map["month_cols"]
    total_col = col_map["total_col"]

    result = pd.DataFrame()
    result["url"] = df[url_col].astype(str).str.strip()
    if title_col:
        result["title"] = df[title_col].astype(str).str.strip()
    else:
        # No title column detected: fall back to the URL itself.
        result["title"] = result["url"]

    def _slug(url: str) -> str:
        # Prefer the path segment after /blog/, otherwise the last segment.
        if "/blog/" in url:
            return url.split("/blog/")[-1]
        return url.rstrip("/").split("/")[-1]

    result["slug"] = result["url"].apply(_slug)

    # Monthly clicks: coerce anything non-numeric to 0 and keep integers.
    for name in month_cols:
        result[name] = pd.to_numeric(df[name], errors="coerce").fillna(0).astype(int)

    if total_col:
        result["total_clicks"] = pd.to_numeric(df[total_col], errors="coerce").fillna(0).astype(int)
    else:
        result["total_clicks"] = result[month_cols].sum(axis=1)

    # Derived columns: trajectory, performance tier, recommended action.
    result["trend"] = result.apply(lambda r: _get_trend(r, month_cols), axis=1)
    result["tier"] = result.apply(lambda r: _assign_tier(r, month_cols), axis=1)
    result["action"] = result.apply(lambda r: _get_action(r["tier"], r["total_clicks"]), axis=1)

    return result
105
+
106
+
107
+ def _get_trend(row, month_cols):
108
+ if len(month_cols) < 2:
109
+ return "➑️ Stable"
110
+ vals = [row[c] for c in month_cols]
111
+ if vals[-1] > vals[0]:
112
+ return "πŸ“ˆ Growing"
113
+ elif vals[-1] < vals[0]:
114
+ return "πŸ“‰ Declining"
115
+ return "➑️ Stable"
116
+
117
+
118
def _assign_tier(row, month_cols):
    """Map a blog row to a performance tier using TIER_CONFIG thresholds."""
    clicks = row["total_clicks"]
    peak_month = max((row[m] for m in month_cols), default=0)

    if clicks == 0:
        return "TAKEDOWN_ZERO"
    if clicks <= TIER_CONFIG["takedown_low_max"] and peak_month <= TIER_CONFIG["takedown_low_monthly"]:
        return "TAKEDOWN_LOW"
    if TIER_CONFIG["monitor_min"] <= clicks <= TIER_CONFIG["monitor_max"]:
        return "MONITOR"
    if TIER_CONFIG["performing_min"] <= clicks <= TIER_CONFIG["performing_max"]:
        return "PERFORMING"
    if TIER_CONFIG["strong_min"] <= clicks <= TIER_CONFIG["strong_max"]:
        return "STRONG"
    if clicks >= TIER_CONFIG["top_min"]:
        return "TOP"
    # NOTE(review): a blog with 1-5 total clicks whose monthly peak exceeds
    # takedown_low_monthly falls through every branch above and lands here,
    # labelled PERFORMING — confirm that fallback is intentional.
    return "PERFORMING"
134
+
135
+
136
+ def _get_action(tier, clicks):
137
+ actions = {
138
+ "TAKEDOWN_ZERO": "Remove immediately. Zero organic traction across all months. Set up 410 or 301 redirect.",
139
+ "TAKEDOWN_LOW": "Merge into a stronger related article or remove. Implement 301 redirect.",
140
+ "MONITOR": "Optimize meta title, description & keywords. Review in 90 days. Merge if no improvement.",
141
+ "PERFORMING": "Refresh content, strengthen internal links, add FAQ schema. Push for top 50 clicks.",
142
+ "STRONG": "Update statistics & examples. Add lead gen CTA. Build backlinks to reach 500+ clicks.",
143
+ "TOP": "Priority asset. Add lead magnets, improve CTAs, build backlinks. Protect rankings.",
144
+ }
145
+ if tier == "MONITOR" and clicks >= 15:
146
+ return "Good potential. Optimize meta description and add 2–3 internal links to improve CTR."
147
+ return actions.get(tier, "Review manually.")
148
+
149
+
150
def get_tier_summary(df: pd.DataFrame) -> dict:
    """Count blogs per tier, including zero entries for tiers with no rows."""
    observed = df["tier"].value_counts()
    tier_order = ("TAKEDOWN_ZERO", "TAKEDOWN_LOW", "MONITOR",
                  "PERFORMING", "STRONG", "TOP")
    return {tier: int(observed.get(tier, 0)) for tier in tier_order}
160
+
161
+
162
# UI display metadata per tier: badge label (with the tier's click range)
# and the accent colour (hex) used when rendering that tier.
TIER_META = {
    "TAKEDOWN_ZERO": {"label": "πŸ”΄ Take Down (0 clicks)", "color": "#FF4B4B"},
    "TAKEDOWN_LOW": {"label": "🟠 Take Down (1–5 clicks)", "color": "#FF8C00"},
    "MONITOR": {"label": "🟑 Monitor (6–20 clicks)", "color": "#FFC300"},
    "PERFORMING": {"label": "βœ… Performing (21–100 clicks)", "color": "#2ECC71"},
    "STRONG": {"label": "πŸ’ͺ Strong (101–500 clicks)", "color": "#1ABC9C"},
    "TOP": {"label": "πŸ† Top Performers (500+ clicks)","color": "#9B59B6"},
}
utils/embeddings.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sentence_transformers import SentenceTransformer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+ import streamlit as st
5
+
6
MODEL_NAME = "all-MiniLM-L6-v2"  # ~90 MB, English-optimised, fast
SIMILARITY_THRESHOLD = 0.72  # minimum cosine similarity to propose a merge pair
8
+
9
+
10
@st.cache_resource(show_spinner=False)
def load_embedder():
    """Load the sentence-transformer model once and cache it for the session."""
    # st.cache_resource keeps one model instance alive across Streamlit reruns,
    # so the (slow) model load happens only on first use.
    return SentenceTransformer(MODEL_NAME)
14
+
15
+
16
def get_embeddings(titles: list[str], embedder) -> np.ndarray:
    """Encode each title into a dense vector with the given embedder."""
    # Silent batched encoding; batch_size=64 keeps memory use modest.
    encode_options = {"show_progress_bar": False, "batch_size": 64}
    return embedder.encode(titles, **encode_options)
19
+
20
+
21
def find_merge_candidates(
    df,
    threshold: float = SIMILARITY_THRESHOLD,
    max_weak_clicks: int = 200,
) -> list[dict]:
    """
    Compare all blog titles using cosine similarity.

    Returns one pair dict per "weak" blog where:
      - the weak blog has <= max_weak_clicks total clicks,
      - the matched "strong" blog has strictly more clicks than the weak one,
      - their title similarity is >= threshold.
    Each weak blog is matched to its single best (highest-similarity) strong
    blog; the result list is sorted by similarity, descending.
    """
    embedder = load_embedder()

    all_titles = df["title"].tolist()
    all_clicks = df["total_clicks"].tolist()
    all_urls = df["url"].tolist()

    # Candidate pool: positional indices of low-click blogs. Positions (not
    # df.index labels) are used so the list lookups below stay correct even
    # when the dataframe does not carry a default RangeIndex.
    weak_positions = [pos for pos, clicks in enumerate(all_clicks)
                      if clicks <= max_weak_clicks]
    if not weak_positions:
        return []

    # Embed every title once; slice out the weak subset for comparison.
    all_emb = get_embeddings(all_titles, embedder)
    weak_emb = np.asarray([all_emb[pos] for pos in weak_positions])

    sim_matrix = cosine_similarity(weak_emb, all_emb)

    pairs = []
    for row_i, wi in enumerate(weak_positions):
        sims = sim_matrix[row_i]

        # Valid "strong" partners: similar enough and strictly more clicks.
        candidates = [
            (j, float(sims[j]))
            for j in range(len(all_titles))
            if j != wi
            and sims[j] >= threshold
            and all_clicks[j] > all_clicks[wi]
        ]
        if not candidates:
            continue

        # Best match = highest similarity (ties resolve to the lowest index,
        # matching the previous stable-sort behaviour).
        best_j, best_score = max(candidates, key=lambda c: c[1])
        pairs.append({
            "weak_url": all_urls[wi],
            "weak_title": all_titles[wi],
            "weak_clicks": all_clicks[wi],
            "strong_url": all_urls[best_j],
            "strong_title": all_titles[best_j],
            "strong_clicks": all_clicks[best_j],
            "similarity": round(best_score, 4),
            # Placeholders — filled in by the LLM enrichment step later.
            "topic_cluster": "",
            "merge_reason": "",
            "approved": True,  # default approved until the user toggles it off
        })

    # Present the most confident merges first.
    pairs.sort(key=lambda p: p["similarity"], reverse=True)
    return pairs
utils/excel_builder.py ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from openpyxl import Workbook
3
+ from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
4
+ from openpyxl.utils import get_column_letter
5
+
6
# ── Shared styles ─────────────────────────────────────────────────────────────
# Reusable openpyxl style objects so every sheet renders consistently.
_thin = Side(style="thin", color="CCCCCC")  # light grey hairline
BORDER = Border(left=_thin, right=_thin, top=_thin, bottom=_thin)  # full cell grid
NORMAL = Font(name="Arial", size=10)  # body-cell font
HDR_FNT = Font(bold=True, color="FFFFFF", name="Arial", size=11)  # white header font
LEFT = Alignment(horizontal="left", vertical="center", wrap_text=True)
CENTER = Alignment(horizontal="center", vertical="center", wrap_text=True)
13
+
14
+
15
def _fill(hex_color: str) -> PatternFill:
    """Return a solid PatternFill for the given RRGGBB hex colour."""
    return PatternFill("solid", start_color=hex_color, end_color=hex_color)
17
+
18
+
19
# Named fills used by the tab builders: pastel body fills first, then the
# saturated "hdr_*" fills used for header rows.
FILLS = {
    "red": _fill("FFDCE0"),
    "orange": _fill("FFE5CC"),
    "yellow": _fill("FFFACD"),
    "yellow2": _fill("FFF2A0"),
    "blue": _fill("DCE6F1"),
    "blue2": _fill("BDD7EE"),
    "purple": _fill("E8D5F5"),
    "green": _fill("E2EFDA"),
    "green2": _fill("D9EAD3"),
    "dkgreen": _fill("C6EFCE"),
    "gold": _fill("FFF2CC"),
    "gold2": _fill("FFE599"),
    "hdr_red": _fill("C00000"),
    "hdr_navy": _fill("1F4E79"),
    "hdr_olive":_fill("7B6000"),
    "hdr_grn": _fill("375623"),
    "hdr_blue": _fill("0070C0"),
    "hdr_gold": _fill("7F6000"),
    "hdr_purple":_fill("5B2C8D"),
}
40
+
41
+
42
def _add_header(ws, headers: list, fill_key: str, row_h: int = 30):
    """Append *headers* as the sheet's first row and apply header styling."""
    ws.append(headers)
    header_fill = FILLS[fill_key]
    for cell in ws[1]:
        cell.border = BORDER
        cell.alignment = CENTER
        cell.fill = header_fill
        cell.font = HDR_FNT
    ws.row_dimensions[1].height = row_h
50
+
51
+
52
def _style_row(ws, row_idx: int, fill_a: str, fill_b: str | None = None, i: int = 0):
    """Style one body row; alternate fill_b on odd *i* for zebra striping."""
    use_alt = bool(fill_b) and i % 2 == 1
    row_fill = FILLS[fill_b if use_alt else fill_a]
    for cell in ws[row_idx]:
        cell.border = BORDER
        cell.alignment = LEFT
        cell.fill = row_fill
        cell.font = NORMAL
59
+
60
+
61
+ def _set_widths(ws, widths: dict):
62
+ for col, w in widths.items():
63
+ ws.column_dimensions[col].width = w
64
+
65
+
66
def _month_col_widths(month_cols: list) -> dict:
    """Map each month column (spreadsheet columns starting at D) to width 9."""
    widths = {}
    for offset in range(len(month_cols)):
        widths[get_column_letter(4 + offset)] = 9
    return widths
69
+
70
+
71
+ # ── Public builder ─────────────────────────────────────────────────────────────
72
def build_excel(df, merge_pairs: list[dict], month_cols: list, site_name: str = "Website") -> bytes:
    """
    Assemble the full 7-tab audit workbook and return the .xlsx as raw bytes,
    ready for a browser download.
    """
    workbook = Workbook()

    # Tab 1: the summary dashboard lives on the default sheet.
    summary_ws = workbook.active
    summary_ws.title = "Summary Dashboard"
    _build_summary(summary_ws, df, merge_pairs, month_cols, site_name)

    # Tab 2: both take-down tiers combined, with per-tier colours + severity.
    _build_tier_tab(
        workbook.create_sheet("Take Down"), df, month_cols,
        tiers=["TAKEDOWN_ZERO", "TAKEDOWN_LOW"],
        fill_map={"TAKEDOWN_ZERO": "red", "TAKEDOWN_LOW": "orange"},
        hdr_key="hdr_red",
        severity_map={"TAKEDOWN_ZERO": "CRITICAL – 0 Clicks", "TAKEDOWN_LOW": "HIGH – 1–5 Clicks"},
    )

    # Tab 3: AI-detected merge pairs.
    _build_merge_tab(workbook.create_sheet("Merge Recommendations"), merge_pairs)

    # Tabs 4-7: one plain tier listing each, created in this fixed order.
    simple_tier_specs = [
        ("Monitor (6–20 Clicks)", "MONITOR", "yellow", "yellow2", "hdr_olive"),
        ("Performing (21–100 Clicks)", "PERFORMING", "green", "green2", "hdr_grn"),
        ("Strong (101–500 Clicks)", "STRONG", "blue", "blue2", "hdr_blue"),
        ("Top Performers (500+ Clicks)", "TOP", "gold", "gold2", "hdr_gold"),
    ]
    for sheet_name, tier_key, fill_a, fill_b, hdr_key in simple_tier_specs:
        sheet = workbook.create_sheet(sheet_name)
        _build_simple_tier(sheet, df, month_cols, tier_key, fill_a, fill_b, hdr_key)

    # Serialise to an in-memory buffer so callers get download-ready bytes.
    buffer = io.BytesIO()
    workbook.save(buffer)
    return buffer.getvalue()
118
+
119
+
120
+ # ── Tab builders ──────────────────────────────────────────────────────────────
121
+
122
def _build_summary(ws, df, merge_pairs, month_cols, site_name):
    """Tab 1: title banner, per-tier count table (rows 3-10), Top-10 list (rows 12+)."""
    # Banner row, merged across A-E to span the 5-column Top-10 table below.
    ws["A1"] = f"Blog Audit Report β€” {site_name}"
    ws["A1"].font = Font(bold=True, name="Arial", size=16, color="1F4E79")
    ws["A1"].alignment = CENTER
    ws.merge_cells("A1:E1")
    ws.row_dimensions[1].height = 36

    # Subtitle with dataset size.
    ws["A2"] = f"Total Blogs: {len(df)} | Months Analyzed: {len(month_cols)}"
    ws["A2"].font = Font(italic=True, name="Arial", size=10, color="808080")
    ws["A2"].alignment = CENTER
    ws.merge_cells("A2:E2")

    # Header row
    for j, h in enumerate(["Category", "Count", "Action Required", "Description"], 1):
        c = ws.cell(row=3, column=j, value=h)
        c.font = HDR_FNT; c.fill = FILLS["hdr_navy"]; c.alignment = CENTER; c.border = BORDER
    ws.row_dimensions[3].height = 28

    # Imported here rather than at module top — presumably to avoid a circular
    # import between utils modules; confirm before moving to the top of file.
    from utils.data_processor import get_tier_summary
    counts = get_tier_summary(df)

    # One entry per tier: (category label, count, action, description, fill key).
    rows = [
        ("πŸ”΄ TAKE DOWN – Zero Clicks", counts["TAKEDOWN_ZERO"], "Remove", "No traffic at all. Immediate removal recommended.", "red"),
        ("🟠 TAKE DOWN – 1–5 Clicks", counts["TAKEDOWN_LOW"], "Remove / Merge", "Negligible traffic with no recovery signal.", "orange"),
        ("πŸ”΅ MERGE – AI Detected Pairs", len(merge_pairs), "Merge + 301 Redirect","Consolidate into stronger related articles.", "blue"),
        ("🟑 MONITOR – 6–20 Clicks", counts["MONITOR"], "Optimize & Monitor", "Underperforming. Optimize and review in 90 days.", "yellow"),
        ("βœ… PERFORMING – 21–100 Clicks", counts["PERFORMING"], "Maintain & Optimize", "Acceptable performance. Strengthen meta and links.", "green"),
        ("πŸ’ͺ STRONG – 101–500 Clicks", counts["STRONG"], "Strengthen", "Good performance. Freshen content and build backlinks.", "dkgreen"),
        ("πŸ† TOP PERFORMERS – 500+ Clicks", counts["TOP"], "Priority Investment", "Star content. CTAs, lead magnets, backlink outreach.", "gold2"),
    ]

    # Tier table body occupies rows 4-10 (7 entries); row 11 is left blank so
    # the hard-coded "A12" section header below does not overlap. Keep `rows`
    # at 7 entries or adjust the A12/row-13 anchors accordingly.
    for i, (cat, cnt, act, desc, fk) in enumerate(rows):
        r = i + 4
        for j, val in enumerate([cat, cnt, act, desc], 1):
            c = ws.cell(row=r, column=j, value=val)
            c.font = NORMAL; c.fill = FILLS[fk]; c.alignment = LEFT; c.border = BORDER
        ws.row_dimensions[r].height = 22

    # Top 10
    ws["A12"] = "Top 10 Performing Blogs"
    ws["A12"].font = Font(bold=True, name="Arial", size=13, color="1F4E79")
    ws.merge_cells("A12:E12")
    ws.row_dimensions[12].height = 26

    # Top-10 table header on row 13.
    for j, h in enumerate(["#", "Blog URL", "Title", "Total Clicks", "Trend"], 1):
        c = ws.cell(row=13, column=j, value=h)
        c.font = HDR_FNT; c.fill = FILLS["hdr_grn"]; c.alignment = CENTER; c.border = BORDER
    ws.row_dimensions[13].height = 28

    # Top-10 body starts at row 14, ranked by total clicks descending.
    top10 = df.nlargest(10, "total_clicks")
    for i, (_, row) in enumerate(top10.iterrows()):
        r = 14 + i
        for j, val in enumerate([i + 1, row["url"], row["title"], row["total_clicks"], row["trend"]], 1):
            c = ws.cell(row=r, column=j, value=val)
            c.font = NORMAL; c.fill = FILLS["dkgreen"]; c.alignment = LEFT; c.border = BORDER

    _set_widths(ws, {"A": 42, "B": 12, "C": 22, "D": 65, "E": 16})
179
+
180
+
181
def _build_tier_tab(ws, df, month_cols, tiers, fill_map, hdr_key, severity_map=None):
    """Tab listing every blog whose tier is in *tiers*, lowest clicks first.

    fill_map picks the row fill per tier; severity_map (optional) inserts an
    extra Severity column between Trend and Recommended Action.
    """
    extra = ["Severity"] if severity_map else []
    headers = ["#", "Blog URL", "Title"] + month_cols + ["Total Clicks", "Trend"] + extra + ["Recommended Action"]
    _add_header(ws, headers, hdr_key)

    # Worst performers first (ascending clicks) for a take-down style tab.
    subset = df[df["tier"].isin(tiers)].sort_values("total_clicks")
    for i, (_, row) in enumerate(subset.iterrows()):
        monthly = [row[m] for m in month_cols]
        sev = [severity_map[row["tier"]]] if severity_map else []
        vals = [i + 1, row["url"], row["title"]] + monthly + \
               [row["total_clicks"], row["trend"]] + sev + [row["action"]]
        ws.append(vals)
        # Fall back to "orange" if a tier is missing from fill_map.
        fk = fill_map.get(row["tier"], "orange")
        _style_row(ws, i + 2, fk)

    # Column layout: A=#, B=URL, C=Title, D..(3+n)=months, then
    # (4+n)=Total Clicks, (5+n)=Trend, optional (6+n)=Severity, last=Action.
    n = len(month_cols)
    w = {"A": 5, "B": 55, "C": 50}
    w.update(_month_col_widths(month_cols))
    clicks_col = get_column_letter(4 + n)
    trend_col = get_column_letter(5 + n)
    action_col = get_column_letter(6 + n + (1 if severity_map else 0))
    sev_col = get_column_letter(6 + n) if severity_map else None
    w[clicks_col] = 12
    w[trend_col] = 14
    if sev_col: w[sev_col] = 22
    w[action_col] = 60
    _set_widths(ws, w)
208
+
209
+
210
def _build_merge_tab(ws, merge_pairs):
    """Tab 3: one row per AI-suggested merge pair, with alternating colours."""
    headers = ["#", "Weak Blog URL (Merge FROM)", "Weak Title", "Weak Clicks",
               "Strong Blog URL (Merge INTO)", "Strong Title", "Strong Clicks",
               "Similarity", "Topic Cluster", "AI Merge Reason"]
    _add_header(ws, headers, "hdr_navy")

    # (pair-dict key, default) in output column order after the row number.
    field_defaults = [
        ("weak_url", ""), ("weak_title", ""), ("weak_clicks", 0),
        ("strong_url", ""), ("strong_title", ""), ("strong_clicks", 0),
        ("similarity", ""), ("topic_cluster", ""), ("merge_reason", ""),
    ]

    for i, pair in enumerate(merge_pairs):
        row_vals = [i + 1] + [pair.get(key, default) for key, default in field_defaults]
        ws.append(row_vals)
        _style_row(ws, i + 2, "blue" if i % 2 == 0 else "purple")

    _set_widths(ws, {"A": 5, "B": 52, "C": 42, "D": 10,
                     "E": 52, "F": 42, "G": 10, "H": 11,
                     "I": 22, "J": 65})
236
+
237
+
238
def _build_simple_tier(ws, df, month_cols, tier_key, fill_a, fill_b, hdr_key):
    """Tab listing a single tier's blogs, highest clicks first, zebra-striped."""
    sheet_headers = ["#", "Blog URL", "Title"] + month_cols + ["Total Clicks", "Trend", "Recommended Action"]
    _add_header(ws, sheet_headers, hdr_key)

    ranked = df[df["tier"] == tier_key].sort_values("total_clicks", ascending=False)
    for pos, (_, blog) in enumerate(ranked.iterrows()):
        month_vals = [blog[m] for m in month_cols]
        ws.append([pos + 1, blog["url"], blog["title"], *month_vals,
                   blog["total_clicks"], blog["trend"], blog["action"]])
        _style_row(ws, pos + 2, fill_a, fill_b, pos)

    # Widths: fixed A-C, per-month widths, then Total Clicks / Trend / Action.
    month_count = len(month_cols)
    widths = {"A": 5, "B": 55, "C": 50}
    widths.update(_month_col_widths(month_cols))
    widths[get_column_letter(4 + month_count)] = 12
    widths[get_column_letter(5 + month_count)] = 14
    widths[get_column_letter(6 + month_count)] = 60
    _set_widths(ws, widths)