blog-audit / utils /excel_builder.py
vijaykumaredstellar's picture
Upload 3 files
6d28094 verified
import io
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
# ── Shared styles ─────────────────────────────────────────────────────────────
_thin = Side(style="thin", color="CCCCCC")
BORDER = Border(left=_thin, right=_thin, top=_thin, bottom=_thin)
NORMAL = Font(name="Arial", size=10)
HDR_FNT = Font(bold=True, color="FFFFFF", name="Arial", size=11)
LEFT = Alignment(horizontal="left", vertical="center", wrap_text=True)
CENTER = Alignment(horizontal="center", vertical="center", wrap_text=True)
def _fill(hex_color: str) -> PatternFill:
return PatternFill("solid", start_color=hex_color, end_color=hex_color)
FILLS = {
"red": _fill("FFDCE0"),
"orange": _fill("FFE5CC"),
"yellow": _fill("FFFACD"),
"yellow2": _fill("FFF2A0"),
"blue": _fill("DCE6F1"),
"blue2": _fill("BDD7EE"),
"purple": _fill("E8D5F5"),
"green": _fill("E2EFDA"),
"green2": _fill("D9EAD3"),
"dkgreen": _fill("C6EFCE"),
"gold": _fill("FFF2CC"),
"gold2": _fill("FFE599"),
"hdr_red": _fill("C00000"),
"hdr_navy": _fill("1F4E79"),
"hdr_olive":_fill("7B6000"),
"hdr_grn": _fill("375623"),
"hdr_blue": _fill("0070C0"),
"hdr_gold": _fill("7F6000"),
"hdr_purple":_fill("5B2C8D"),
}
def _add_header(ws, headers: list, fill_key: str, row_h: int = 30):
ws.append(headers)
for cell in ws[1]:
cell.font = HDR_FNT
cell.fill = FILLS[fill_key]
cell.alignment = CENTER
cell.border = BORDER
ws.row_dimensions[1].height = row_h
def _style_row(ws, row_idx: int, fill_a: str, fill_b: str | None = None, i: int = 0):
fill = FILLS[fill_b] if (fill_b and i % 2 == 1) else FILLS[fill_a]
for cell in ws[row_idx]:
cell.font = NORMAL
cell.fill = fill
cell.alignment = LEFT
cell.border = BORDER
def _set_widths(ws, widths: dict):
for col, w in widths.items():
ws.column_dimensions[col].width = w
def _month_col_widths(month_cols: list) -> dict:
"""Return column letter β†’ width for month columns starting at D."""
return {get_column_letter(4 + i): 9 for i in range(len(month_cols))}
# ── Public builder ─────────────────────────────────────────────────────────────
def build_excel(df, merge_pairs: list[dict], month_cols: list, site_name: str = "Website") -> bytes:
"""
Build the full 7-tab Excel workbook and return it as bytes for download.
"""
wb = Workbook()
# ── Tab 1: Summary Dashboard ───────────────────────────────────────────────
ws1 = wb.active
ws1.title = "Summary Dashboard"
_build_summary(ws1, df, merge_pairs, month_cols, site_name)
# ── Tab 2: Take Down ───────────────────────────────────────────────────────
ws2 = wb.create_sheet("Take Down")
_build_tier_tab(
ws2, df, month_cols,
tiers=["TAKEDOWN_ZERO", "TAKEDOWN_LOW"],
fill_map={"TAKEDOWN_ZERO": "red", "TAKEDOWN_LOW": "orange"},
hdr_key="hdr_red",
severity_map={"TAKEDOWN_ZERO": "CRITICAL – 0 Clicks", "TAKEDOWN_LOW": "HIGH – 1–5 Clicks"},
)
# ── Tab 3: Merge Recommendations ──────────────────────────────────────────
ws3 = wb.create_sheet("Merge Recommendations")
_build_merge_tab(ws3, merge_pairs)
# ── Tab 4: Monitor ─────────────────────────────────────────────────────────
ws4 = wb.create_sheet("Monitor (6–20 Clicks)")
_build_simple_tier(ws4, df, month_cols, "MONITOR", "yellow", "yellow2", "hdr_olive")
# ── Tab 5: Performing ──────────────────────────────────────────────────────
ws5 = wb.create_sheet("Performing (21–100 Clicks)")
_build_simple_tier(ws5, df, month_cols, "PERFORMING", "green", "green2", "hdr_grn")
# ── Tab 6: Strong ──────────────────────────────────────────────────────────
ws6 = wb.create_sheet("Strong (101–500 Clicks)")
_build_simple_tier(ws6, df, month_cols, "STRONG", "blue", "blue2", "hdr_blue")
# ── Tab 7: Top Performers ──────────────────────────────────────────────────
ws7 = wb.create_sheet("Top Performers (500+ Clicks)")
_build_simple_tier(ws7, df, month_cols, "TOP", "gold", "gold2", "hdr_gold")
# Return as bytes buffer
buf = io.BytesIO()
wb.save(buf)
buf.seek(0)
return buf.read()
# ── Tab builders ──────────────────────────────────────────────────────────────
def _build_summary(ws, df, merge_pairs, month_cols, site_name):
ws["A1"] = f"Blog Audit Report β€” {site_name}"
ws["A1"].font = Font(bold=True, name="Arial", size=16, color="1F4E79")
ws["A1"].alignment = CENTER
ws.merge_cells("A1:E1")
ws.row_dimensions[1].height = 36
ws["A2"] = f"Total Blogs: {len(df)} | Months Analyzed: {len(month_cols)}"
ws["A2"].font = Font(italic=True, name="Arial", size=10, color="808080")
ws["A2"].alignment = CENTER
ws.merge_cells("A2:E2")
# Header row
for j, h in enumerate(["Category", "Count", "Action Required", "Description"], 1):
c = ws.cell(row=3, column=j, value=h)
c.font = HDR_FNT; c.fill = FILLS["hdr_navy"]; c.alignment = CENTER; c.border = BORDER
ws.row_dimensions[3].height = 28
from utils.data_processor import get_tier_summary
counts = get_tier_summary(df)
rows = [
("πŸ”΄ TAKE DOWN – Zero Clicks", counts["TAKEDOWN_ZERO"], "Remove", "No traffic at all. Immediate removal recommended.", "red"),
("🟠 TAKE DOWN – 1–5 Clicks", counts["TAKEDOWN_LOW"], "Remove / Merge", "Negligible traffic with no recovery signal.", "orange"),
("πŸ”΅ MERGE – AI Detected Pairs", len(merge_pairs), "Merge + 301 Redirect","Consolidate into stronger related articles.", "blue"),
("🟑 MONITOR – 6–20 Clicks", counts["MONITOR"], "Optimize & Monitor", "Underperforming. Optimize and review in 90 days.", "yellow"),
("βœ… PERFORMING – 21–100 Clicks", counts["PERFORMING"], "Maintain & Optimize", "Acceptable performance. Strengthen meta and links.", "green"),
("πŸ’ͺ STRONG – 101–500 Clicks", counts["STRONG"], "Strengthen", "Good performance. Freshen content and build backlinks.", "dkgreen"),
("πŸ† TOP PERFORMERS – 500+ Clicks", counts["TOP"], "Priority Investment", "Star content. CTAs, lead magnets, backlink outreach.", "gold2"),
]
for i, (cat, cnt, act, desc, fk) in enumerate(rows):
r = i + 4
for j, val in enumerate([cat, cnt, act, desc], 1):
c = ws.cell(row=r, column=j, value=val)
c.font = NORMAL; c.fill = FILLS[fk]; c.alignment = LEFT; c.border = BORDER
ws.row_dimensions[r].height = 22
# Top 10
ws["A12"] = "Top 10 Performing Blogs"
ws["A12"].font = Font(bold=True, name="Arial", size=13, color="1F4E79")
ws.merge_cells("A12:E12")
ws.row_dimensions[12].height = 26
for j, h in enumerate(["#", "Blog URL", "Title", "Total Clicks", "Trend"], 1):
c = ws.cell(row=13, column=j, value=h)
c.font = HDR_FNT; c.fill = FILLS["hdr_grn"]; c.alignment = CENTER; c.border = BORDER
ws.row_dimensions[13].height = 28
top10 = df.nlargest(10, "total_clicks")
for i, (_, row) in enumerate(top10.iterrows()):
r = 14 + i
for j, val in enumerate([i + 1, row["url"], row["title"], row["total_clicks"], row["trend"]], 1):
c = ws.cell(row=r, column=j, value=val)
c.font = NORMAL; c.fill = FILLS["dkgreen"]; c.alignment = LEFT; c.border = BORDER
_set_widths(ws, {"A": 42, "B": 12, "C": 22, "D": 65, "E": 16})
def _build_tier_tab(ws, df, month_cols, tiers, fill_map, hdr_key, severity_map=None):
extra = ["Severity"] if severity_map else []
headers = ["#", "Blog URL", "Title"] + month_cols + ["Total Clicks", "Trend"] + extra + ["Recommended Action"]
_add_header(ws, headers, hdr_key)
subset = df[df["tier"].isin(tiers)].sort_values("total_clicks")
for i, (_, row) in enumerate(subset.iterrows()):
monthly = [row[m] for m in month_cols]
sev = [severity_map[row["tier"]]] if severity_map else []
vals = [i + 1, row["url"], row["title"]] + monthly + \
[row["total_clicks"], row["trend"]] + sev + [row["action"]]
ws.append(vals)
fk = fill_map.get(row["tier"], "orange")
_style_row(ws, i + 2, fk)
n = len(month_cols)
w = {"A": 5, "B": 55, "C": 50}
w.update(_month_col_widths(month_cols))
clicks_col = get_column_letter(4 + n)
trend_col = get_column_letter(5 + n)
action_col = get_column_letter(6 + n + (1 if severity_map else 0))
sev_col = get_column_letter(6 + n) if severity_map else None
w[clicks_col] = 12
w[trend_col] = 14
if sev_col: w[sev_col] = 22
w[action_col] = 60
_set_widths(ws, w)
def _build_merge_tab(ws, merge_pairs):
headers = ["#", "Weak Blog URL (Merge FROM)", "Weak Title", "Weak Clicks",
"Strong Blog URL (Merge INTO)", "Strong Title", "Strong Clicks",
"Similarity", "Topic Cluster", "AI Merge Reason"]
_add_header(ws, headers, "hdr_navy")
for i, p in enumerate(merge_pairs):
vals = [
i + 1,
p.get("weak_url", ""),
p.get("weak_title", ""),
p.get("weak_clicks", 0),
p.get("strong_url", ""),
p.get("strong_title", ""),
p.get("strong_clicks", 0),
p.get("similarity", ""),
p.get("topic_cluster", ""),
p.get("merge_reason", ""),
]
ws.append(vals)
fk = "blue" if i % 2 == 0 else "purple"
_style_row(ws, i + 2, fk)
_set_widths(ws, {"A": 5, "B": 52, "C": 42, "D": 10,
"E": 52, "F": 42, "G": 10, "H": 11,
"I": 22, "J": 65})
def _build_simple_tier(ws, df, month_cols, tier_key, fill_a, fill_b, hdr_key):
headers = ["#", "Blog URL", "Title"] + month_cols + ["Total Clicks", "Trend", "Recommended Action"]
_add_header(ws, headers, hdr_key)
subset = df[df["tier"] == tier_key].sort_values("total_clicks", ascending=False)
for i, (_, row) in enumerate(subset.iterrows()):
monthly = [row[m] for m in month_cols]
vals = [i + 1, row["url"], row["title"]] + monthly + \
[row["total_clicks"], row["trend"], row["action"]]
ws.append(vals)
_style_row(ws, i + 2, fill_a, fill_b, i)
n = len(month_cols)
w = {"A": 5, "B": 55, "C": 50}
w.update(_month_col_widths(month_cols))
w[get_column_letter(4 + n)] = 12
w[get_column_letter(5 + n)] = 14
w[get_column_letter(6 + n)] = 60
_set_widths(ws, w)