blog-audit / app.py
vijaykumaredstellar's picture
Upload 3 files
5284eaf verified
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import time
from utils.data_processor import detect_columns, clean_and_tier, get_tier_summary, TIER_META
from utils.embeddings import find_merge_candidates, load_embedder
from utils.llm_client import MODEL_OPTIONS, test_connection, enrich_merge_pairs
from utils.excel_builder import build_excel
# ── Page config ───────────────────────────────────────────────────────────────
st.set_page_config(
page_title="Blog Audit Tool",
page_icon="πŸ“Š",
layout="wide",
initial_sidebar_state="expanded",
)
# ── Global CSS ────────────────────────────────────────────────────────────────
st.markdown("""
<style>
.block-container { padding-top: 1.5rem; }
.metric-card {
background: #F8F9FA;
border: 1px solid #E0E0E0;
border-radius: 10px;
padding: 16px 20px;
text-align: center;
}
.metric-card .value { font-size: 2rem; font-weight: 700; }
.metric-card .label { font-size: 0.82rem; color: #666; margin-top: 2px; }
.section-header {
font-size: 1.15rem; font-weight: 700;
color: #1A1A2E; margin: 1.2rem 0 0.4rem 0;
padding-bottom: 4px; border-bottom: 2px solid #4F46E5;
display: inline-block;
}
.status-ok { color: #2ECC71; font-weight: 600; }
.status-err { color: #E74C3C; font-weight: 600; }
div[data-testid="stDataFrame"] { border-radius: 8px; }
.stButton > button {
border-radius: 8px; font-weight: 600;
}
.approve-row { background: #E8F5E9 !important; }
.reject-row { background: #FFEBEE !important; }
</style>
""", unsafe_allow_html=True)
# ── Session state init ────────────────────────────────────────────────────────
for key in ["df_clean", "col_map", "merge_pairs", "analysis_done",
"api_key", "model_id", "model_label", "site_name"]:
if key not in st.session_state:
st.session_state[key] = None
if "analysis_done" not in st.session_state or st.session_state["analysis_done"] is None:
st.session_state["analysis_done"] = False
# ── Sidebar ───────────────────────────────────────────────────────────────────
with st.sidebar:
st.image("https://via.placeholder.com/200x50/4F46E5/FFFFFF?text=Blog+Audit", width=200)
st.markdown("---")
st.markdown("### πŸ“‹ Steps")
st.markdown("""
1. πŸ“ Upload your CSV
2. πŸ”‘ Enter API key & pick model
3. πŸš€ Run analysis
4. πŸ“Š Review results
5. πŸ’Ύ Download report
""")
st.markdown("---")
st.markdown("### ℹ️ CSV Format")
st.markdown("""
Your CSV must have:
- A **URL** column (`Blogs` or similar)
- A **title** column (`H1` or `Title`)
- **4 monthly click** columns (any names)
- A **Total Clicks** column
""")
st.markdown("---")
st.caption("Blog Audit Tool β€” works for any website")
# ══════════════════════════════════════════════════════════════════════════════
# HEADER
# ══════════════════════════════════════════════════════════════════════════════
st.title("πŸ“Š Blog Performance Audit Tool")
st.markdown("Upload any website's blog click data CSV to get a full performance audit with AI-powered merge recommendations.")
st.markdown("---")
# ══════════════════════════════════════════════════════════════════════════════
# STEP 1 β€” UPLOAD CSV
# ══════════════════════════════════════════════════════════════════════════════
st.markdown('<p class="section-header">Step 1 β€” Upload Your CSV</p>', unsafe_allow_html=True)
col_up1, col_up2 = st.columns([2, 1])
with col_up1:
uploaded_file = st.file_uploader(
"Upload your blog clicks CSV",
type=["csv"],
help="CSV with URL column, H1/title column, 4 monthly click columns, and Total Clicks column.",
label_visibility="collapsed",
)
with col_up2:
site_name_input = st.text_input(
"Website / Brand name (for report)",
value="My Website",
placeholder="e.g. Edstellar, TechBlog...",
)
if uploaded_file:
try:
df_raw = pd.read_csv(uploaded_file)
col_map = detect_columns(df_raw)
# Validate detection
if not col_map["url_col"]:
st.error("❌ Could not detect a URL column. Make sure your CSV has a column with URLs (e.g. 'Blogs').")
st.stop()
if not col_map["month_cols"]:
st.error("❌ Could not detect monthly click columns. Ensure your numeric click columns are present.")
st.stop()
df_clean = clean_and_tier(df_raw, col_map)
st.session_state["df_clean"] = df_clean
st.session_state["col_map"] = col_map
st.session_state["site_name"] = site_name_input
# Preview
with st.expander("πŸ“‹ CSV Preview & Column Detection", expanded=True):
c1, c2, c3, c4 = st.columns(4)
c1.metric("Total Blogs", len(df_clean))
c2.metric("URL Column", col_map["url_col"])
c3.metric("Title Column", col_map["title_col"] or "β€”")
c4.metric("Month Columns", len(col_map["month_cols"]))
st.markdown(f"**Detected month columns:** `{'` Β· `'.join(col_map['month_cols'])}`")
st.dataframe(df_clean[["url", "title"] + col_map["month_cols"] + ["total_clicks", "tier", "trend"]].head(8),
use_container_width=True, height=220)
st.success(f"βœ… File loaded: **{len(df_clean)} blogs** detected.")
except Exception as e:
st.error(f"❌ Error reading CSV: {e}")
st.stop()
st.markdown("---")
# ══════════════════════════════════════════════════════════════════════════════
# STEP 2 β€” AI CONFIGURATION
# ══════════════════════════════════════════════════════════════════════════════
st.markdown('<p class="section-header">Step 2 β€” AI Configuration (OpenRouter)</p>', unsafe_allow_html=True)
st.caption("Get a free API key at [openrouter.ai/keys](https://openrouter.ai/keys)")
ai_col1, ai_col2, ai_col3 = st.columns([2, 2, 1])
with ai_col1:
api_key_input = st.text_input(
"πŸ”‘ OpenRouter API Key",
type="password",
placeholder="sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxx",
value=st.secrets.get("OPENROUTER_API_KEY", "") if hasattr(st, "secrets") else "",
)
with ai_col2:
model_label = st.selectbox(
"🧠 Select LLM Model",
options=list(MODEL_OPTIONS.keys()),
index=0,
)
with ai_col3:
st.markdown("<br>", unsafe_allow_html=True)
test_btn = st.button("πŸ”Œ Test Connection", use_container_width=True)
if test_btn:
if not api_key_input:
st.warning("⚠️ Please enter your API key first.")
else:
with st.spinner("Testing connection..."):
ok, msg = test_connection(api_key_input, MODEL_OPTIONS[model_label])
if ok:
st.session_state["api_key"] = api_key_input
st.session_state["model_id"] = MODEL_OPTIONS[model_label]
st.session_state["model_label"] = model_label
st.success(f"βœ… {msg} Model: **{model_label}**")
else:
st.error(f"❌ {msg}")
# Auto-save key if already entered without test
if api_key_input and not test_btn:
st.session_state["api_key"] = api_key_input
st.session_state["model_id"] = MODEL_OPTIONS[model_label]
st.session_state["model_label"] = model_label
st.markdown("---")
# ══════════════════════════════════════════════════════════════════════════════
# STEP 3 β€” RUN ANALYSIS
# ══════════════════════════════════════════════════════════════════════════════
st.markdown('<p class="section-header">Step 3 β€” Run Analysis</p>', unsafe_allow_html=True)
run_ready = (
st.session_state.get("df_clean") is not None
and st.session_state.get("api_key")
)
if not run_ready:
st.info("⬆️ Complete Steps 1 and 2 above before running the analysis.")
run_btn = st.button(
"πŸš€ Run Full Analysis",
disabled=not run_ready,
use_container_width=False,
type="primary",
)
if run_btn:
df_clean = st.session_state["df_clean"]
col_map = st.session_state["col_map"]
api_key = st.session_state["api_key"]
model_id = st.session_state["model_id"]
model_label = st.session_state["model_label"]
progress_bar = st.progress(0, text="Starting analysis...")
status_box = st.empty()
# Step A β€” Embeddings
status_box.info("🧠 Step 1/3 β€” Loading embedding model and generating title vectors...")
progress_bar.progress(10, text="Loading embedding model...")
with st.spinner("Generating embeddings (this may take 30–60s on first run)..."):
load_embedder() # warm up cache
merge_candidates = find_merge_candidates(df_clean, threshold=0.72, max_weak_clicks=200)
progress_bar.progress(45, text=f"Found {len(merge_candidates)} merge candidate pairs.")
status_box.success(f"βœ… Embeddings done β€” {len(merge_candidates)} potential merge pairs found.")
time.sleep(0.4)
# Step B β€” LLM enrichment
if merge_candidates:
status_box.info(f"πŸ€– Step 2/3 β€” Sending {len(merge_candidates)} pairs to {model_label} for merge reasoning...")
progress_bar.progress(50, text="LLM analysis in progress...")
llm_progress = st.empty()
llm_prog_bar = st.progress(0)
def llm_cb(done, total):
pct = int(done / total * 100)
llm_prog_bar.progress(pct, text=f"LLM: {done}/{total} pairs processed")
enriched_pairs = enrich_merge_pairs(
merge_candidates, api_key, model_id,
batch_size=10, progress_callback=llm_cb,
)
llm_prog_bar.empty()
llm_progress.empty()
else:
enriched_pairs = []
st.session_state["merge_pairs"] = enriched_pairs
progress_bar.progress(90, text="Building report...")
# Step C β€” Done
status_box.success(f"βœ… Analysis complete! {len(enriched_pairs)} merge recommendations generated.")
progress_bar.progress(100, text="βœ… Analysis complete!")
st.session_state["analysis_done"] = True
time.sleep(0.5)
st.rerun()
st.markdown("---")
# ══════════════════════════════════════════════════════════════════════════════
# STEP 4 β€” RESULTS DASHBOARD
# ══════════════════════════════════════════════════════════════════════════════
if st.session_state["analysis_done"] and st.session_state["df_clean"] is not None:
df_clean = st.session_state["df_clean"]
col_map = st.session_state["col_map"]
month_cols = col_map["month_cols"]
merge_pairs = st.session_state.get("merge_pairs", [])
site_name = st.session_state.get("site_name", "Website")
st.markdown('<p class="section-header">Step 4 β€” Results Dashboard</p>', unsafe_allow_html=True)
counts = get_tier_summary(df_clean)
# ── KPI metric row ──────────────────────────────────────────────────────
m_cols = st.columns(7)
kpis = [
("Total Blogs", len(df_clean), "#4F46E5"),
("πŸ”΄ Take Down", counts["TAKEDOWN_ZERO"] + counts["TAKEDOWN_LOW"], "#E74C3C"),
("πŸ”΅ Merge", len(merge_pairs), "#2980B9"),
("🟑 Monitor", counts["MONITOR"], "#F39C12"),
("βœ… Performing", counts["PERFORMING"], "#27AE60"),
("πŸ’ͺ Strong", counts["STRONG"], "#1ABC9C"),
("πŸ† Top", counts["TOP"], "#9B59B6"),
]
for col_w, (label, val, color) in zip(m_cols, kpis):
col_w.markdown(
f'<div class="metric-card">'
f'<div class="value" style="color:{color}">{val}</div>'
f'<div class="label">{label}</div>'
f'</div>',
unsafe_allow_html=True,
)
st.markdown("<br>", unsafe_allow_html=True)
# ── Charts ──────────────────────────────────────────────────────────────
chart_col1, chart_col2 = st.columns(2)
with chart_col1:
st.markdown("**Click Distribution by Tier**")
tier_labels = [TIER_META[t]["label"] for t in counts]
tier_values = list(counts.values())
tier_colors = [TIER_META[t]["color"] for t in counts]
fig_pie = go.Figure(go.Pie(
labels=tier_labels, values=tier_values,
marker_colors=tier_colors,
hole=0.45, textinfo="label+percent",
))
fig_pie.update_layout(
showlegend=False, margin=dict(t=10, b=10, l=10, r=10), height=280
)
st.plotly_chart(fig_pie, use_container_width=True)
with chart_col2:
st.markdown("**Monthly Click Trend (Top 5 Blogs)**")
top5 = df_clean.nlargest(5, "total_clicks")
fig_line = go.Figure()
for _, row in top5.iterrows():
short_title = row["title"][:40] + "…" if len(row["title"]) > 40 else row["title"]
fig_line.add_trace(go.Scatter(
x=month_cols,
y=[row[m] for m in month_cols],
mode="lines+markers",
name=short_title,
))
fig_line.update_layout(
margin=dict(t=10, b=10, l=10, r=10), height=280,
legend=dict(font=dict(size=9)),
xaxis_title="Month", yaxis_title="Clicks",
)
st.plotly_chart(fig_line, use_container_width=True)
st.markdown("---")
# ── Result Tabs ─────────────────────────────────────────────────────────
tab_td, tab_merge, tab_mon, tab_perf, tab_str, tab_top = st.tabs([
f"πŸ”΄ Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']})",
f"πŸ”΅ Merge ({len(merge_pairs)})",
f"🟑 Monitor ({counts['MONITOR']})",
f"βœ… Performing ({counts['PERFORMING']})",
f"πŸ’ͺ Strong ({counts['STRONG']})",
f"πŸ† Top ({counts['TOP']})",
])
display_cols_base = ["url", "title"] + month_cols + ["total_clicks", "trend", "action"]
# ── Take Down tab ────────────────────────────────────────────────────────
with tab_td:
st.markdown("Blogs with zero or negligible traffic. Recommended for removal or merge.")
td_df = df_clean[df_clean["tier"].isin(["TAKEDOWN_ZERO", "TAKEDOWN_LOW"])].sort_values("total_clicks")
if len(td_df):
# Search filter
search_td = st.text_input("πŸ” Filter by URL or title", key="search_td", placeholder="Type to filter...")
if search_td:
mask = td_df["url"].str.contains(search_td, case=False, na=False) | \
td_df["title"].str.contains(search_td, case=False, na=False)
td_df = td_df[mask]
st.dataframe(td_df[display_cols_base], use_container_width=True, height=400)
st.caption(f"{len(td_df)} blogs")
else:
st.success("πŸŽ‰ No take-down candidates found!")
# ── Merge tab ────────────────────────────────────────────────────────────
with tab_merge:
st.markdown("AI-detected topically overlapping blogs. Review and approve/reject each suggestion.")
if merge_pairs:
# Review interface
st.markdown("**βœ… = Keep in report &nbsp;&nbsp; ❌ = Reject suggestion**")
# Init approval state
if "merge_approvals" not in st.session_state:
st.session_state["merge_approvals"] = {i: True for i in range(len(merge_pairs))}
col_filter, col_actions = st.columns([3, 1])
with col_filter:
search_merge = st.text_input("πŸ” Filter merges", key="search_merge", placeholder="Filter by title or cluster...")
with col_actions:
st.markdown("<br>", unsafe_allow_html=True)
if st.button("βœ… Approve All", key="approve_all"):
for i in range(len(merge_pairs)):
st.session_state["merge_approvals"][i] = True
st.rerun()
# Display table
merge_display = []
for i, p in enumerate(merge_pairs):
if search_merge and search_merge.lower() not in p.get("weak_title","").lower() \
and search_merge.lower() not in p.get("strong_title","").lower() \
and search_merge.lower() not in p.get("topic_cluster","").lower():
continue
approved = st.session_state["merge_approvals"].get(i, True)
merge_display.append({
"#": i + 1,
"Weak Blog (Merge FROM)": p.get("weak_title", "")[:60],
"Weak Clicks": p.get("weak_clicks", 0),
"Strong Blog (Merge INTO)":p.get("strong_title", "")[:60],
"Strong Clicks": p.get("strong_clicks", 0),
"Similarity": p.get("similarity", ""),
"Topic Cluster": p.get("topic_cluster", ""),
"AI Merge Reason": p.get("merge_reason", "")[:100],
"Approved": "βœ… Yes" if approved else "❌ Rejected",
})
merge_df_display = pd.DataFrame(merge_display)
st.dataframe(merge_df_display, use_container_width=True, height=380)
# Approve/Reject individual rows
st.markdown("**Toggle individual suggestions:**")
toggle_cols = st.columns(5)
for i, p in enumerate(merge_pairs[:20]): # show first 20 toggles
col_idx = i % 5
with toggle_cols[col_idx]:
current = st.session_state["merge_approvals"].get(i, True)
label = f"#{i+1}: {p.get('weak_title','')[:25]}…"
new_val = st.checkbox(label, value=current, key=f"merge_toggle_{i}")
st.session_state["merge_approvals"][i] = new_val
approved_count = sum(st.session_state["merge_approvals"].values())
st.caption(f"βœ… {approved_count} approved Β· ❌ {len(merge_pairs) - approved_count} rejected")
else:
st.info("No merge candidates detected above the similarity threshold.")
# ── Generic tier tab helper ──────────────────────────────────────────────
def render_tier_tab(tier_key, tab_obj, search_key):
with tab_obj:
subset = df_clean[df_clean["tier"] == tier_key].sort_values("total_clicks", ascending=False)
search = st.text_input("πŸ” Filter", key=search_key, placeholder="Filter by URL or title...")
if search:
mask = subset["url"].str.contains(search, case=False, na=False) | \
subset["title"].str.contains(search, case=False, na=False)
subset = subset[mask]
trend_filter = st.selectbox(
"Trend filter", ["All", "πŸ“ˆ Growing", "πŸ“‰ Declining", "➑️ Stable"],
key=f"{search_key}_trend"
)
if trend_filter != "All":
subset = subset[subset["trend"] == trend_filter]
st.dataframe(subset[display_cols_base], use_container_width=True, height=400)
st.caption(f"{len(subset)} blogs")
render_tier_tab("MONITOR", tab_mon, "search_mon")
render_tier_tab("PERFORMING", tab_perf, "search_perf")
render_tier_tab("STRONG", tab_str, "search_str")
render_tier_tab("TOP", tab_top, "search_top")
st.markdown("---")
# ══════════════════════════════════════════════════════════════════════════
# STEP 5 β€” DOWNLOAD REPORT
# ══════════════════════════════════════════════════════════════════════════
st.markdown('<p class="section-header">Step 5 β€” Download Excel Report</p>', unsafe_allow_html=True)
# Apply approvals to merge pairs before export
approvals = st.session_state.get("merge_approvals", {})
approved_pairs = [p for i, p in enumerate(merge_pairs) if approvals.get(i, True)]
dl_col1, dl_col2 = st.columns([2, 3])
with dl_col1:
with st.spinner("Building Excel report..."):
excel_bytes = build_excel(
df_clean, approved_pairs, month_cols,
site_name=st.session_state.get("site_name", "Website"),
)
filename = f"{site_name.replace(' ', '_').lower()}_blog_audit_report.xlsx"
st.download_button(
label="⬇️ Download 7-Tab Excel Report",
data=excel_bytes,
file_name=filename,
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
type="primary",
use_container_width=True,
)
with dl_col2:
st.markdown(f"""
**Report includes 7 tabs:**
- πŸ“Š Summary Dashboard
- πŸ”΄ Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']} blogs)
- πŸ”΅ Merge Recommendations ({len(approved_pairs)} approved pairs)
- 🟑 Monitor Β· βœ… Performing Β· πŸ’ͺ Strong Β· πŸ† Top Performers
""")
st.caption(f"Model used: {st.session_state.get('model_label','β€”')}")
elif not st.session_state["analysis_done"]:
st.markdown('<p class="section-header">Step 4 β€” Results Dashboard</p>', unsafe_allow_html=True)
st.info("⬆️ Complete Steps 1–3 above to see results here.")