"""Streamlit app: Blog Performance Audit Tool.

Upload any website's blog click-data CSV, tier every post by performance,
detect topically-overlapping posts via sentence embeddings, enrich the merge
candidates with an OpenRouter LLM, review everything in a dashboard, and
download a multi-tab Excel report.

NOTE(review): the inline-HTML string literals in this file (the <style>
block, the step headers, the KPI cards, and the spacer divs) were destroyed
by text extraction; the markup below is a faithful reconstruction of the
surviving text content — confirm against the original file if available.
"""

import time

import pandas as pd
import plotly.express as px  # kept: imported by the original file
import plotly.graph_objects as go
import streamlit as st

from utils.data_processor import detect_columns, clean_and_tier, get_tier_summary, TIER_META
from utils.embeddings import find_merge_candidates, load_embedder
from utils.llm_client import MODEL_OPTIONS, test_connection, enrich_merge_pairs
from utils.excel_builder import build_excel

# ── Page config ───────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="Blog Audit Tool",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ── Global CSS (reconstructed — original content was lost in extraction) ─────
st.markdown(
    """
    <style>
    .step-header {
        background: linear-gradient(90deg, #4F46E5, #7C3AED);
        color: #FFFFFF;
        padding: 0.6rem 1rem;
        border-radius: 8px;
        font-size: 1.15rem;
        font-weight: 600;
        margin-bottom: 0.75rem;
    }
    .kpi-card {
        border-radius: 10px;
        padding: 0.8rem 0.4rem;
        text-align: center;
        color: #FFFFFF;
    }
    .kpi-value { font-size: 1.6rem; font-weight: 700; }
    .kpi-label { font-size: 0.8rem; opacity: 0.9; }
    </style>
    """,
    unsafe_allow_html=True,
)


def step_header(text: str) -> None:
    """Render a styled section header (markup reconstructed — see CSS above)."""
    st.markdown(f'<div class="step-header">{text}</div>', unsafe_allow_html=True)


# ── Session state init ────────────────────────────────────────────────────────
# One defaults dict replaces the original two-stage init (everything set to
# None, then analysis_done re-fixed to False) — same end state, one pass.
_SESSION_DEFAULTS = {
    "df_clean": None,
    "col_map": None,
    "merge_pairs": None,
    "analysis_done": False,  # bool, used directly in `if` checks below
    "api_key": None,
    "model_id": None,
    "model_label": None,
    "site_name": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# ── Sidebar ───────────────────────────────────────────────────────────────────
with st.sidebar:
    # NOTE(review): via.placeholder.com is no longer served — consider
    # bundling a local logo asset instead.
    st.image("https://via.placeholder.com/200x50/4F46E5/FFFFFF?text=Blog+Audit", width=200)
    st.markdown("---")
    st.markdown("### 📋 Steps")
    st.markdown("""
1. 📁 Upload your CSV
2. 🔑 Enter API key & pick model
3. 🚀 Run analysis
4. 📊 Review results
5. 💾 Download report
""")
    st.markdown("---")
    st.markdown("### ℹ️ CSV Format")
    st.markdown("""
Your CSV must have:
- A **URL** column (`Blogs` or similar)
- A **title** column (`H1` or `Title`)
- **4 monthly click** columns (any names)
- A **Total Clicks** column
""")
    st.markdown("---")
    st.caption("Blog Audit Tool — works for any website")

# ══════════════════════════════════════════════════════════════════════════════
# HEADER
# ══════════════════════════════════════════════════════════════════════════════
st.title("📊 Blog Performance Audit Tool")
st.markdown("Upload any website's blog click data CSV to get a full performance audit with AI-powered merge recommendations.")
st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 1 — UPLOAD CSV
# ══════════════════════════════════════════════════════════════════════════════
step_header("Step 1 — Upload Your CSV")

col_up1, col_up2 = st.columns([2, 1])
with col_up1:
    uploaded_file = st.file_uploader(
        "Upload your blog clicks CSV",
        type=["csv"],
        help="CSV with URL column, H1/title column, 4 monthly click columns, and Total Clicks column.",
        label_visibility="collapsed",
    )
with col_up2:
    site_name_input = st.text_input(
        "Website / Brand name (for report)",
        value="My Website",
        placeholder="e.g. Edstellar, TechBlog...",
    )

if uploaded_file:
    # Keep the try narrow: st.stop() raises StopException (an Exception
    # subclass), so calling it inside a broad `except Exception` block would
    # swallow the stop and re-report it as a CSV read error. Validation and
    # st.stop() therefore live OUTSIDE the try.
    try:
        df_raw = pd.read_csv(uploaded_file)
        col_map = detect_columns(df_raw)
    except Exception as e:
        st.error(f"❌ Error reading CSV: {e}")
        st.stop()

    # Validate detection
    if not col_map["url_col"]:
        st.error("❌ Could not detect a URL column. Make sure your CSV has a column with URLs (e.g. 'Blogs').")
        st.stop()
    if not col_map["month_cols"]:
        st.error("❌ Could not detect monthly click columns. Ensure your numeric click columns are present.")
        st.stop()

    df_clean = clean_and_tier(df_raw, col_map)
    st.session_state["df_clean"] = df_clean
    st.session_state["col_map"] = col_map
    st.session_state["site_name"] = site_name_input

    # Preview
    with st.expander("📋 CSV Preview & Column Detection", expanded=True):
        c1, c2, c3, c4 = st.columns(4)
        c1.metric("Total Blogs", len(df_clean))
        c2.metric("URL Column", col_map["url_col"])
        c3.metric("Title Column", col_map["title_col"] or "—")
        c4.metric("Month Columns", len(col_map["month_cols"]))
        st.markdown(f"**Detected month columns:** `{'` · `'.join(col_map['month_cols'])}`")
        st.dataframe(
            df_clean[["url", "title"] + col_map["month_cols"] + ["total_clicks", "tier", "trend"]].head(8),
            use_container_width=True,
            height=220,
        )
    st.success(f"✅ File loaded: **{len(df_clean)} blogs** detected.")

st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 2 — AI CONFIGURATION
# ══════════════════════════════════════════════════════════════════════════════
step_header("Step 2 — AI Configuration (OpenRouter)")
st.caption("Get a free API key at [openrouter.ai/keys](https://openrouter.ai/keys)")

ai_col1, ai_col2, ai_col3 = st.columns([2, 2, 1])
with ai_col1:
    # st.secrets raises FileNotFoundError when no secrets.toml exists, and
    # hasattr(st, "secrets") does not guard against that — catch instead.
    try:
        _default_key = st.secrets.get("OPENROUTER_API_KEY", "")
    except Exception:
        _default_key = ""
    api_key_input = st.text_input(
        "🔑 OpenRouter API Key",
        type="password",
        placeholder="sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxx",
        value=_default_key,
    )
with ai_col2:
    model_label = st.selectbox(
        "🧠 Select LLM Model",
        options=list(MODEL_OPTIONS.keys()),
        index=0,
    )
with ai_col3:
    # Vertical spacer so the button aligns with the inputs (reconstructed).
    st.markdown("<div style='height:1.8rem'></div>", unsafe_allow_html=True)
    test_btn = st.button("🔌 Test Connection", use_container_width=True)

if test_btn:
    if not api_key_input:
        st.warning("⚠️ Please enter your API key first.")
    else:
        with st.spinner("Testing connection..."):
            ok, msg = test_connection(api_key_input, MODEL_OPTIONS[model_label])
        if ok:
            st.session_state["api_key"] = api_key_input
            st.session_state["model_id"] = MODEL_OPTIONS[model_label]
            st.session_state["model_label"] = model_label
            st.success(f"✅ {msg} Model: **{model_label}**")
        else:
            st.error(f"❌ {msg}")

# Auto-save key if already entered without test
if api_key_input and not test_btn:
    st.session_state["api_key"] = api_key_input
    st.session_state["model_id"] = MODEL_OPTIONS[model_label]
    st.session_state["model_label"] = model_label

st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 3 — RUN ANALYSIS
# ══════════════════════════════════════════════════════════════════════════════
step_header("Step 3 — Run Analysis")

run_ready = (
    st.session_state.get("df_clean") is not None
    and st.session_state.get("api_key")
)
if not run_ready:
    st.info("⬆️ Complete Steps 1 and 2 above before running the analysis.")

run_btn = st.button(
    "🚀 Run Full Analysis",
    disabled=not run_ready,
    use_container_width=False,
    type="primary",
)

if run_btn:
    df_clean = st.session_state["df_clean"]
    col_map = st.session_state["col_map"]
    api_key = st.session_state["api_key"]
    model_id = st.session_state["model_id"]
    model_label = st.session_state["model_label"]

    progress_bar = st.progress(0, text="Starting analysis...")
    status_box = st.empty()

    # Step A — Embeddings
    status_box.info("🧠 Step 1/3 — Loading embedding model and generating title vectors...")
    progress_bar.progress(10, text="Loading embedding model...")
    with st.spinner("Generating embeddings (this may take 30–60s on first run)..."):
        load_embedder()  # warm up cache
        merge_candidates = find_merge_candidates(df_clean, threshold=0.72, max_weak_clicks=200)
    progress_bar.progress(45, text=f"Found {len(merge_candidates)} merge candidate pairs.")
    status_box.success(f"✅ Embeddings done — {len(merge_candidates)} potential merge pairs found.")
    time.sleep(0.4)

    # Step B — LLM enrichment
    if merge_candidates:
        status_box.info(f"🤖 Step 2/3 — Sending {len(merge_candidates)} pairs to {model_label} for merge reasoning...")
        progress_bar.progress(50, text="LLM analysis in progress...")
        llm_progress = st.empty()
        llm_prog_bar = st.progress(0)

        def llm_cb(done, total):
            # Guard against total == 0 so a misbehaving caller can't crash
            # the progress callback with a ZeroDivisionError.
            pct = int(done / max(total, 1) * 100)
            llm_prog_bar.progress(pct, text=f"LLM: {done}/{total} pairs processed")

        enriched_pairs = enrich_merge_pairs(
            merge_candidates,
            api_key,
            model_id,
            batch_size=10,
            progress_callback=llm_cb,
        )
        llm_prog_bar.empty()
        llm_progress.empty()
    else:
        enriched_pairs = []

    st.session_state["merge_pairs"] = enriched_pairs
    # Fix: drop approvals from any previous run — their indices would
    # mis-map onto a new, differently-sized pair list.
    st.session_state.pop("merge_approvals", None)
    progress_bar.progress(90, text="Building report...")

    # Step C — Done
    status_box.success(f"✅ Analysis complete! {len(enriched_pairs)} merge recommendations generated.")
    progress_bar.progress(100, text="✅ Analysis complete!")
    st.session_state["analysis_done"] = True
    time.sleep(0.5)
    st.rerun()

st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 4 — RESULTS DASHBOARD
# ══════════════════════════════════════════════════════════════════════════════
if st.session_state["analysis_done"] and st.session_state["df_clean"] is not None:
    df_clean = st.session_state["df_clean"]
    col_map = st.session_state["col_map"]
    month_cols = col_map["month_cols"]
    # `or []`: the key is initialized to None, so .get(..., []) alone could
    # still hand back None and crash len() below.
    merge_pairs = st.session_state.get("merge_pairs") or []
    site_name = st.session_state.get("site_name", "Website")

    step_header("Step 4 — Results Dashboard")

    counts = get_tier_summary(df_clean)

    # ── KPI metric row ──────────────────────────────────────────────────────
    m_cols = st.columns(7)
    kpis = [
        ("Total Blogs", len(df_clean), "#4F46E5"),
        ("🔴 Take Down", counts["TAKEDOWN_ZERO"] + counts["TAKEDOWN_LOW"], "#E74C3C"),
        ("🔵 Merge", len(merge_pairs), "#2980B9"),
        ("🟡 Monitor", counts["MONITOR"], "#F39C12"),
        ("✅ Performing", counts["PERFORMING"], "#27AE60"),
        ("💪 Strong", counts["STRONG"], "#1ABC9C"),
        ("🏆 Top", counts["TOP"], "#9B59B6"),
    ]
    for col_w, (label, val, color) in zip(m_cols, kpis):
        # KPI card markup reconstructed to match the surviving value/label text.
        col_w.markdown(
            f'<div class="kpi-card" style="background:{color};">'
            f'<div class="kpi-value">{val}</div>'
            f'<div class="kpi-label">{label}</div>'
            f'</div>',
            unsafe_allow_html=True,
        )
    st.markdown("<br>", unsafe_allow_html=True)

    # ── Charts ──────────────────────────────────────────────────────────────
    chart_col1, chart_col2 = st.columns(2)

    with chart_col1:
        st.markdown("**Click Distribution by Tier**")
        tier_labels = [TIER_META[t]["label"] for t in counts]
        tier_values = list(counts.values())
        tier_colors = [TIER_META[t]["color"] for t in counts]
        fig_pie = go.Figure(go.Pie(
            labels=tier_labels,
            values=tier_values,
            marker_colors=tier_colors,
            hole=0.45,
            textinfo="label+percent",
        ))
        fig_pie.update_layout(
            showlegend=False,
            margin=dict(t=10, b=10, l=10, r=10),
            height=280,
        )
        st.plotly_chart(fig_pie, use_container_width=True)

    with chart_col2:
        st.markdown("**Monthly Click Trend (Top 5 Blogs)**")
        top5 = df_clean.nlargest(5, "total_clicks")
        fig_line = go.Figure()
        for _, row in top5.iterrows():
            short_title = row["title"][:40] + "…" if len(row["title"]) > 40 else row["title"]
            fig_line.add_trace(go.Scatter(
                x=month_cols,
                y=[row[m] for m in month_cols],
                mode="lines+markers",
                name=short_title,
            ))
        fig_line.update_layout(
            margin=dict(t=10, b=10, l=10, r=10),
            height=280,
            legend=dict(font=dict(size=9)),
            xaxis_title="Month",
            yaxis_title="Clicks",
        )
        st.plotly_chart(fig_line, use_container_width=True)

    st.markdown("---")

    # ── Result Tabs ─────────────────────────────────────────────────────────
    tab_td, tab_merge, tab_mon, tab_perf, tab_str, tab_top = st.tabs([
        f"🔴 Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']})",
        f"🔵 Merge ({len(merge_pairs)})",
        f"🟡 Monitor ({counts['MONITOR']})",
        f"✅ Performing ({counts['PERFORMING']})",
        f"💪 Strong ({counts['STRONG']})",
        f"🏆 Top ({counts['TOP']})",
    ])

    display_cols_base = ["url", "title"] + month_cols + ["total_clicks", "trend", "action"]

    # ── Take Down tab ───────────────────────────────────────────────────────
    with tab_td:
        st.markdown("Blogs with zero or negligible traffic. Recommended for removal or merge.")
        td_df = df_clean[df_clean["tier"].isin(["TAKEDOWN_ZERO", "TAKEDOWN_LOW"])].sort_values("total_clicks")
        if len(td_df):
            # Search filter
            search_td = st.text_input("🔍 Filter by URL or title", key="search_td", placeholder="Type to filter...")
            if search_td:
                mask = td_df["url"].str.contains(search_td, case=False, na=False) | \
                       td_df["title"].str.contains(search_td, case=False, na=False)
                td_df = td_df[mask]
            st.dataframe(td_df[display_cols_base], use_container_width=True, height=400)
            st.caption(f"{len(td_df)} blogs")
        else:
            st.success("🎉 No take-down candidates found!")

    # ── Merge tab ───────────────────────────────────────────────────────────
    with tab_merge:
        st.markdown("AI-detected topically overlapping blogs. Review and approve/reject each suggestion.")
        if merge_pairs:
            # Review interface
            st.markdown("**✅ = Keep in report &nbsp;&nbsp; ❌ = Reject suggestion**")

            # Init approval state — default every suggestion to approved.
            if "merge_approvals" not in st.session_state:
                st.session_state["merge_approvals"] = {i: True for i in range(len(merge_pairs))}

            col_filter, col_actions = st.columns([3, 1])
            with col_filter:
                search_merge = st.text_input("🔍 Filter merges", key="search_merge", placeholder="Filter by title or cluster...")
            with col_actions:
                # Vertical spacer for button alignment (reconstructed).
                st.markdown("<div style='height:1.8rem'></div>", unsafe_allow_html=True)
                if st.button("✅ Approve All", key="approve_all"):
                    for i in range(len(merge_pairs)):
                        st.session_state["merge_approvals"][i] = True
                    st.rerun()

            # Display table
            merge_display = []
            for i, p in enumerate(merge_pairs):
                if search_merge and search_merge.lower() not in p.get("weak_title", "").lower() \
                        and search_merge.lower() not in p.get("strong_title", "").lower() \
                        and search_merge.lower() not in p.get("topic_cluster", "").lower():
                    continue
                approved = st.session_state["merge_approvals"].get(i, True)
                merge_display.append({
                    "#": i + 1,
                    "Weak Blog (Merge FROM)": p.get("weak_title", "")[:60],
                    "Weak Clicks": p.get("weak_clicks", 0),
                    "Strong Blog (Merge INTO)": p.get("strong_title", "")[:60],
                    "Strong Clicks": p.get("strong_clicks", 0),
                    "Similarity": p.get("similarity", ""),
                    "Topic Cluster": p.get("topic_cluster", ""),
                    "AI Merge Reason": p.get("merge_reason", "")[:100],
                    "Approved": "✅ Yes" if approved else "❌ Rejected",
                })
            merge_df_display = pd.DataFrame(merge_display)
            st.dataframe(merge_df_display, use_container_width=True, height=380)

            # Approve/Reject individual rows
            st.markdown("**Toggle individual suggestions:**")
            toggle_cols = st.columns(5)
            for i, p in enumerate(merge_pairs[:20]):  # show first 20 toggles
                col_idx = i % 5
                with toggle_cols[col_idx]:
                    current = st.session_state["merge_approvals"].get(i, True)
                    label = f"#{i+1}: {p.get('weak_title','')[:25]}…"
                    new_val = st.checkbox(label, value=current, key=f"merge_toggle_{i}")
                    st.session_state["merge_approvals"][i] = new_val

            approved_count = sum(st.session_state["merge_approvals"].values())
            st.caption(f"✅ {approved_count} approved · ❌ {len(merge_pairs) - approved_count} rejected")
        else:
            st.info("No merge candidates detected above the similarity threshold.")

    # ── Generic tier tab helper ─────────────────────────────────────────────
    def render_tier_tab(tier_key, tab_obj, search_key):
        """Render one tier tab: text + trend filters over df_clean rows of tier_key."""
        with tab_obj:
            subset = df_clean[df_clean["tier"] == tier_key].sort_values("total_clicks", ascending=False)
            search = st.text_input("🔍 Filter", key=search_key, placeholder="Filter by URL or title...")
            if search:
                mask = subset["url"].str.contains(search, case=False, na=False) | \
                       subset["title"].str.contains(search, case=False, na=False)
                subset = subset[mask]
            trend_filter = st.selectbox(
                "Trend filter",
                ["All", "📈 Growing", "📉 Declining", "➡️ Stable"],
                key=f"{search_key}_trend",
            )
            if trend_filter != "All":
                subset = subset[subset["trend"] == trend_filter]
            st.dataframe(subset[display_cols_base], use_container_width=True, height=400)
            st.caption(f"{len(subset)} blogs")

    render_tier_tab("MONITOR", tab_mon, "search_mon")
    render_tier_tab("PERFORMING", tab_perf, "search_perf")
    render_tier_tab("STRONG", tab_str, "search_str")
    render_tier_tab("TOP", tab_top, "search_top")

    st.markdown("---")

    # ══════════════════════════════════════════════════════════════════════
    # STEP 5 — DOWNLOAD REPORT
    # ══════════════════════════════════════════════════════════════════════
    step_header("Step 5 — Download Excel Report")

    # Apply approvals to merge pairs before export
    approvals = st.session_state.get("merge_approvals", {})
    approved_pairs = [p for i, p in enumerate(merge_pairs) if approvals.get(i, True)]

    dl_col1, dl_col2 = st.columns([2, 3])
    with dl_col1:
        with st.spinner("Building Excel report..."):
            excel_bytes = build_excel(
                df_clean,
                approved_pairs,
                month_cols,
                site_name=st.session_state.get("site_name", "Website"),
            )
        filename = f"{site_name.replace(' ', '_').lower()}_blog_audit_report.xlsx"
        st.download_button(
            label="⬇️ Download 7-Tab Excel Report",
            data=excel_bytes,
            file_name=filename,
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            type="primary",
            use_container_width=True,
        )
    with dl_col2:
        st.markdown(f"""
**Report includes 7 tabs:**
- 📊 Summary Dashboard
- 🔴 Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']} blogs)
- 🔵 Merge Recommendations ({len(approved_pairs)} approved pairs)
- 🟡 Monitor · ✅ Performing · 💪 Strong · 🏆 Top Performers
""")
        st.caption(f"Model used: {st.session_state.get('model_label','—')}")

elif not st.session_state["analysis_done"]:
    step_header("Step 4 — Results Dashboard")
    st.info("⬆️ Complete Steps 1–3 above to see results here.")