"""Streamlit entry point for the Blog Performance Audit Tool.

Flow: upload a blog-clicks CSV → configure an OpenRouter LLM → run
embedding + LLM merge analysis → review the results dashboard →
download a multi-tab Excel report.

NOTE(review): the source under review arrived with its physical lines
collapsed and several HTML string payloads stripped mid-literal. This
file is a careful reconstruction; every span whose original content
could not be recovered verbatim is marked with a NOTE(review) comment
and should be confirmed against version control.
"""

import time

import pandas as pd
import plotly.express as px            # noqa: F401 — chart code (lost in mangling) used these
import plotly.graph_objects as go      # noqa: F401
import streamlit as st

from utils.data_processor import (
    TIER_META,                         # noqa: F401 — used by the lost chart/legend section
    clean_and_tier,
    detect_columns,
    get_tier_summary,
)
from utils.embeddings import find_merge_candidates, load_embedder
from utils.excel_builder import build_excel
from utils.llm_client import MODEL_OPTIONS, enrich_merge_pairs, test_connection

# ── Page config ──────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="Blog Audit Tool",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ── Global CSS ───────────────────────────────────────────────────────────────
# NOTE(review): the original CSS payload was lost from the source we reviewed;
# the empty <style> block is kept as the slot where it should be restored.
st.markdown("""
<style>
</style>
""", unsafe_allow_html=True)

# ── Session state init ───────────────────────────────────────────────────────
# One defaults table instead of "set everything to None, then patch
# analysis_done" — analysis_done must start as False (not None) because it is
# truth-tested below, and setdefault() never clobbers values that survive a
# Streamlit rerun.
_STATE_DEFAULTS = {
    "df_clean": None,
    "col_map": None,
    "merge_pairs": None,
    "analysis_done": False,
    "api_key": None,
    "model_id": None,
    "model_label": None,
    "site_name": None,
}
for _key, _default in _STATE_DEFAULTS.items():
    st.session_state.setdefault(_key, _default)

# ── Sidebar ──────────────────────────────────────────────────────────────────
with st.sidebar:
    st.image("https://via.placeholder.com/200x50/4F46E5/FFFFFF?text=Blog+Audit", width=200)
    st.markdown("---")
    st.markdown("### 📋 Steps")
    st.markdown("""
1. 📁 Upload your CSV
2. 🔑 Enter API key & pick model
3. 🚀 Run analysis
4. 📊 Review results
5. 💾 Download report
""")
    st.markdown("---")
    st.markdown("### ℹ️ CSV Format")
    st.markdown("""
Your CSV must have:
- A **URL** column (`Blogs` or similar)
- A **title** column (`H1` or `Title`)
- **4 monthly click** columns (any names)
- A **Total Clicks** column
""")
    st.markdown("---")
    st.caption("Blog Audit Tool — works for any website")

# ══════════════════════════════════════════════════════════════════════════════
# HEADER
# ══════════════════════════════════════════════════════════════════════════════
st.title("📊 Blog Performance Audit Tool")
st.markdown(
    "Upload any website's blog click data CSV to get a full performance audit "
    "with AI-powered merge recommendations."
)
st.markdown("---")


def _step_header(text: str) -> None:
    """Render a section heading.

    NOTE(review): the original emitted a styled ``st.markdown(<div ...>,
    unsafe_allow_html=True)`` whose HTML was stripped from the source; the
    exact div/class markup should be restored from version control. A plain
    markdown heading is used as the safe reconstruction.
    """
    st.markdown(f"### {text}")


# ══════════════════════════════════════════════════════════════════════════════
# STEP 1 — UPLOAD CSV
# ══════════════════════════════════════════════════════════════════════════════
_step_header("Step 1 — Upload Your CSV")

col_up1, col_up2 = st.columns([2, 1])
with col_up1:
    uploaded_file = st.file_uploader(
        "Upload your blog clicks CSV",
        type=["csv"],
        help="CSV with URL column, H1/title column, 4 monthly click columns, and Total Clicks column.",
        label_visibility="collapsed",
    )
with col_up2:
    site_name_input = st.text_input(
        "Website / Brand name (for report)",
        value="My Website",
        placeholder="e.g. Edstellar, TechBlog...",
    )

if uploaded_file:
    try:
        df_raw = pd.read_csv(uploaded_file)
        col_map = detect_columns(df_raw)

        # Validate detection: a usable URL column and monthly click columns
        # are hard requirements for everything downstream.
        if not col_map["url_col"]:
            st.error("❌ Could not detect a URL column. Make sure your CSV has a column with URLs (e.g. 'Blogs').")
            st.stop()
        if not col_map["month_cols"]:
            st.error("❌ Could not detect monthly click columns. Ensure your numeric click columns are present.")
            st.stop()

        df_clean = clean_and_tier(df_raw, col_map)
        st.session_state["df_clean"] = df_clean
        st.session_state["col_map"] = col_map
        st.session_state["site_name"] = site_name_input

        # Preview of the cleaned data and what was auto-detected.
        with st.expander("📋 CSV Preview & Column Detection", expanded=True):
            c1, c2, c3, c4 = st.columns(4)
            c1.metric("Total Blogs", len(df_clean))
            c2.metric("URL Column", col_map["url_col"])
            c3.metric("Title Column", col_map["title_col"] or "—")
            c4.metric("Month Columns", len(col_map["month_cols"]))
            st.markdown(f"**Detected month columns:** `{'` · `'.join(col_map['month_cols'])}`")
            st.dataframe(
                df_clean[["url", "title"] + col_map["month_cols"] + ["total_clicks", "tier", "trend"]].head(8),
                use_container_width=True,
                height=220,
            )
        st.success(f"✅ File loaded: **{len(df_clean)} blogs** detected.")
    except Exception as e:
        # Best-effort boundary: surface the parse/processing failure to the
        # user rather than crashing the app.
        st.error(f"❌ Error reading CSV: {e}")
        st.stop()

st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 2 — AI CONFIGURATION
# ══════════════════════════════════════════════════════════════════════════════
_step_header("Step 2 — AI Configuration (OpenRouter)")
st.caption("Get a free API key at [openrouter.ai/keys](https://openrouter.ai/keys)")

# st.secrets raises (not just returns empty) when no secrets file exists, so a
# plain hasattr() guard is not enough — fall back to "" on any access failure.
try:
    _default_api_key = st.secrets.get("OPENROUTER_API_KEY", "")
except Exception:
    _default_api_key = ""

ai_col1, ai_col2, ai_col3 = st.columns([2, 2, 1])
with ai_col1:
    api_key_input = st.text_input(
        "🔑 OpenRouter API Key",
        type="password",
        placeholder="sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxx",
        value=_default_api_key,
    )
with ai_col2:
    model_label = st.selectbox(
        "🧠 Select LLM Model",
        options=list(MODEL_OPTIONS.keys()),
        index=0,
    )
with ai_col3:
    # NOTE(review): the original ai_col3 content (spacer + test-connection
    # button) was lost mid-string in the source; reconstructed minimally.
    st.markdown("&nbsp;", unsafe_allow_html=True)  # vertical spacer to align the button
    test_btn = st.button("🔌 Test", use_container_width=True)

# Persist the chosen credentials/model so the Step 3 run-gate and the run
# itself can read them from session state across reruns.
# NOTE(review): this persistence existed in the original (Step 3 reads these
# keys) but its exact form was lost — confirm against version control.
if api_key_input:
    st.session_state["api_key"] = api_key_input
    st.session_state["model_id"] = MODEL_OPTIONS[model_label]
    st.session_state["model_label"] = model_label

if test_btn:
    if not api_key_input:
        st.warning("Enter an API key first.")
    else:
        with st.spinner("Testing connection..."):
            # NOTE(review): assumes test_connection(key, model_id) returns
            # (ok: bool, message: str) — confirm against utils.llm_client.
            ok, msg = test_connection(api_key_input, MODEL_OPTIONS[model_label])
        (st.success if ok else st.error)(msg)

st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 3 — RUN ANALYSIS
# ══════════════════════════════════════════════════════════════════════════════
_step_header("Step 3 — Run Analysis")

run_ready = (
    st.session_state.get("df_clean") is not None
    and st.session_state.get("api_key")
)
if not run_ready:
    st.info("⬆️ Complete Steps 1 and 2 above before running the analysis.")

run_btn = st.button(
    "🚀 Run Full Analysis",
    disabled=not run_ready,
    use_container_width=False,
    type="primary",
)

if run_btn:
    df_clean = st.session_state["df_clean"]
    col_map = st.session_state["col_map"]
    api_key = st.session_state["api_key"]
    model_id = st.session_state["model_id"]
    model_label = st.session_state["model_label"]

    progress_bar = st.progress(0, text="Starting analysis...")
    status_box = st.empty()

    # Step A — Embeddings: vectorize titles and find semantically similar
    # weak posts that are candidates for merging.
    status_box.info("🧠 Step 1/3 — Loading embedding model and generating title vectors...")
    progress_bar.progress(10, text="Loading embedding model...")
    with st.spinner("Generating embeddings (this may take 30–60s on first run)..."):
        load_embedder()  # warm up cache so the candidate search below is fast
        merge_candidates = find_merge_candidates(df_clean, threshold=0.72, max_weak_clicks=200)
    progress_bar.progress(45, text=f"Found {len(merge_candidates)} merge candidate pairs.")
    status_box.success(f"✅ Embeddings done — {len(merge_candidates)} potential merge pairs found.")
    time.sleep(0.4)

    # Step B — LLM enrichment: only worth a round-trip if there are pairs.
    if merge_candidates:
        status_box.info(
            f"🤖 Step 2/3 — Sending {len(merge_candidates)} pairs to {model_label} for merge reasoning..."
        )
        progress_bar.progress(50, text="LLM analysis in progress...")
        llm_progress = st.empty()
        llm_prog_bar = st.progress(0)

        def llm_cb(done, total):
            # total >= 1 here because merge_candidates is non-empty.
            pct = int(done / total * 100)
            llm_prog_bar.progress(pct, text=f"LLM: {done}/{total} pairs processed")

        enriched_pairs = enrich_merge_pairs(
            merge_candidates,
            api_key,
            model_id,
            batch_size=10,
            progress_callback=llm_cb,
        )
        llm_prog_bar.empty()
        llm_progress.empty()
    else:
        enriched_pairs = []

    st.session_state["merge_pairs"] = enriched_pairs
    progress_bar.progress(90, text="Building report...")

    # Step C — Done: flag completion and rerun so the dashboard renders.
    status_box.success(f"✅ Analysis complete! {len(enriched_pairs)} merge recommendations generated.")
    progress_bar.progress(100, text="✅ Analysis complete!")
    st.session_state["analysis_done"] = True
    time.sleep(0.5)
    st.rerun()

st.markdown("---")

# ══════════════════════════════════════════════════════════════════════════════
# STEP 4 — RESULTS DASHBOARD
# ══════════════════════════════════════════════════════════════════════════════
if st.session_state["analysis_done"] and st.session_state["df_clean"] is not None:
    df_clean = st.session_state["df_clean"]
    col_map = st.session_state["col_map"]
    month_cols = col_map["month_cols"]
    merge_pairs = st.session_state.get("merge_pairs", [])
    site_name = st.session_state.get("site_name", "Website")

    _step_header("Step 4 — Results Dashboard")

    counts = get_tier_summary(df_clean)

    # ── KPI metric row ──────────────────────────────────────────────────────
    m_cols = st.columns(7)
    kpis = [
        ("Total Blogs", len(df_clean), "#4F46E5"),
        ("🔴 Take Down", counts["TAKEDOWN_ZERO"] + counts["TAKEDOWN_LOW"], "#E74C3C"),
        ("🔵 Merge", len(merge_pairs), "#2980B9"),
        ("🟡 Monitor", counts["MONITOR"], "#F39C12"),
        ("✅ Performing", counts["PERFORMING"], "#27AE60"),
        ("💪 Strong", counts["STRONG"], "#1ABC9C"),
        ("🏆 Top", counts["TOP"], "#9B59B6"),
    ]
    for col_w, (label, val, color) in zip(m_cols, kpis):
        # NOTE(review): the original metric-card HTML f-string was stripped
        # from the source; this card markup is a reconstruction — restore the
        # exact styling from version control.
        col_w.markdown(
            f'<div style="text-align:center;border-top:3px solid {color};'
            f'padding:8px 4px;border-radius:6px;">'
            f'<div style="font-size:1.4rem;font-weight:700;">{val}</div>'
            f'<div style="font-size:0.8rem;">{label}</div></div>',
            unsafe_allow_html=True,
        )

    # NOTE(review): the original dashboard body (tier charts via plotly,
    # merge-pair review with per-pair approval checkboxes writing
    # st.session_state["merge_approvals"]) was lost in the mangled source —
    # Step 5 below still honors any approvals found in session state.

    # ══════════════════════════════════════════════════════════════════════
    # STEP 5 — DOWNLOAD
    # ══════════════════════════════════════════════════════════════════════
    _step_header("Step 5 — Download Excel Report")

    # Apply approvals to merge pairs before export; pairs default to approved
    # when no explicit decision was recorded.
    approvals = st.session_state.get("merge_approvals", {})
    approved_pairs = [p for i, p in enumerate(merge_pairs) if approvals.get(i, True)]

    dl_col1, dl_col2 = st.columns([2, 3])
    with dl_col1:
        with st.spinner("Building Excel report..."):
            excel_bytes = build_excel(
                df_clean,
                approved_pairs,
                month_cols,
                site_name=st.session_state.get("site_name", "Website"),
            )
        filename = f"{site_name.replace(' ', '_').lower()}_blog_audit_report.xlsx"
        st.download_button(
            label="⬇️ Download 7-Tab Excel Report",
            data=excel_bytes,
            file_name=filename,
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            type="primary",
            use_container_width=True,
        )
    with dl_col2:
        st.markdown(f"""
**Report includes 7 tabs:**
- 📊 Summary Dashboard
- 🔴 Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']} blogs)
- 🔵 Merge Recommendations ({len(approved_pairs)} approved pairs)
- 🟡 Monitor · ✅ Performing · 💪 Strong · 🏆 Top Performers
""")
        st.caption(f"Model used: {st.session_state.get('model_label', '—')}")

elif not st.session_state["analysis_done"]:
    _step_header("Step 4 — Results Dashboard")
    st.info("⬆️ Complete Steps 1–3 above to see results here.")