Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| import time | |
| from utils.data_processor import detect_columns, clean_and_tier, get_tier_summary, TIER_META | |
| from utils.embeddings import find_merge_candidates, load_embedder | |
| from utils.llm_client import MODEL_OPTIONS, test_connection, enrich_merge_pairs | |
| from utils.excel_builder import build_excel | |
# -- Page configuration -------------------------------------------------------
# Collected in one mapping so the page settings read as data, not as a call.
_PAGE_CONFIG = {
    "page_title": "Blog Audit Tool",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# -- Global CSS ---------------------------------------------------------------
# Injected once per rerun; `unsafe_allow_html` is required for raw <style>.
_GLOBAL_CSS = """
<style>
.block-container { padding-top: 1.5rem; }
.metric-card {
background: #F8F9FA;
border: 1px solid #E0E0E0;
border-radius: 10px;
padding: 16px 20px;
text-align: center;
}
.metric-card .value { font-size: 2rem; font-weight: 700; }
.metric-card .label { font-size: 0.82rem; color: #666; margin-top: 2px; }
.section-header {
font-size: 1.15rem; font-weight: 700;
color: #1A1A2E; margin: 1.2rem 0 0.4rem 0;
padding-bottom: 4px; border-bottom: 2px solid #4F46E5;
display: inline-block;
}
.status-ok { color: #2ECC71; font-weight: 600; }
.status-err { color: #E74C3C; font-weight: 600; }
div[data-testid="stDataFrame"] { border-radius: 8px; }
.stButton > button {
border-radius: 8px; font-weight: 600;
}
.approve-row { background: #E8F5E9 !important; }
.reject-row { background: #FFEBEE !important; }
</style>
"""
st.markdown(_GLOBAL_CSS, unsafe_allow_html=True)
| # ββ Session state init ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| for key in ["df_clean", "col_map", "merge_pairs", "analysis_done", | |
| "api_key", "model_id", "model_label", "site_name"]: | |
| if key not in st.session_state: | |
| st.session_state[key] = None | |
| if "analysis_done" not in st.session_state or st.session_state["analysis_done"] is None: | |
| st.session_state["analysis_done"] = False | |
| # ββ Sidebar βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| with st.sidebar: | |
| st.image("https://via.placeholder.com/200x50/4F46E5/FFFFFF?text=Blog+Audit", width=200) | |
| st.markdown("---") | |
| st.markdown("### π Steps") | |
| st.markdown(""" | |
| 1. π Upload your CSV | |
| 2. π Enter API key & pick model | |
| 3. π Run analysis | |
| 4. π Review results | |
| 5. πΎ Download report | |
| """) | |
| st.markdown("---") | |
| st.markdown("### βΉοΈ CSV Format") | |
| st.markdown(""" | |
| Your CSV must have: | |
| - A **URL** column (`Blogs` or similar) | |
| - A **title** column (`H1` or `Title`) | |
| - **4 monthly click** columns (any names) | |
| - A **Total Clicks** column | |
| """) | |
| st.markdown("---") | |
| st.caption("Blog Audit Tool β works for any website") | |
# =============================================================================
# HEADER
# =============================================================================
st.title("π Blog Performance Audit Tool")
st.markdown(
    "Upload any website's blog click data CSV to get a full performance "
    "audit with AI-powered merge recommendations."
)
st.markdown("---")
# =============================================================================
# STEP 1 β UPLOAD CSV
# =============================================================================
st.markdown('<p class="section-header">Step 1 β Upload Your CSV</p>', unsafe_allow_html=True)
col_up1, col_up2 = st.columns([2, 1])
with col_up1:
    uploaded_file = st.file_uploader(
        "Upload your blog clicks CSV",
        type=["csv"],
        help="CSV with URL column, H1/title column, 4 monthly click columns, and Total Clicks column.",
        label_visibility="collapsed",
    )
with col_up2:
    site_name_input = st.text_input(
        "Website / Brand name (for report)",
        value="My Website",
        placeholder="e.g. Edstellar, TechBlog...",
    )
if uploaded_file:
    # BUG FIX: this whole step used to sit inside one `try/except Exception`.
    # Streamlit implements st.stop() by raising StopException — an Exception
    # subclass — so the broad handler swallowed the intentional stops after
    # validation failures and mis-reported them as "Error reading CSV".
    # The try now covers only the CSV parse itself.
    try:
        df_raw = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"β Error reading CSV: {e}")
        st.stop()
    col_map = detect_columns(df_raw)
    # Validate detection — these stops now terminate the script run cleanly.
    if not col_map["url_col"]:
        st.error("β Could not detect a URL column. Make sure your CSV has a column with URLs (e.g. 'Blogs').")
        st.stop()
    if not col_map["month_cols"]:
        st.error("β Could not detect monthly click columns. Ensure your numeric click columns are present.")
        st.stop()
    # Processing errors get their own, accurate message instead of being
    # blamed on CSV reading.
    try:
        df_clean = clean_and_tier(df_raw, col_map)
    except Exception as e:
        st.error(f"β Error processing CSV: {e}")
        st.stop()
    st.session_state["df_clean"] = df_clean
    st.session_state["col_map"] = col_map
    st.session_state["site_name"] = site_name_input
    # Preview
    with st.expander("π CSV Preview & Column Detection", expanded=True):
        c1, c2, c3, c4 = st.columns(4)
        c1.metric("Total Blogs", len(df_clean))
        c2.metric("URL Column", col_map["url_col"])
        c3.metric("Title Column", col_map["title_col"] or "β")
        c4.metric("Month Columns", len(col_map["month_cols"]))
        st.markdown(f"**Detected month columns:** `{'` Β· `'.join(col_map['month_cols'])}`")
        # NOTE(review): assumes clean_and_tier always emits "url"/"title"
        # columns even when no title column was detected — confirm in
        # utils.data_processor.
        st.dataframe(df_clean[["url", "title"] + col_map["month_cols"] + ["total_clicks", "tier", "trend"]].head(8),
                     use_container_width=True, height=220)
    st.success(f"β File loaded: **{len(df_clean)} blogs** detected.")
st.markdown("---")
# =============================================================================
# STEP 2 β AI CONFIGURATION
# =============================================================================
st.markdown('<p class="section-header">Step 2 β AI Configuration (OpenRouter)</p>', unsafe_allow_html=True)
st.caption("Get a free API key at [openrouter.ai/keys](https://openrouter.ai/keys)")


def _secret_api_key() -> str:
    """Return OPENROUTER_API_KEY from st.secrets, or "" when unavailable.

    BUG FIX: the old inline guard was `hasattr(st, "secrets")`, which is
    always True on modern Streamlit; merely touching st.secrets raises
    (FileNotFoundError / StreamlitSecretNotFoundError) when no secrets.toml
    exists, so a fresh deployment without secrets crashed on this widget.
    """
    try:
        return st.secrets.get("OPENROUTER_API_KEY", "")
    except Exception:
        return ""


ai_col1, ai_col2, ai_col3 = st.columns([2, 2, 1])
with ai_col1:
    api_key_input = st.text_input(
        "π OpenRouter API Key",
        type="password",
        placeholder="sk-or-v1-xxxxxxxxxxxxxxxxxxxxxxxx",
        value=_secret_api_key(),
    )
with ai_col2:
    model_label = st.selectbox(
        "π§ Select LLM Model",
        options=list(MODEL_OPTIONS.keys()),
        index=0,
    )
with ai_col3:
    st.markdown("<br>", unsafe_allow_html=True)
    test_btn = st.button("π Test Connection", use_container_width=True)
if test_btn:
    if not api_key_input:
        st.warning("β οΈ Please enter your API key first.")
    else:
        with st.spinner("Testing connection..."):
            ok, msg = test_connection(api_key_input, MODEL_OPTIONS[model_label])
        if ok:
            st.session_state["api_key"] = api_key_input
            st.session_state["model_id"] = MODEL_OPTIONS[model_label]
            st.session_state["model_label"] = model_label
            st.success(f"β {msg} Model: **{model_label}**")
        else:
            st.error(f"β {msg}")
# Auto-save key if already entered without test
if api_key_input and not test_btn:
    st.session_state["api_key"] = api_key_input
    st.session_state["model_id"] = MODEL_OPTIONS[model_label]
    st.session_state["model_label"] = model_label
st.markdown("---")
# =============================================================================
# STEP 3 β RUN ANALYSIS
# =============================================================================
st.markdown('<p class="section-header">Step 3 β Run Analysis</p>', unsafe_allow_html=True)
run_ready = (
    st.session_state.get("df_clean") is not None
    and st.session_state.get("api_key")
)
if not run_ready:
    st.info("β¬οΈ Complete Steps 1 and 2 above before running the analysis.")
run_btn = st.button(
    "π Run Full Analysis",
    disabled=not run_ready,
    use_container_width=False,
    type="primary",
)
if run_btn:
    df_clean = st.session_state["df_clean"]
    col_map = st.session_state["col_map"]
    api_key = st.session_state["api_key"]
    model_id = st.session_state["model_id"]
    model_label = st.session_state["model_label"]
    progress_bar = st.progress(0, text="Starting analysis...")
    status_box = st.empty()
    # Step A β Embeddings
    status_box.info("π§ Step 1/3 β Loading embedding model and generating title vectors...")
    progress_bar.progress(10, text="Loading embedding model...")
    with st.spinner("Generating embeddings (this may take 30β60s on first run)..."):
        load_embedder()  # warm up cache
        merge_candidates = find_merge_candidates(df_clean, threshold=0.72, max_weak_clicks=200)
    progress_bar.progress(45, text=f"Found {len(merge_candidates)} merge candidate pairs.")
    status_box.success(f"β Embeddings done β {len(merge_candidates)} potential merge pairs found.")
    time.sleep(0.4)
    # Step B β LLM enrichment
    if merge_candidates:
        status_box.info(f"π€ Step 2/3 β Sending {len(merge_candidates)} pairs to {model_label} for merge reasoning...")
        progress_bar.progress(50, text="LLM analysis in progress...")
        llm_prog_bar = st.progress(0)

        def llm_cb(done, total):
            # BUG FIX: guard total == 0 so a callback fired with an empty
            # batch cannot raise ZeroDivisionError mid-analysis.
            pct = int(done / total * 100) if total else 100
            llm_prog_bar.progress(pct, text=f"LLM: {done}/{total} pairs processed")

        enriched_pairs = enrich_merge_pairs(
            merge_candidates, api_key, model_id,
            batch_size=10, progress_callback=llm_cb,
        )
        llm_prog_bar.empty()
    else:
        enriched_pairs = []
    st.session_state["merge_pairs"] = enriched_pairs
    # BUG FIX: discard approvals from a previous run. The approvals dict is
    # keyed by pair index, so stale entries would silently mislabel a freshly
    # generated pair list on the results page.
    st.session_state.pop("merge_approvals", None)
    progress_bar.progress(90, text="Building report...")
    # Step C β Done
    status_box.success(f"β Analysis complete! {len(enriched_pairs)} merge recommendations generated.")
    progress_bar.progress(100, text="β Analysis complete!")
    st.session_state["analysis_done"] = True
    time.sleep(0.5)
    st.rerun()
st.markdown("---")
# =============================================================================
# STEP 4 β RESULTS DASHBOARD
# =============================================================================
if st.session_state["analysis_done"] and st.session_state["df_clean"] is not None:
    df_clean = st.session_state["df_clean"]
    col_map = st.session_state["col_map"]
    month_cols = col_map["month_cols"]
    # BUG FIX: these keys are pre-seeded with None at startup, so a plain
    # .get(key, default) returns None, not the default; `or` supplies a real
    # fallback (previously site_name=None would crash .replace() below).
    merge_pairs = st.session_state.get("merge_pairs") or []
    site_name = st.session_state.get("site_name") or "Website"
    st.markdown('<p class="section-header">Step 4 β Results Dashboard</p>', unsafe_allow_html=True)
    counts = get_tier_summary(df_clean)
    # -- KPI metric row -------------------------------------------------------
    m_cols = st.columns(7)
    kpis = [
        ("Total Blogs", len(df_clean), "#4F46E5"),
        ("π΄ Take Down", counts["TAKEDOWN_ZERO"] + counts["TAKEDOWN_LOW"], "#E74C3C"),
        ("π΅ Merge", len(merge_pairs), "#2980B9"),
        ("π‘ Monitor", counts["MONITOR"], "#F39C12"),
        ("β Performing", counts["PERFORMING"], "#27AE60"),
        ("πͺ Strong", counts["STRONG"], "#1ABC9C"),
        ("π Top", counts["TOP"], "#9B59B6"),
    ]
    for col_w, (label, val, color) in zip(m_cols, kpis):
        col_w.markdown(
            f'<div class="metric-card">'
            f'<div class="value" style="color:{color}">{val}</div>'
            f'<div class="label">{label}</div>'
            f'</div>',
            unsafe_allow_html=True,
        )
    st.markdown("<br>", unsafe_allow_html=True)
    # -- Charts ---------------------------------------------------------------
    chart_col1, chart_col2 = st.columns(2)
    with chart_col1:
        st.markdown("**Click Distribution by Tier**")
        tier_labels = [TIER_META[t]["label"] for t in counts]
        tier_values = list(counts.values())
        tier_colors = [TIER_META[t]["color"] for t in counts]
        fig_pie = go.Figure(go.Pie(
            labels=tier_labels, values=tier_values,
            marker_colors=tier_colors,
            hole=0.45, textinfo="label+percent",
        ))
        fig_pie.update_layout(
            showlegend=False, margin=dict(t=10, b=10, l=10, r=10), height=280
        )
        st.plotly_chart(fig_pie, use_container_width=True)
    with chart_col2:
        st.markdown("**Monthly Click Trend (Top 5 Blogs)**")
        top5 = df_clean.nlargest(5, "total_clicks")
        fig_line = go.Figure()
        for _, row in top5.iterrows():
            # BUG FIX: coerce to str first β a NaN/non-string title made
            # len(row["title"]) raise TypeError and killed the dashboard.
            full_title = str(row["title"])
            short_title = full_title[:40] + "β¦" if len(full_title) > 40 else full_title
            fig_line.add_trace(go.Scatter(
                x=month_cols,
                y=[row[m] for m in month_cols],
                mode="lines+markers",
                name=short_title,
            ))
        fig_line.update_layout(
            margin=dict(t=10, b=10, l=10, r=10), height=280,
            legend=dict(font=dict(size=9)),
            xaxis_title="Month", yaxis_title="Clicks",
        )
        st.plotly_chart(fig_line, use_container_width=True)
    st.markdown("---")
    # -- Result Tabs ----------------------------------------------------------
    tab_td, tab_merge, tab_mon, tab_perf, tab_str, tab_top = st.tabs([
        f"π΄ Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']})",
        f"π΅ Merge ({len(merge_pairs)})",
        f"π‘ Monitor ({counts['MONITOR']})",
        f"β Performing ({counts['PERFORMING']})",
        f"πͺ Strong ({counts['STRONG']})",
        f"π Top ({counts['TOP']})",
    ])
    display_cols_base = ["url", "title"] + month_cols + ["total_clicks", "trend", "action"]
    # -- Take Down tab --------------------------------------------------------
    with tab_td:
        st.markdown("Blogs with zero or negligible traffic. Recommended for removal or merge.")
        td_df = df_clean[df_clean["tier"].isin(["TAKEDOWN_ZERO", "TAKEDOWN_LOW"])].sort_values("total_clicks")
        if len(td_df):
            # Search filter (case-insensitive substring on URL or title).
            search_td = st.text_input("π Filter by URL or title", key="search_td", placeholder="Type to filter...")
            if search_td:
                mask = td_df["url"].str.contains(search_td, case=False, na=False) | \
                       td_df["title"].str.contains(search_td, case=False, na=False)
                td_df = td_df[mask]
            st.dataframe(td_df[display_cols_base], use_container_width=True, height=400)
            st.caption(f"{len(td_df)} blogs")
        else:
            st.success("π No take-down candidates found!")
    # -- Merge tab ------------------------------------------------------------
    with tab_merge:
        st.markdown("AI-detected topically overlapping blogs. Review and approve/reject each suggestion.")
        if merge_pairs:
            # Review interface
            st.markdown("**β = Keep in report β = Reject suggestion**")
            # Init approval state (default: everything approved).
            if "merge_approvals" not in st.session_state:
                st.session_state["merge_approvals"] = {i: True for i in range(len(merge_pairs))}
            col_filter, col_actions = st.columns([3, 1])
            with col_filter:
                search_merge = st.text_input("π Filter merges", key="search_merge", placeholder="Filter by title or cluster...")
            with col_actions:
                st.markdown("<br>", unsafe_allow_html=True)
                if st.button("β Approve All", key="approve_all"):
                    for i in range(len(merge_pairs)):
                        st.session_state["merge_approvals"][i] = True
                    st.rerun()
            # Display table (filter matches weak title, strong title or cluster).
            merge_display = []
            for i, p in enumerate(merge_pairs):
                if search_merge and search_merge.lower() not in p.get("weak_title", "").lower() \
                        and search_merge.lower() not in p.get("strong_title", "").lower() \
                        and search_merge.lower() not in p.get("topic_cluster", "").lower():
                    continue
                approved = st.session_state["merge_approvals"].get(i, True)
                merge_display.append({
                    "#": i + 1,
                    "Weak Blog (Merge FROM)": p.get("weak_title", "")[:60],
                    "Weak Clicks": p.get("weak_clicks", 0),
                    "Strong Blog (Merge INTO)": p.get("strong_title", "")[:60],
                    "Strong Clicks": p.get("strong_clicks", 0),
                    "Similarity": p.get("similarity", ""),
                    "Topic Cluster": p.get("topic_cluster", ""),
                    "AI Merge Reason": p.get("merge_reason", "")[:100],
                    "Approved": "β Yes" if approved else "β Rejected",
                })
            merge_df_display = pd.DataFrame(merge_display)
            st.dataframe(merge_df_display, use_container_width=True, height=380)
            # Approve/Reject individual rows
            st.markdown("**Toggle individual suggestions:**")
            toggle_cols = st.columns(5)
            for i, p in enumerate(merge_pairs[:20]):  # show first 20 toggles
                col_idx = i % 5
                with toggle_cols[col_idx]:
                    current = st.session_state["merge_approvals"].get(i, True)
                    label = f"#{i+1}: {p.get('weak_title','')[:25]}β¦"
                    new_val = st.checkbox(label, value=current, key=f"merge_toggle_{i}")
                    st.session_state["merge_approvals"][i] = new_val
            # BUG FIX: count only indices of the CURRENT pair list β summing
            # .values() also counted stale keys left by a previous analysis
            # run with more pairs, inflating the approved/rejected totals.
            approved_count = sum(
                1 for i in range(len(merge_pairs))
                if st.session_state["merge_approvals"].get(i, True)
            )
            st.caption(f"β {approved_count} approved Β· β {len(merge_pairs) - approved_count} rejected")
        else:
            st.info("No merge candidates detected above the similarity threshold.")

    # -- Generic tier tab helper ----------------------------------------------
    def render_tier_tab(tier_key, tab_obj, search_key):
        """Render one tier's table inside *tab_obj* with text + trend filters."""
        with tab_obj:
            subset = df_clean[df_clean["tier"] == tier_key].sort_values("total_clicks", ascending=False)
            search = st.text_input("π Filter", key=search_key, placeholder="Filter by URL or title...")
            if search:
                mask = subset["url"].str.contains(search, case=False, na=False) | \
                       subset["title"].str.contains(search, case=False, na=False)
                subset = subset[mask]
            trend_filter = st.selectbox(
                "Trend filter", ["All", "π Growing", "π Declining", "β‘οΈ Stable"],
                key=f"{search_key}_trend"
            )
            if trend_filter != "All":
                subset = subset[subset["trend"] == trend_filter]
            st.dataframe(subset[display_cols_base], use_container_width=True, height=400)
            st.caption(f"{len(subset)} blogs")

    render_tier_tab("MONITOR", tab_mon, "search_mon")
    render_tier_tab("PERFORMING", tab_perf, "search_perf")
    render_tier_tab("STRONG", tab_str, "search_str")
    render_tier_tab("TOP", tab_top, "search_top")
    st.markdown("---")
    # =========================================================================
    # STEP 5 β DOWNLOAD REPORT
    # =========================================================================
    st.markdown('<p class="section-header">Step 5 β Download Excel Report</p>', unsafe_allow_html=True)
    # Apply approvals to merge pairs before export (missing index => approved).
    approvals = st.session_state.get("merge_approvals", {})
    approved_pairs = [p for i, p in enumerate(merge_pairs) if approvals.get(i, True)]
    dl_col1, dl_col2 = st.columns([2, 3])
    with dl_col1:
        # NOTE(review): the Excel file is rebuilt on every rerun (each widget
        # interaction); fine for small reports, consider caching if slow.
        with st.spinner("Building Excel report..."):
            excel_bytes = build_excel(
                df_clean, approved_pairs, month_cols,
                site_name=site_name,
            )
        filename = f"{site_name.replace(' ', '_').lower()}_blog_audit_report.xlsx"
        st.download_button(
            label="β¬οΈ Download 7-Tab Excel Report",
            data=excel_bytes,
            file_name=filename,
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            type="primary",
            use_container_width=True,
        )
    with dl_col2:
        st.markdown(f"""
**Report includes 7 tabs:**
- π Summary Dashboard
- π΄ Take Down ({counts['TAKEDOWN_ZERO'] + counts['TAKEDOWN_LOW']} blogs)
- π΅ Merge Recommendations ({len(approved_pairs)} approved pairs)
- π‘ Monitor Β· β Performing Β· πͺ Strong Β· π Top Performers
""")
        st.caption(f"Model used: {st.session_state.get('model_label','β')}")
elif not st.session_state["analysis_done"]:
    st.markdown('<p class="section-header">Step 4 β Results Dashboard</p>', unsafe_allow_html=True)
    st.info("β¬οΈ Complete Steps 1β3 above to see results here.")