Spaces:
Sleeping
Sleeping
File size: 7,938 Bytes
bc7640d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | import csv
import pandas as pd
import streamlit as st
# The three column headers that must exist in the Comscore export.
_REQUIRED_COLUMNS = ["Media", "Target Audience (000)", "Composition Index UV"]
# Comscore exports carry preamble rows; headers may appear anywhere in the
# first N rows of the file.
_HEADER_SCAN_ROWS = 30


def _fetch_all_codes(get_conn):
    """Return the sorted list of distinct Codes from the Snowflake volume view.

    Connection and cursor are closed even when the query raises.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT DISTINCT Codes
                FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5
                ORDER BY Codes
                """
            )
            return [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        conn.close()


def _fetch_excluded_site_ids(get_conn, exclude_codes):
    """Return the set of SITEIDs attached to any of *exclude_codes*.

    Uses a parameterized IN (...) clause (one placeholder per code) instead of
    interpolating quoted values into the SQL string, so code values containing
    quotes cannot break — or inject into — the query.
    """
    if not exclude_codes:
        return set()
    placeholders = ", ".join(["%s"] * len(exclude_codes))
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                f"""
                SELECT DISTINCT SITEID
                FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5
                WHERE Codes IN ({placeholders})
                """,
                list(exclude_codes),
            )
            return {row[0] for row in cur.fetchall()}
        finally:
            cur.close()
    finally:
        conn.close()


def _parse_comscore_csv(raw_text):
    """Parse a raw Comscore export into a DataFrame of the required columns.

    The three required headers are not guaranteed to share a row, so each is
    located independently within the first _HEADER_SCAN_ROWS rows; data begins
    immediately after the bottom-most header row. The two numeric columns are
    coerced with ``errors="coerce"`` (unparseable cells become NaN).

    Raises:
        ValueError: if any required column cannot be located.
    """
    rows = list(csv.reader(raw_text.splitlines(), skipinitialspace=True))

    # Find the row index of each required header.
    header_pos = {}
    for i, row in enumerate(rows[:_HEADER_SCAN_ROWS]):
        cleaned = [cell.strip() for cell in row]
        for col in _REQUIRED_COLUMNS:
            if col not in header_pos and col in cleaned:
                header_pos[col] = i
        if len(header_pos) == len(_REQUIRED_COLUMNS):
            break

    missing = [c for c in _REQUIRED_COLUMNS if c not in header_pos]
    if missing:
        raise ValueError(f"Missing required column(s): {', '.join(missing)}")

    # Map each header name to its column index within its own header row.
    idx_map = {
        col: [cell.strip() for cell in rows[prow]].index(col)
        for col, prow in header_pos.items()
    }

    data_start = max(header_pos.values()) + 1
    data = [
        [
            row[idx_map[col]].strip() if idx_map[col] < len(row) else None
            for col in _REQUIRED_COLUMNS
        ]
        for row in rows[data_start:]
        if row  # skip blank rows
    ]

    df = pd.DataFrame(data, columns=_REQUIRED_COLUMNS)
    for col in ("Target Audience (000)", "Composition Index UV"):
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df


def _render_exports(df_display):
    """Offer the three download buttons for the filtered table."""
    # Export - GAM: a single cell holding a comma-joined list of site_ids.
    site_ids = df_display["site_id"].dropna().astype(str)
    gam_csv = pd.DataFrame({"site_ids": [",".join(site_ids)]}).to_csv(
        index=False, header=False
    )
    st.download_button(
        label="Export - GAM",
        data=gam_csv,
        file_name="gam_export.csv",
        mime="text/csv",
    )

    # Export - Client Facing: site_name plus Media renamed to Domain.
    client_csv = (
        df_display[["site_name", "Media"]]
        .rename(columns={"Media": "Domain"})
        .to_csv(index=False)
    )
    st.download_button(
        label="Export - Client Facing",
        data=client_csv,
        file_name="client_facing_export.csv",
        mime="text/csv",
    )

    # Export - Site Names: a single cell holding comma-joined site names.
    site_names = df_display["site_name"].dropna().astype(str)
    names_csv = pd.DataFrame({"site_names": [",".join(site_names)]}).to_csv(
        index=False, header=False
    )
    st.download_button(
        label="Export - Site Names",
        data=names_csv,
        file_name="site_names_export.csv",
        mime="text/csv",
    )


def render(get_conn, get_lookup_table, extract_pmp):
    """Render the Comscore Site List page.

    Parameters:
        get_conn: zero-argument factory returning a DB-API connection to
            Snowflake (caller owns credentials/config).
        get_lookup_table: zero-argument factory returning the site lookup
            DataFrame; must contain columns entity, site_id, site_name,
            status, ad_options.
        extract_pmp: callable taking an ad_options string and returning a
            truthy/falsy PMP-availability flag.
    """
    # NOTE: the original header string carried a mojibake emoji prefix; the
    # unreadable character has been dropped.
    st.header("Comscore Site List")

    # 1) Composition Index threshold input
    threshold = st.number_input(
        "Composition Index Threshold",
        min_value=0.0,
        step=1.0,
        value=150.0,
        help="Show only rows where Composition Index UV ≥ this value",
    )

    # 2) Codes to exclude, populated from the Snowflake volume view.
    all_codes = _fetch_all_codes(get_conn)
    exclude_codes = st.multiselect(
        "Exclude Codes",
        options=all_codes,
        default=["bsopt_23"] if "bsopt_23" in all_codes else [],
        help="Select Codes whose SITEIDs you want to filter out of the main table",
    )

    # 3) File uploader
    upload = st.file_uploader(
        "Upload your Comscore CSV", type="csv", key="comscore_uploader"
    )
    if not upload:
        st.info("Please upload your file from Comscore.")
        return

    # Parse outside the broad try/except so a missing-column error is reported
    # once, cleanly. (The original called st.stop() inside the try block;
    # Streamlit's StopException derives from Exception, so the generic handler
    # swallowed it and displayed a second, misleading error.)
    try:
        df_clean = _parse_comscore_csv(upload.read().decode("latin-1"))
    except ValueError as exc:
        st.error(str(exc))
        return

    try:
        # Snowflake lookup & merge (still backed by the SIGMA_SCRATCH view).
        df_lookup = get_lookup_table()
        lookup_small = df_lookup[
            ["entity", "site_id", "site_name", "status", "ad_options"]
        ]
        df_merged = pd.merge(
            df_clean,
            lookup_small,
            how="left",
            left_on="Media",
            right_on="entity",
        ).drop(columns=["entity"])

        # Flag PMP availability; unmatched rows (NaN ad_options) become "No".
        df_merged["pmp"] = (
            df_merged["ad_options"]
            .astype(str)
            .apply(extract_pmp)
            .map({True: "Yes", False: "No"})
            .fillna("No")
        )

        excluded_site_ids = _fetch_excluded_site_ids(get_conn, exclude_codes)

        # Combined filter: meets threshold, known site, not dropped,
        # PMP-enabled, and not attached to an excluded Code.
        df_filtered = df_merged[
            (df_merged["Composition Index UV"] >= threshold)
            & (df_merged["site_name"].notna())
            & (df_merged["status"] != "Dropped")
            & (df_merged["pmp"] == "Yes")
            & (~df_merged["site_id"].isin(excluded_site_ids))
        ]

        df_display = (
            df_filtered.drop(columns=["ad_options"])
            .sort_values("Target Audience (000)", ascending=False)
            .reset_index(drop=True)
        )
        if df_display.empty:
            st.warning("No rows meet the criteria.")
            return

        st.success(f"Showing {len(df_display)} rows after filtering")
        st.dataframe(df_display)
        _render_exports(df_display)
    except Exception as e:
        st.error(f"Could not parse, merge, or export file: {e}")
|