File size: 7,938 Bytes
bc7640d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import csv
import pandas as pd
import streamlit as st


# The three Comscore columns this tab consumes, in display order.
_REQUIRED_COLUMNS = ["Media", "Target Audience (000)", "Composition Index UV"]


def _fetch_distinct_codes(get_conn):
    """Return every distinct ``Codes`` value from the site/code volume view.

    Opens a fresh connection via *get_conn* and guarantees cursor and
    connection are closed even if the query raises.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT DISTINCT Codes
                FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5
                ORDER BY Codes
            """
            )
            return [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        conn.close()


def _fetch_excluded_site_ids(get_conn, exclude_codes):
    """Return the set of SITEIDs mapped to any of *exclude_codes*.

    Returns an empty set (without touching the database) when no codes are
    selected. Embedded single quotes in code values are doubled so the
    generated IN-list literal stays well-formed.
    """
    if not exclude_codes:
        return set()
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            # Codes originate from our own view, but escape quotes anyway so a
            # value like "o'brien" cannot break (or inject into) the query.
            quoted = ", ".join("'" + c.replace("'", "''") + "'" for c in exclude_codes)
            cur.execute(
                f"""
                SELECT DISTINCT SITEID
                FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5
                WHERE Codes IN ({quoted})
            """
            )
            return {row[0] for row in cur.fetchall()}
        finally:
            cur.close()
    finally:
        conn.close()


def parse_comscore_csv(raw_text):
    """Parse a raw Comscore CSV export into a typed DataFrame.

    Comscore exports carry preamble rows before the real headers, and the
    three required headers may appear on different rows. This scans the
    first 30 rows for each header, then reads data starting just below the
    bottom-most header row.

    Args:
        raw_text: the decoded CSV text of the upload.

    Returns:
        ``(df, missing)`` — *df* is a DataFrame with the three required
        columns (numeric columns coerced, unparseable cells become NaN), or
        ``None`` when any required header is absent, in which case *missing*
        lists the absent column names.
    """
    rows = list(csv.reader(raw_text.splitlines(), skipinitialspace=True))

    # Locate each required header anywhere in the first 30 rows.
    header_pos = {}
    for i, row in enumerate(rows[:30]):
        cleaned = [cell.strip() for cell in row]
        for col in _REQUIRED_COLUMNS:
            if col not in header_pos and col in cleaned:
                header_pos[col] = i
        if len(header_pos) == len(_REQUIRED_COLUMNS):
            break

    missing = [c for c in _REQUIRED_COLUMNS if c not in header_pos]
    if missing:
        return None, missing

    # Map each header name to its column index on its own header row.
    idx_map = {
        col: [cell.strip() for cell in rows[prow]].index(col)
        for col, prow in header_pos.items()
    }

    # Data begins immediately after the bottom-most header row.
    data = []
    for row in rows[max(header_pos.values()) + 1:]:
        if not row:
            continue
        data.append(
            [
                row[idx_map[col]].strip() if idx_map[col] < len(row) else None
                for col in _REQUIRED_COLUMNS
            ]
        )

    df = pd.DataFrame(data, columns=_REQUIRED_COLUMNS)
    # Coerce the two numeric columns; bad cells become NaN rather than raising.
    df["Target Audience (000)"] = pd.to_numeric(
        df["Target Audience (000)"], errors="coerce"
    )
    df["Composition Index UV"] = pd.to_numeric(
        df["Composition Index UV"], errors="coerce"
    )
    return df, []


def render(get_conn, get_lookup_table, extract_pmp):
    """Render the "Comscore Site List" tab.

    Args:
        get_conn: zero-arg callable returning a fresh DB-API connection.
        get_lookup_table: zero-arg callable returning the site lookup
            DataFrame (must include entity/site_id/site_name/status/ad_options).
        extract_pmp: callable mapping an ``ad_options`` string to a truthy
            value when the site supports PMP deals.

    Flow: collect a composition-index threshold and a set of Codes to
    exclude, parse an uploaded Comscore CSV, merge it against the lookup
    table, filter, then offer three CSV exports.
    """
    st.header("πŸ“Š Comscore Site List")

    # 1) Composition Index Threshold input
    threshold = st.number_input(
        "Composition Index Threshold",
        min_value=0.0,
        step=1.0,
        value=150.0,
        help="Show only rows where Composition Index UV β‰₯ this value",
    )

    # 2) Exclude Codes selector, populated from the warehouse view.
    all_codes = _fetch_distinct_codes(get_conn)
    exclude_codes = st.multiselect(
        "Exclude Codes",
        options=all_codes,
        default=["bsopt_23"] if "bsopt_23" in all_codes else [],
        help="Select Codes whose SITEIDs you want to filter out of the main table",
    )

    # 3) File uploader
    upload = st.file_uploader(
        "Upload your Comscore CSV", type="csv", key="comscore_uploader"
    )
    if not upload:
        st.info("Please upload your file from Comscore.")
        return

    try:
        # Comscore exports are latin-1 encoded; decode once at the boundary.
        raw_text = upload.read().decode("latin-1")
        df_clean, missing = parse_comscore_csv(raw_text)
        if missing:
            st.error(f"Missing required column(s): {', '.join(missing)}")
            # NOTE: was `st.stop()` — Streamlit's StopException subclasses
            # Exception, so the broad handler below swallowed it and showed a
            # misleading "Could not parse" error. A plain return is safe here.
            return

        # ─── Snowflake lookup & merge ─────────────────────────────────────
        # NOTE(review): get_lookup_table still reads from the SIGMA_SCRATCH view.
        df_lookup = get_lookup_table()
        lookup_small = df_lookup[
            ["entity", "site_id", "site_name", "status", "ad_options"]
        ]
        df_merged = pd.merge(
            df_clean,
            lookup_small,
            how="left",
            left_on="Media",
            right_on="entity",
        ).drop(columns=["entity"])
        # Derive a Yes/No PMP flag; unmatched lookups fall through to "No".
        df_merged["pmp"] = (
            df_merged["ad_options"]
            .astype(str)
            .apply(extract_pmp)
            .map({True: "Yes", False: "No"})
            .fillna("No")
        )

        # SITEIDs to drop, based on the selected exclusion Codes.
        excluded_site_ids = _fetch_excluded_site_ids(get_conn, exclude_codes)

        # ─── Combined filter ─────────────────────────────────────────────
        df_filtered = df_merged[
            (df_merged["Composition Index UV"] >= threshold)
            & (df_merged["site_name"].notna())
            & (df_merged["status"] != "Dropped")
            & (df_merged["pmp"] == "Yes")
            & (~df_merged["site_id"].isin(excluded_site_ids))
        ]

        # ─── drop extras, sort, display & export ──────────────────────────
        df_display = (
            df_filtered.drop(columns=["ad_options"])
            .sort_values("Target Audience (000)", ascending=False)
            .reset_index(drop=True)
        )

        if df_display.empty:
            st.warning("No rows meet the criteria.")
            return

        st.success(f"Showing {len(df_display)} rows after filtering")
        st.dataframe(df_display)

        # Export – GAM: single comma-joined cell of site_ids.
        site_ids = df_display["site_id"].dropna().astype(str)
        gam_str = ",".join(site_ids)
        gam_df = pd.DataFrame({"site_ids": [gam_str]})
        gam_csv = gam_df.to_csv(index=False, header=False)
        st.download_button(
            label="Export - GAM",
            data=gam_csv,
            file_name="gam_export.csv",
            mime="text/csv",
        )

        # Export – Client Facing: site name + domain columns.
        client_df = df_display[["site_name", "Media"]].rename(
            columns={"Media": "Domain"}
        )
        client_csv = client_df.to_csv(index=False)
        st.download_button(
            label="Export - Client Facing",
            data=client_csv,
            file_name="client_facing_export.csv",
            mime="text/csv",
        )

        # Export – Site Names: single comma-joined cell of names.
        site_names = df_display["site_name"].dropna().astype(str)
        names_str = ",".join(site_names)
        names_df = pd.DataFrame({"site_names": [names_str]})
        names_csv = names_df.to_csv(index=False, header=False)
        st.download_button(
            label="Export - Site Names",
            data=names_csv,
            file_name="site_names_export.csv",
            mime="text/csv",
        )

    except Exception as e:
        # Boundary handler: surface any parse/merge/export failure in the UI.
        st.error(f"Could not parse, merge, or export file: {e}")