Spaces:
Sleeping
Sleeping
File size: 7,938 Bytes
bc7640d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | import csv
import pandas as pd
import streamlit as st
# The three column headers that must exist in the Comscore export.
_REQUIRED_COLUMNS = ["Media", "Target Audience (000)", "Composition Index UV"]
# Comscore exports carry preamble rows; headers may appear anywhere in the
# first N rows of the file.
_HEADER_SCAN_ROWS = 30


def _fetch_all_codes(get_conn):
    """Return the sorted list of distinct Codes from the Snowflake volume view.

    Connection and cursor are closed even when the query raises.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                """
                SELECT DISTINCT Codes
                FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5
                ORDER BY Codes
                """
            )
            return [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        conn.close()


def _fetch_excluded_site_ids(get_conn, exclude_codes):
    """Return the set of SITEIDs attached to any of *exclude_codes*.

    Uses a parameterized IN (...) clause (one placeholder per code) instead of
    interpolating quoted values into the SQL string, so code values containing
    quotes cannot break — or inject into — the query.
    """
    if not exclude_codes:
        return set()
    placeholders = ", ".join(["%s"] * len(exclude_codes))
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                f"""
                SELECT DISTINCT SITEID
                FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5
                WHERE Codes IN ({placeholders})
                """,
                list(exclude_codes),
            )
            return {row[0] for row in cur.fetchall()}
        finally:
            cur.close()
    finally:
        conn.close()


def _parse_comscore_csv(raw_text):
    """Parse a raw Comscore export into a DataFrame of the required columns.

    The three required headers are not guaranteed to share a row, so each is
    located independently within the first _HEADER_SCAN_ROWS rows; data begins
    immediately after the bottom-most header row. The two numeric columns are
    coerced with ``errors="coerce"`` (unparseable cells become NaN).

    Raises:
        ValueError: if any required column cannot be located.
    """
    rows = list(csv.reader(raw_text.splitlines(), skipinitialspace=True))

    # Find the row index of each required header.
    header_pos = {}
    for i, row in enumerate(rows[:_HEADER_SCAN_ROWS]):
        cleaned = [cell.strip() for cell in row]
        for col in _REQUIRED_COLUMNS:
            if col not in header_pos and col in cleaned:
                header_pos[col] = i
        if len(header_pos) == len(_REQUIRED_COLUMNS):
            break

    missing = [c for c in _REQUIRED_COLUMNS if c not in header_pos]
    if missing:
        raise ValueError(f"Missing required column(s): {', '.join(missing)}")

    # Map each header name to its column index within its own header row.
    idx_map = {
        col: [cell.strip() for cell in rows[prow]].index(col)
        for col, prow in header_pos.items()
    }

    data_start = max(header_pos.values()) + 1
    data = [
        [
            row[idx_map[col]].strip() if idx_map[col] < len(row) else None
            for col in _REQUIRED_COLUMNS
        ]
        for row in rows[data_start:]
        if row  # skip blank rows
    ]

    df = pd.DataFrame(data, columns=_REQUIRED_COLUMNS)
    for col in ("Target Audience (000)", "Composition Index UV"):
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df


def _render_exports(df_display):
    """Offer the three download buttons for the filtered table."""
    # Export - GAM: a single cell holding a comma-joined list of site_ids.
    site_ids = df_display["site_id"].dropna().astype(str)
    gam_csv = pd.DataFrame({"site_ids": [",".join(site_ids)]}).to_csv(
        index=False, header=False
    )
    st.download_button(
        label="Export - GAM",
        data=gam_csv,
        file_name="gam_export.csv",
        mime="text/csv",
    )

    # Export - Client Facing: site_name plus Media renamed to Domain.
    client_csv = (
        df_display[["site_name", "Media"]]
        .rename(columns={"Media": "Domain"})
        .to_csv(index=False)
    )
    st.download_button(
        label="Export - Client Facing",
        data=client_csv,
        file_name="client_facing_export.csv",
        mime="text/csv",
    )

    # Export - Site Names: a single cell holding comma-joined site names.
    site_names = df_display["site_name"].dropna().astype(str)
    names_csv = pd.DataFrame({"site_names": [",".join(site_names)]}).to_csv(
        index=False, header=False
    )
    st.download_button(
        label="Export - Site Names",
        data=names_csv,
        file_name="site_names_export.csv",
        mime="text/csv",
    )


def render(get_conn, get_lookup_table, extract_pmp):
    """Render the Comscore Site List page.

    Parameters:
        get_conn: zero-argument factory returning a DB-API connection to
            Snowflake (caller owns credentials/config).
        get_lookup_table: zero-argument factory returning the site lookup
            DataFrame; must contain columns entity, site_id, site_name,
            status, ad_options.
        extract_pmp: callable taking an ad_options string and returning a
            truthy/falsy PMP-availability flag.
    """
    # NOTE: the original header string carried a mojibake emoji prefix; the
    # unreadable character has been dropped.
    st.header("Comscore Site List")

    # 1) Composition Index threshold input
    threshold = st.number_input(
        "Composition Index Threshold",
        min_value=0.0,
        step=1.0,
        value=150.0,
        help="Show only rows where Composition Index UV ≥ this value",
    )

    # 2) Codes to exclude, populated from the Snowflake volume view.
    all_codes = _fetch_all_codes(get_conn)
    exclude_codes = st.multiselect(
        "Exclude Codes",
        options=all_codes,
        default=["bsopt_23"] if "bsopt_23" in all_codes else [],
        help="Select Codes whose SITEIDs you want to filter out of the main table",
    )

    # 3) File uploader
    upload = st.file_uploader(
        "Upload your Comscore CSV", type="csv", key="comscore_uploader"
    )
    if not upload:
        st.info("Please upload your file from Comscore.")
        return

    # Parse outside the broad try/except so a missing-column error is reported
    # once, cleanly. (The original called st.stop() inside the try block;
    # Streamlit's StopException derives from Exception, so the generic handler
    # swallowed it and displayed a second, misleading error.)
    try:
        df_clean = _parse_comscore_csv(upload.read().decode("latin-1"))
    except ValueError as exc:
        st.error(str(exc))
        return

    try:
        # Snowflake lookup & merge (still backed by the SIGMA_SCRATCH view).
        df_lookup = get_lookup_table()
        lookup_small = df_lookup[
            ["entity", "site_id", "site_name", "status", "ad_options"]
        ]
        df_merged = pd.merge(
            df_clean,
            lookup_small,
            how="left",
            left_on="Media",
            right_on="entity",
        ).drop(columns=["entity"])

        # Flag PMP availability; unmatched rows (NaN ad_options) become "No".
        df_merged["pmp"] = (
            df_merged["ad_options"]
            .astype(str)
            .apply(extract_pmp)
            .map({True: "Yes", False: "No"})
            .fillna("No")
        )

        excluded_site_ids = _fetch_excluded_site_ids(get_conn, exclude_codes)

        # Combined filter: meets threshold, known site, not dropped,
        # PMP-enabled, and not attached to an excluded Code.
        df_filtered = df_merged[
            (df_merged["Composition Index UV"] >= threshold)
            & (df_merged["site_name"].notna())
            & (df_merged["status"] != "Dropped")
            & (df_merged["pmp"] == "Yes")
            & (~df_merged["site_id"].isin(excluded_site_ids))
        ]

        df_display = (
            df_filtered.drop(columns=["ad_options"])
            .sort_values("Target Audience (000)", ascending=False)
            .reset_index(drop=True)
        )
        if df_display.empty:
            st.warning("No rows meet the criteria.")
            return

        st.success(f"Showing {len(df_display)} rows after filtering")
        st.dataframe(df_display)
        _render_exports(df_display)
    except Exception as e:
        st.error(f"Could not parse, merge, or export file: {e}")
|