# NOTE(review): removed non-Python export artifacts ("Spaces:" / "Sleeping"
# status lines) that were pasted above the module and broke parsing.
import csv

import pandas as pd
import streamlit as st
| def render(get_conn, get_lookup_table, extract_pmp): | |
| st.header("π Comscore Site List") | |
| # 1) Composition Index Threshold input | |
| threshold = st.number_input( | |
| "Composition Index Threshold", | |
| min_value=0.0, | |
| step=1.0, | |
| value=150.0, | |
| help="Show only rows where Composition Index UV β₯ this value", | |
| ) | |
| # 2) Exclude Codes selector | |
| # ββ Fetch all distinct Codes for the dropdown | |
| conn_codes = get_conn() | |
| cs_codes = conn_codes.cursor() | |
| cs_codes.execute( | |
| """ | |
| SELECT DISTINCT Codes | |
| FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5 | |
| ORDER BY Codes | |
| """ | |
| ) | |
| all_codes = [row[0] for row in cs_codes.fetchall()] | |
| cs_codes.close() | |
| conn_codes.close() | |
| exclude_codes = st.multiselect( | |
| "Exclude Codes", | |
| options=all_codes, | |
| default=["bsopt_23"] if "bsopt_23" in all_codes else [], | |
| help="Select Codes whose SITEIDs you want to filter out of the main table", | |
| ) | |
| # 3) File uploader | |
| upload = st.file_uploader( | |
| "Upload your Comscore CSV", type="csv", key="comscore_uploader" | |
| ) | |
| if not upload: | |
| st.info("Please upload your file from Comscore.") | |
| else: | |
| try: | |
| # βββ Parse the raw into rows βββββββββββββββββββββββββββββββββββββββ | |
| raw_text = upload.read().decode("latin-1") | |
| lines = raw_text.splitlines() | |
| reader = csv.reader(lines, skipinitialspace=True) | |
| rows = list(reader) | |
| # βββ Locate your three headers anywhere in the first N lines βββββββ | |
| required = ["Media", "Target Audience (000)", "Composition Index UV"] | |
| header_pos = {} | |
| for i, row in enumerate(rows[:30]): | |
| cleaned = [cell.strip() for cell in row] | |
| for col in required: | |
| if col not in header_pos and col in cleaned: | |
| header_pos[col] = i | |
| if len(header_pos) == len(required): | |
| break | |
| missing = [c for c in required if c not in header_pos] | |
| if missing: | |
| st.error(f"Missing required column(s): {', '.join(missing)}") | |
| st.stop() | |
| # map header names β column indices | |
| idx_map = {} | |
| for col, prow in header_pos.items(): | |
| cleaned = [cell.strip() for cell in rows[prow]] | |
| idx_map[col] = cleaned.index(col) | |
| # data starts right after the bottom header row | |
| data_start = max(header_pos.values()) + 1 | |
| data_rows = rows[data_start:] | |
| # pull out exactly those three columns | |
| data = [] | |
| for row in data_rows: | |
| if not row: | |
| continue | |
| vals = [] | |
| for col in required: | |
| idx = idx_map[col] | |
| vals.append(row[idx].strip() if idx < len(row) else None) | |
| data.append(vals) | |
| # build dataframe & coerce types | |
| df_clean = pd.DataFrame(data, columns=required) | |
| df_clean["Target Audience (000)"] = pd.to_numeric( | |
| df_clean["Target Audience (000)"], errors="coerce" | |
| ) | |
| df_clean["Composition Index UV"] = pd.to_numeric( | |
| df_clean["Composition Index UV"], errors="coerce" | |
| ) | |
| # βββ ORIGINAL Snowflake lookup & merge βββββββββββββββββββββββββββ | |
| df_lookup = ( | |
| get_lookup_table() | |
| ) # DISCUSSION: this still uses your SIGMA_SCRATCH view | |
| lookup_small = df_lookup[ | |
| ["entity", "site_id", "site_name", "status", "ad_options"] | |
| ] | |
| df_merged = pd.merge( | |
| df_clean, | |
| lookup_small, | |
| how="left", | |
| left_on="Media", | |
| right_on="entity", | |
| ).drop(columns=["entity"]) | |
| df_merged["pmp"] = ( | |
| df_merged["ad_options"] | |
| .astype(str) | |
| .apply(extract_pmp) | |
| .map({True: "Yes", False: "No"}) | |
| .fillna("No") | |
| ) | |
| # βββ NEW: fetch SITEIDs to exclude based on Codes ββββββββββββββββ | |
| if exclude_codes: | |
| conn_ex = get_conn() | |
| cs_ex = conn_ex.cursor() | |
| quoted = ", ".join(f"'{c}'" for c in exclude_codes) | |
| cs_ex.execute( | |
| f""" | |
| SELECT DISTINCT SITEID | |
| FROM ANALYTICS.SIGMA_SCRATCH.VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5 | |
| WHERE Codes IN ({quoted}) | |
| """ | |
| ) | |
| excluded_site_ids = {row[0] for row in cs_ex.fetchall()} | |
| cs_ex.close() | |
| conn_ex.close() | |
| else: | |
| excluded_site_ids = set() | |
| # βββ Combined filter βββββββββββββββββββββββββββββββββββββββββββββ | |
| df_filtered = df_merged[ | |
| (df_merged["Composition Index UV"] >= threshold) | |
| & (df_merged["site_name"].notna()) | |
| & (df_merged["status"] != "Dropped") | |
| & (df_merged["pmp"] == "Yes") | |
| & (~df_merged["site_id"].isin(excluded_site_ids)) | |
| ] | |
| # βββ drop extras, sort, display & export ββββββββββββββββββββββββββ | |
| df_display = ( | |
| df_filtered.drop(columns=["ad_options"]) | |
| .sort_values("Target Audience (000)", ascending=False) | |
| .reset_index(drop=True) | |
| ) | |
| if df_display.empty: | |
| st.warning("No rows meet the criteria.") | |
| else: | |
| st.success(f"Showing {len(df_display)} rows after filtering") | |
| st.dataframe(df_display) | |
| # Export β GAM | |
| site_ids = df_display["site_id"].dropna().astype(str) | |
| gam_str = ",".join(site_ids) | |
| gam_df = pd.DataFrame({"site_ids": [gam_str]}) | |
| gam_csv = gam_df.to_csv(index=False, header=False) | |
| st.download_button( | |
| label="Export - GAM", | |
| data=gam_csv, | |
| file_name="gam_export.csv", | |
| mime="text/csv", | |
| ) | |
| # Export β Client Facing | |
| client_df = df_display[["site_name", "Media"]].rename( | |
| columns={"Media": "Domain"} | |
| ) | |
| client_csv = client_df.to_csv(index=False) | |
| st.download_button( | |
| label="Export - Client Facing", | |
| data=client_csv, | |
| file_name="client_facing_export.csv", | |
| mime="text/csv", | |
| ) | |
| # Export β Site Names | |
| site_names = df_display["site_name"].dropna().astype(str) | |
| names_str = ",".join(site_names) | |
| names_df = pd.DataFrame({"site_names": [names_str]}) | |
| names_csv = names_df.to_csv(index=False, header=False) | |
| st.download_button( | |
| label="Export - Site Names", | |
| data=names_csv, | |
| file_name="site_names_export.csv", | |
| mime="text/csv", | |
| ) | |
| except Exception as e: | |
| st.error(f"Could not parse, merge, or export file: {e}") | |