# Sales_Toolkit / comscore_site_list.py
# Auto-synced from GitHub (github-actions[bot], commit bc7640d).
import csv
import pandas as pd
import streamlit as st
# Fully-qualified name of the Snowflake view that maps Codes -> SITEIDs.
_VOLUME_VIEW = (
    "ANALYTICS.SIGMA_SCRATCH."
    "VIEW_GAMLOG_AGG_SITE_CODE_VOLUME_188E5F89A6FD42C7994D0F012A2EECD5"
)


def _fetch_all_codes(get_conn):
    """Return every distinct Codes value from the volume view, sorted.

    Opens and closes its own connection; the cursor and connection are
    released even if the query raises.
    """
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            cur.execute(
                f"SELECT DISTINCT Codes FROM {_VOLUME_VIEW} ORDER BY Codes"
            )
            return [row[0] for row in cur.fetchall()]
        finally:
            cur.close()
    finally:
        conn.close()


def _fetch_excluded_site_ids(get_conn, codes):
    """Return the set of SITEIDs whose Codes appear in *codes*.

    Returns an empty set without touching the database when *codes* is
    empty. Values are bound as query parameters instead of being quoted
    into the SQL text (the previous f-string interpolation was an
    injection-shaped pattern and broke on codes containing quotes).
    """
    if not codes:
        return set()
    conn = get_conn()
    try:
        cur = conn.cursor()
        try:
            # NOTE(review): assumes the connector accepts %s-style
            # placeholders (snowflake-connector-python default paramstyle)
            # — confirm against the connection factory.
            placeholders = ", ".join(["%s"] * len(codes))
            cur.execute(
                f"SELECT DISTINCT SITEID FROM {_VOLUME_VIEW} "
                f"WHERE Codes IN ({placeholders})",
                tuple(codes),
            )
            return {row[0] for row in cur.fetchall()}
        finally:
            cur.close()
    finally:
        conn.close()


def _parse_comscore_csv(raw_text):
    """Parse a raw Comscore export into a 3-column DataFrame.

    Comscore exports carry preamble rows above the real table, so the
    three required headers ("Media", "Target Audience (000)",
    "Composition Index UV") are located anywhere within the first 30
    rows; data is read starting after the bottom-most header row.

    Returns a DataFrame with the two numeric columns coerced via
    ``pd.to_numeric(errors="coerce")`` (unparseable cells become NaN).

    Raises:
        ValueError: if any required header cannot be found.
    """
    required = ["Media", "Target Audience (000)", "Composition Index UV"]
    rows = list(csv.reader(raw_text.splitlines(), skipinitialspace=True))

    # Record the first row index where each required header appears.
    header_pos = {}
    for i, row in enumerate(rows[:30]):
        cleaned = [cell.strip() for cell in row]
        for col in required:
            if col not in header_pos and col in cleaned:
                header_pos[col] = i
        if len(header_pos) == len(required):
            break

    missing = [c for c in required if c not in header_pos]
    if missing:
        raise ValueError(f"Missing required column(s): {', '.join(missing)}")

    # Map each header name to its column index within its own header row
    # (the three headers are not guaranteed to share one row).
    idx_map = {
        col: [cell.strip() for cell in rows[prow]].index(col)
        for col, prow in header_pos.items()
    }

    # Data starts right after the bottom-most header row; short rows pad
    # the missing trailing cells with None.
    data = []
    for row in rows[max(header_pos.values()) + 1 :]:
        if not row:
            continue
        data.append(
            [
                row[idx_map[col]].strip() if idx_map[col] < len(row) else None
                for col in required
            ]
        )

    df = pd.DataFrame(data, columns=required)
    for col in ("Target Audience (000)", "Composition Index UV"):
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df


def _render_exports(df_display):
    """Render the three download buttons for the filtered table."""
    # Export - GAM: a single cell holding every site_id, comma-joined.
    site_ids = df_display["site_id"].dropna().astype(str)
    gam_csv = pd.DataFrame({"site_ids": [",".join(site_ids)]}).to_csv(
        index=False, header=False
    )
    st.download_button(
        label="Export - GAM",
        data=gam_csv,
        file_name="gam_export.csv",
        mime="text/csv",
    )

    # Export - Client Facing: site_name plus the Media column renamed Domain.
    client_csv = (
        df_display[["site_name", "Media"]]
        .rename(columns={"Media": "Domain"})
        .to_csv(index=False)
    )
    st.download_button(
        label="Export - Client Facing",
        data=client_csv,
        file_name="client_facing_export.csv",
        mime="text/csv",
    )

    # Export - Site Names: a single cell holding every name, comma-joined.
    site_names = df_display["site_name"].dropna().astype(str)
    names_csv = pd.DataFrame({"site_names": [",".join(site_names)]}).to_csv(
        index=False, header=False
    )
    st.download_button(
        label="Export - Site Names",
        data=names_csv,
        file_name="site_names_export.csv",
        mime="text/csv",
    )


def render(get_conn, get_lookup_table, extract_pmp):
    """Render the Comscore Site List tool.

    Args:
        get_conn: zero-arg factory returning a DB-API connection
            (Snowflake) used for the Codes dropdown and the exclusion
            lookup.
        get_lookup_table: zero-arg factory returning the site lookup
            DataFrame; must contain the columns entity, site_id,
            site_name, status, ad_options.
        extract_pmp: predicate applied to each ``ad_options`` string;
            truthy means the site is treated as PMP-enabled.

    Flow: threshold + exclude-codes inputs, CSV upload, parse, merge
    against the lookup table, filter, display, and three CSV exports.
    """
    st.header("📊 Comscore Site List")

    # 1) Composition Index Threshold input
    threshold = st.number_input(
        "Composition Index Threshold",
        min_value=0.0,
        step=1.0,
        value=150.0,
        help="Show only rows where Composition Index UV ≥ this value",
    )

    # 2) Exclude Codes selector, populated from the volume view.
    all_codes = _fetch_all_codes(get_conn)
    exclude_codes = st.multiselect(
        "Exclude Codes",
        options=all_codes,
        default=["bsopt_23"] if "bsopt_23" in all_codes else [],
        help="Select Codes whose SITEIDs you want to filter out of the main table",
    )

    # 3) File uploader
    upload = st.file_uploader(
        "Upload your Comscore CSV", type="csv", key="comscore_uploader"
    )
    if not upload:
        st.info("Please upload your file from Comscore.")
        return

    # Parse. Missing-header errors get their own message and STOP the
    # flow with a plain return — the old code called st.stop() inside the
    # broad try, whose except caught Streamlit's StopException and showed
    # a second, spurious "Could not parse..." error.
    try:
        raw_text = upload.read().decode("latin-1")
        df_clean = _parse_comscore_csv(raw_text)
    except ValueError as e:
        st.error(str(e))
        return
    except Exception as e:
        st.error(f"Could not parse, merge, or export file: {e}")
        return

    try:
        # Snowflake lookup & merge (left join keeps every Comscore row;
        # unmatched rows carry NaN lookup columns and are filtered below).
        df_lookup = get_lookup_table()
        lookup_small = df_lookup[
            ["entity", "site_id", "site_name", "status", "ad_options"]
        ]
        df_merged = pd.merge(
            df_clean,
            lookup_small,
            how="left",
            left_on="Media",
            right_on="entity",
        ).drop(columns=["entity"])

        # Yes/No PMP flag; anything extract_pmp doesn't classify as
        # True/False (e.g. NaN ad_options) falls back to "No".
        df_merged["pmp"] = (
            df_merged["ad_options"]
            .astype(str)
            .apply(extract_pmp)
            .map({True: "Yes", False: "No"})
            .fillna("No")
        )

        excluded_site_ids = _fetch_excluded_site_ids(get_conn, exclude_codes)

        # Combined filter: above threshold, known site, not Dropped,
        # PMP-enabled, and not excluded by the selected Codes.
        df_filtered = df_merged[
            (df_merged["Composition Index UV"] >= threshold)
            & (df_merged["site_name"].notna())
            & (df_merged["status"] != "Dropped")
            & (df_merged["pmp"] == "Yes")
            & (~df_merged["site_id"].isin(excluded_site_ids))
        ]

        df_display = (
            df_filtered.drop(columns=["ad_options"])
            .sort_values("Target Audience (000)", ascending=False)
            .reset_index(drop=True)
        )
        if df_display.empty:
            st.warning("No rows meet the criteria.")
        else:
            st.success(f"Showing {len(df_display)} rows after filtering")
            st.dataframe(df_display)
            _render_exports(df_display)
    except Exception as e:
        st.error(f"Could not parse, merge, or export file: {e}")