import gradio as gr
import pandas as pd
import re
# Mapping dictionary
mapping = {
"obs_commercial_purpose_commercial": "Commercial",
"obs_commercial_purpose_issue": "SIP",
"obs_environmental_politics_global": "Environmental Politics",
"obs_civil_social_rights_global": "Civil and Social Rights",
"obs_economy_global": "Economy",
"obs_security_foreign_policy_global": "Security and Foreign Policy",
"obs_political_values_global": "Political Values and Governance",
"obs_health_global": "Health",
"obs_immigration_global": "Immigration",
"obs_crime_global": "Crime",
"obs_guns_global": "Guns",
"obs_education_global": "Education",
}
def process_tags(tags):
if not isinstance(tags, str):
return ""
raw_tags = [t.strip() for t in tags.split(",") if t.strip()]
# Rule 1: If Commercial exists → only output Commercial
if "obs_commercial_purpose_commercial" in raw_tags:
return "Commercial"
# Otherwise map normally
mapped = []
sip_flag = False
for tag in raw_tags:
if tag == "obs_commercial_purpose_issue":
sip_flag = True # store SIP to add later
elif tag in mapping:
mapped.append(mapping[tag])
# Add SIP at the end if exists
if sip_flag:
mapped.append("SIP")
return ", ".join(mapped)
def highlight_matches(tm, auditor):
tm_tags = [t.strip() for t in tm.split(",") if t.strip()]
auditor_tags = [t.strip() for t in auditor.split(",") if t.strip()]
highlighted_tm = []
highlighted_auditor = []
for tag in tm_tags:
if tag in auditor_tags:
highlighted_tm.append(f'{tag}')
else:
highlighted_tm.append(f'{tag}')
for tag in auditor_tags:
if tag in tm_tags:
highlighted_auditor.append(f'{tag}')
else:
highlighted_auditor.append(f'{tag}')
return ", ".join(highlighted_tm), ", ".join(highlighted_auditor)
def process_sheet(sheet_url):
# Convert Google Sheets share link to CSV export
match = re.search(r"/d/([a-zA-Z0-9-_]+)", sheet_url)
if not match:
return "❌ Invalid Google Sheets link."
sheet_id = match.group(1)
csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
try:
df = pd.read_csv(csv_url)
except Exception as e:
return f"❌ Error reading sheet: {e}"
outputs = []
for _, row in df.iterrows():
name = row.get("Full Name", "")
job_id = row.get("job_id", "")
# TM (from R)
tm = process_tags(row.get("Combined", ""))
# Auditor: gather all AF–AW columns
auditor_tags = []
for col in df.columns:
if col.startswith("A"): # AF, AG, AH...
auditor_tags.append(str(row[col]))
auditor = process_tags(", ".join(auditor_tags))
# Rule: if both TM and Auditor are blank → NT
if not tm.strip():
tm = "NT"
if not auditor.strip():
auditor = "NT"
# Highlight matches/mismatches
tm_h, auditor_h = highlight_matches(tm, auditor)
# Column CC = rationale (fixed index approach)
rationale = ""
try:
rationale = row.iloc[80] # 0-based index for CC
except Exception:
pass
rationale_text = f"
Rationale: {rationale}" if pd.notna(rationale) and str(rationale).strip() else ""
# Final formatted text (using HTML now)
text = f"""
{name}
job id: {job_id}
TM: {tm_h}
Auditor: {auditor_h}{rationale_text}
"""
outputs.append(text)
return "