import gradio as gr import pandas as pd import re # Mapping dictionary mapping = { "obs_commercial_purpose_commercial": "Commercial", "obs_commercial_purpose_issue": "SIP", "obs_environmental_politics_global": "Environmental Politics", "obs_civil_social_rights_global": "Civil and Social Rights", "obs_economy_global": "Economy", "obs_security_foreign_policy_global": "Security and Foreign Policy", "obs_political_values_global": "Political Values and Governance", "obs_health_global": "Health", "obs_immigration_global": "Immigration", "obs_crime_global": "Crime", "obs_guns_global": "Guns", "obs_education_global": "Education", } def process_tags(tags): if not isinstance(tags, str): return "" raw_tags = [t.strip() for t in tags.split(",") if t.strip()] # Rule 1: If Commercial exists → only output Commercial if "obs_commercial_purpose_commercial" in raw_tags: return "Commercial" # Otherwise map normally mapped = [] sip_flag = False for tag in raw_tags: if tag == "obs_commercial_purpose_issue": sip_flag = True # store SIP to add later elif tag in mapping: mapped.append(mapping[tag]) # Add SIP at the end if exists if sip_flag: mapped.append("SIP") return ", ".join(mapped) def highlight_matches(tm, auditor): tm_tags = [t.strip() for t in tm.split(",") if t.strip()] auditor_tags = [t.strip() for t in auditor.split(",") if t.strip()] highlighted_tm = [] highlighted_auditor = [] for tag in tm_tags: if tag in auditor_tags: highlighted_tm.append(f'{tag}') else: highlighted_tm.append(f'{tag}') for tag in auditor_tags: if tag in tm_tags: highlighted_auditor.append(f'{tag}') else: highlighted_auditor.append(f'{tag}') return ", ".join(highlighted_tm), ", ".join(highlighted_auditor) def process_sheet(sheet_url): # Convert Google Sheets share link to CSV export match = re.search(r"/d/([a-zA-Z0-9-_]+)", sheet_url) if not match: return "❌ Invalid Google Sheets link." sheet_id = match.group(1) csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv" try: df = pd.read_csv(csv_url) except Exception as e: return f"❌ Error reading sheet: {e}" outputs = [] for _, row in df.iterrows(): name = row.get("Full Name", "") job_id = row.get("job_id", "") # TM (from R) tm = process_tags(row.get("Combined", "")) # Auditor: gather all AF–AW columns auditor_tags = [] for col in df.columns: if col.startswith("A"): # AF, AG, AH... auditor_tags.append(str(row[col])) auditor = process_tags(", ".join(auditor_tags)) # Rule: if both TM and Auditor are blank → NT if not tm.strip(): tm = "NT" if not auditor.strip(): auditor = "NT" # Highlight matches/mismatches tm_h, auditor_h = highlight_matches(tm, auditor) # Column CC = rationale (fixed index approach) rationale = "" try: rationale = row.iloc[80] # 0-based index for CC except Exception: pass rationale_text = f"

Rationale: {rationale}" if pd.notna(rationale) and str(rationale).strip() else "" # Final formatted text (using HTML now) text = f""" {name}
job id: {job_id}
TM: {tm_h}
Auditor: {auditor_h}{rationale_text} """ outputs.append(text) return "

".join(outputs) # Gradio App (switched output to HTML) demo = gr.Interface( fn=process_sheet, inputs=gr.Textbox(label="Google Sheets Link", placeholder="Paste Google Sheets link here"), outputs=gr.HTML(label="Formatted Output"), title="Google Sheets Formatter", description="Paste your Google Sheets link (make sure it's shared as 'Anyone with the link can view')." ) if __name__ == "__main__": demo.launch()