File size: 4,440 Bytes
318474b
 
2b4de8c
318474b
2b4de8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448068e
2b4de8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448068e
e664338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b4de8c
 
 
 
 
 
 
 
 
 
 
 
 
448068e
2b4de8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e664338
 
 
2b4de8c
 
 
 
 
e664338
2b4de8c
e664338
2b4de8c
e664338
 
 
 
 
 
 
2b4de8c
 
448068e
e664338
318474b
e664338
2b4de8c
 
 
e664338
2b4de8c
 
 
318474b
2b4de8c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import gradio as gr
import pandas as pd
import re

# Mapping dictionary
mapping = {
    "obs_commercial_purpose_commercial": "Commercial",
    "obs_commercial_purpose_issue": "SIP",
    "obs_environmental_politics_global": "Environmental Politics",
    "obs_civil_social_rights_global": "Civil and Social Rights",
    "obs_economy_global": "Economy",
    "obs_security_foreign_policy_global": "Security and Foreign Policy",
    "obs_political_values_global": "Political Values and Governance",
    "obs_health_global": "Health",
    "obs_immigration_global": "Immigration",
    "obs_crime_global": "Crime",
    "obs_guns_global": "Guns",
    "obs_education_global": "Education",
}

def process_tags(tags):
    if not isinstance(tags, str):
        return ""
    
    raw_tags = [t.strip() for t in tags.split(",") if t.strip()]
    
    # Rule 1: If Commercial exists → only output Commercial
    if "obs_commercial_purpose_commercial" in raw_tags:
        return "Commercial"
    
    # Otherwise map normally
    mapped = []
    sip_flag = False
    for tag in raw_tags:
        if tag == "obs_commercial_purpose_issue":
            sip_flag = True  # store SIP to add later
        elif tag in mapping:
            mapped.append(mapping[tag])
    
    # Add SIP at the end if exists
    if sip_flag:
        mapped.append("SIP")
    
    return ", ".join(mapped)

def highlight_matches(tm, auditor):
    tm_tags = [t.strip() for t in tm.split(",") if t.strip()]
    auditor_tags = [t.strip() for t in auditor.split(",") if t.strip()]
    
    highlighted_tm = []
    highlighted_auditor = []

    for tag in tm_tags:
        if tag in auditor_tags:
            highlighted_tm.append(f'<span style="color: green; font-weight: bold">{tag}</span>')
        else:
            highlighted_tm.append(f'<span style="color: red; font-weight: bold">{tag}</span>')
    
    for tag in auditor_tags:
        if tag in tm_tags:
            highlighted_auditor.append(f'<span style="color: green; font-weight: bold">{tag}</span>')
        else:
            highlighted_auditor.append(f'<span style="color: red; font-weight: bold">{tag}</span>')
    
    return ", ".join(highlighted_tm), ", ".join(highlighted_auditor)

def process_sheet(sheet_url):
    # Convert Google Sheets share link to CSV export
    match = re.search(r"/d/([a-zA-Z0-9-_]+)", sheet_url)
    if not match:
        return "❌ Invalid Google Sheets link."
    
    sheet_id = match.group(1)
    csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
    
    try:
        df = pd.read_csv(csv_url)
    except Exception as e:
        return f"❌ Error reading sheet: {e}"

    outputs = []
    for _, row in df.iterrows():
        name = row.get("Full Name", "")
        job_id = row.get("job_id", "")
        
        # TM (from R)
        tm = process_tags(row.get("Combined", ""))
        
        # Auditor: gather all AF–AW columns
        auditor_tags = []
        for col in df.columns:
            if col.startswith("A"):  # AF, AG, AH...
                auditor_tags.append(str(row[col]))
        auditor = process_tags(", ".join(auditor_tags))
        
        # Rule: if both TM and Auditor are blank → NT
        if not tm.strip():
            tm = "NT"
        if not auditor.strip():
            auditor = "NT"
        
        # Highlight matches/mismatches
        tm_h, auditor_h = highlight_matches(tm, auditor)
        
        # Column CC = rationale (fixed index approach)
        rationale = ""
        try:
            rationale = row.iloc[80]  # 0-based index for CC
        except Exception:
            pass
        
        rationale_text = f"<br><br><b>Rationale:</b> {rationale}" if pd.notna(rationale) and str(rationale).strip() else ""
        
        # Final formatted text (using HTML now)
        text = f"""
<b>{name}</b><br>
job id: {job_id}<br>
<b>TM:</b> {tm_h}<br>
<b>Auditor:</b> {auditor_h}{rationale_text}
"""
        
        outputs.append(text)

    return "<hr><br>".join(outputs)

# Gradio App (switched output to HTML)
demo = gr.Interface(
    fn=process_sheet,
    inputs=gr.Textbox(label="Google Sheets Link", placeholder="Paste Google Sheets link here"),
    outputs=gr.HTML(label="Formatted Output"),
    title="Google Sheets Formatter",
    description="Paste your Google Sheets link (make sure it's shared as 'Anyone with the link can view')."
)

if __name__ == "__main__":
    demo.launch()