Spaces:

KutisBayag
/

Predispute-Data-Extractor

Running

File size: 4,440 Bytes

import gradio as gr
import pandas as pd
import re

# Mapping dictionary
mapping = {
    "obs_commercial_purpose_commercial": "Commercial",
    "obs_commercial_purpose_issue": "SIP",
    "obs_environmental_politics_global": "Environmental Politics",
    "obs_civil_social_rights_global": "Civil and Social Rights",
    "obs_economy_global": "Economy",
    "obs_security_foreign_policy_global": "Security and Foreign Policy",
    "obs_political_values_global": "Political Values and Governance",
    "obs_health_global": "Health",
    "obs_immigration_global": "Immigration",
    "obs_crime_global": "Crime",
    "obs_guns_global": "Guns",
    "obs_education_global": "Education",
}

def process_tags(tags):
    if not isinstance(tags, str):
        return ""
    
    raw_tags = [t.strip() for t in tags.split(",") if t.strip()]
    
    # Rule 1: If Commercial exists → only output Commercial
    if "obs_commercial_purpose_commercial" in raw_tags:
        return "Commercial"
    
    # Otherwise map normally
    mapped = []
    sip_flag = False
    for tag in raw_tags:
        if tag == "obs_commercial_purpose_issue":
            sip_flag = True  # store SIP to add later
        elif tag in mapping:
            mapped.append(mapping[tag])
    
    # Add SIP at the end if exists
    if sip_flag:
        mapped.append("SIP")
    
    return ", ".join(mapped)

def highlight_matches(tm, auditor):
    tm_tags = [t.strip() for t in tm.split(",") if t.strip()]
    auditor_tags = [t.strip() for t in auditor.split(",") if t.strip()]
    
    highlighted_tm = []
    highlighted_auditor = []

    for tag in tm_tags:
        if tag in auditor_tags:
            highlighted_tm.append(f'<span style="color: green; font-weight: bold">{tag}</span>')
        else:
            highlighted_tm.append(f'<span style="color: red; font-weight: bold">{tag}</span>')
    
    for tag in auditor_tags:
        if tag in tm_tags:
            highlighted_auditor.append(f'<span style="color: green; font-weight: bold">{tag}</span>')
        else:
            highlighted_auditor.append(f'<span style="color: red; font-weight: bold">{tag}</span>')
    
    return ", ".join(highlighted_tm), ", ".join(highlighted_auditor)

def process_sheet(sheet_url):
    # Convert Google Sheets share link to CSV export
    match = re.search(r"/d/([a-zA-Z0-9-_]+)", sheet_url)
    if not match:
        return "❌ Invalid Google Sheets link."
    
    sheet_id = match.group(1)
    csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"
    
    try:
        df = pd.read_csv(csv_url)
    except Exception as e:
        return f"❌ Error reading sheet: {e}"

    outputs = []
    for _, row in df.iterrows():
        name = row.get("Full Name", "")
        job_id = row.get("job_id", "")
        
        # TM (from R)
        tm = process_tags(row.get("Combined", ""))
        
        # Auditor: gather all AF–AW columns
        auditor_tags = []
        for col in df.columns:
            if col.startswith("A"):  # AF, AG, AH...
                auditor_tags.append(str(row[col]))
        auditor = process_tags(", ".join(auditor_tags))
        
        # Rule: if both TM and Auditor are blank → NT
        if not tm.strip():
            tm = "NT"
        if not auditor.strip():
            auditor = "NT"
        
        # Highlight matches/mismatches
        tm_h, auditor_h = highlight_matches(tm, auditor)
        
        # Column CC = rationale (fixed index approach)
        rationale = ""
        try:
            rationale = row.iloc[80]  # 0-based index for CC
        except Exception:
            pass
        
        rationale_text = f"<br><br><b>Rationale:</b> {rationale}" if pd.notna(rationale) and str(rationale).strip() else ""
        
        # Final formatted text (using HTML now)
        text = f"""
<b>{name}</b><br>
job id: {job_id}<br>
<b>TM:</b> {tm_h}<br>
<b>Auditor:</b> {auditor_h}{rationale_text}
"""
        
        outputs.append(text)

    return "<hr><br>".join(outputs)

# Gradio App (switched output to HTML)
demo = gr.Interface(
    fn=process_sheet,
    inputs=gr.Textbox(label="Google Sheets Link", placeholder="Paste Google Sheets link here"),
    outputs=gr.HTML(label="Formatted Output"),
    title="Google Sheets Formatter",
    description="Paste your Google Sheets link (make sure it's shared as 'Anyone with the link can view')."
)

if __name__ == "__main__":
    demo.launch()