Spaces:

KutisBayag
/

Predispute-Data-Extractor

Running

App Files Files Community

Predispute-Data-Extractor / app.py

KutisBayag

Update app.py

e664338 verified 7 months ago

raw

history blame contribute delete

4.44 kB

	import gradio as gr
	import pandas as pd
	import re

	# Mapping dictionary
	mapping = {
	"obs_commercial_purpose_commercial": "Commercial",
	"obs_commercial_purpose_issue": "SIP",
	"obs_environmental_politics_global": "Environmental Politics",
	"obs_civil_social_rights_global": "Civil and Social Rights",
	"obs_economy_global": "Economy",
	"obs_security_foreign_policy_global": "Security and Foreign Policy",
	"obs_political_values_global": "Political Values and Governance",
	"obs_health_global": "Health",
	"obs_immigration_global": "Immigration",
	"obs_crime_global": "Crime",
	"obs_guns_global": "Guns",
	"obs_education_global": "Education",
	}

	def process_tags(tags):
	if not isinstance(tags, str):
	return ""

	raw_tags = [t.strip() for t in tags.split(",") if t.strip()]

	# Rule 1: If Commercial exists → only output Commercial
	if "obs_commercial_purpose_commercial" in raw_tags:
	return "Commercial"

	# Otherwise map normally
	mapped = []
	sip_flag = False
	for tag in raw_tags:
	if tag == "obs_commercial_purpose_issue":
	sip_flag = True # store SIP to add later
	elif tag in mapping:
	mapped.append(mapping[tag])

	# Add SIP at the end if exists
	if sip_flag:
	mapped.append("SIP")

	return ", ".join(mapped)

	def highlight_matches(tm, auditor):
	tm_tags = [t.strip() for t in tm.split(",") if t.strip()]
	auditor_tags = [t.strip() for t in auditor.split(",") if t.strip()]

	highlighted_tm = []
	highlighted_auditor = []

	for tag in tm_tags:
	if tag in auditor_tags:
	highlighted_tm.append(f'<span style="color: green; font-weight: bold">{tag}</span>')
	else:
	highlighted_tm.append(f'<span style="color: red; font-weight: bold">{tag}</span>')

	for tag in auditor_tags:
	if tag in tm_tags:
	highlighted_auditor.append(f'<span style="color: green; font-weight: bold">{tag}</span>')
	else:
	highlighted_auditor.append(f'<span style="color: red; font-weight: bold">{tag}</span>')

	return ", ".join(highlighted_tm), ", ".join(highlighted_auditor)

	def process_sheet(sheet_url):
	# Convert Google Sheets share link to CSV export
	match = re.search(r"/d/([a-zA-Z0-9-_]+)", sheet_url)
	if not match:
	return "❌ Invalid Google Sheets link."

	sheet_id = match.group(1)
	csv_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv"

	try:
	df = pd.read_csv(csv_url)
	except Exception as e:
	return f"❌ Error reading sheet: {e}"

	outputs = []
	for _, row in df.iterrows():
	name = row.get("Full Name", "")
	job_id = row.get("job_id", "")

	# TM (from R)
	tm = process_tags(row.get("Combined", ""))

	# Auditor: gather all AF–AW columns
	auditor_tags = []
	for col in df.columns:
	if col.startswith("A"): # AF, AG, AH...
	auditor_tags.append(str(row[col]))
	auditor = process_tags(", ".join(auditor_tags))

	# Rule: if both TM and Auditor are blank → NT
	if not tm.strip():
	tm = "NT"
	if not auditor.strip():
	auditor = "NT"

	# Highlight matches/mismatches
	tm_h, auditor_h = highlight_matches(tm, auditor)

	# Column CC = rationale (fixed index approach)
	rationale = ""
	try:
	rationale = row.iloc[80] # 0-based index for CC
	except Exception:
	pass

	rationale_text = f"<br><br><b>Rationale:</b> {rationale}" if pd.notna(rationale) and str(rationale).strip() else ""

	# Final formatted text (using HTML now)
	text = f"""
	<b>{name}</b><br>
	job id: {job_id}<br>
	<b>TM:</b> {tm_h}<br>
	<b>Auditor:</b> {auditor_h}{rationale_text}
	"""

	outputs.append(text)

	return "<hr><br>".join(outputs)

	# Gradio App (switched output to HTML)
	demo = gr.Interface(
	fn=process_sheet,
	inputs=gr.Textbox(label="Google Sheets Link", placeholder="Paste Google Sheets link here"),
	outputs=gr.HTML(label="Formatted Output"),
	title="Google Sheets Formatter",
	description="Paste your Google Sheets link (make sure it's shared as 'Anyone with the link can view')."
	)

	if __name__ == "__main__":
	demo.launch()