Spaces:

leonardoimpact
/

Data_Validation_Process

Sleeping

App Files Files Community

Data_Validation_Process / app.py

fortuala

Update app.py

032020f verified 9 months ago

raw

history blame

2.86 kB

	import gradio as gr
	import pandas as pd
	import os

	import functions as f

	# Workbooks bundled with the app; the relative names are resolved against
	# the process working directory.
	survey_path = 'Copy of AGT.MHVL.0A.202505.0001 4.xlsx'
	indicator_path = 'Indicators_indicators_Default view 18.xlsx'
	questions_path = 'Indicators_questions_Default View 18.xlsx'
	choice_path = 'Indicators_choices_Default View 17.xlsx'
	# NOTE(review): parameters_path and uuid are defined here but are not read
	# anywhere in the visible part of this file.
	parameters_path = 'Indicators_surveys_Survey validation 1.xlsx'
	uuid = 'AGT.MHVL.0A.202505.0001'

	# Eagerly load the four workbooks at import time, in the same order as
	# before: survey, indicators, choices, questions.
	survey, indicators, choices, questions = map(
	    pd.read_excel,
	    (survey_path, indicator_path, choice_path, questions_path),
	)

	def run_validation(
	    survey_file=None,
	    survey_uuid=None,
	    *,
	    theme_list=None,
	    survey_type=None,
	    segmentation='no',
	    segmentation_columns=None,
	    mapping_segmentation_quotas=None,
	    N=None,
	):
	    """Run the validation pipeline and return the data-quality report.

	    The steps are, in order: load the data frames with
	    ``f.load_dataframes``, then build the consistency, integrity,
	    representativity and enumerator reports, and finally combine them with
	    ``f.generate_data_quality_report``. The report is printed and returned.

	    Args:
	        survey_file: Optional upload coming from the Gradio file input.
	            May be a path string or an object exposing the path as
	            ``.name``. When ``None`` the bundled ``survey_path`` is used.
	        survey_uuid: Value of the UUID textbox. Accepted so the function
	            can be used as the click handler (which passes two inputs);
	            it is not used by the pipeline.
	        theme_list: Forwarded to ``f.consistency_score_report``.
	        survey_type: Forwarded to ``f.integrity_report``.
	        segmentation: ``'yes'`` or anything else. With ``'yes'``,
	            ``f.representativity_report`` is expected to return four
	            tables, otherwise two.
	        segmentation_columns: Forwarded to ``f.representativity_report``.
	        mapping_segmentation_quotas: Forwarded to
	            ``f.representativity_report``.
	        N: Forwarded to ``f.representativity_report``.

	    Returns:
	        Whatever ``f.generate_data_quality_report`` returns.

	    NOTE(review): the keyword-only settings used to be read from names that
	    were never defined, so every call raised ``NameError``. They presumably
	    belong in the parameters workbook (``parameters_path``) -- TODO load
	    them from there instead of relying on these placeholder defaults.
	    """
	    # Depending on the Gradio version / ``type`` setting, the file input
	    # yields either a plain path or a temp-file wrapper carrying ``.name``.
	    if survey_file is None:
	        active_survey_path = survey_path
	    else:
	        active_survey_path = getattr(survey_file, "name", survey_file)

	    (
	        indicator_df,
	        questions_df,
	        _choice_df,
	        data_all,
	        raw_data,
	        column_strategy_df,
	    ) = f.load_dataframes(
	        indicator_path,
	        questions_path,
	        choice_path,
	        active_survey_path,
	    )

	    # consistency
	    table_1_1, table_1_2, table_1_3 = f.consistency_score_report(
	        raw_data=raw_data,
	        indicator_df=indicator_df,
	        questions_df=questions_df,
	        column_strategy_df=column_strategy_df,
	        data_all=data_all,
	        theme_list=theme_list,
	    )

	    # integrity
	    table_2_1, table_2_2, table_2_3, table_2_4, table_2_5 = f.integrity_report(
	        raw_data, questions_df, column_strategy_df, survey_type, table_1_2
	    )

	    # representativity: the call is the same in both modes, only the number
	    # of returned tables differs.
	    representativity = f.representativity_report(
	        segmentation,
	        raw_data,
	        table_2_4,
	        segmentation_columns,
	        mapping_segmentation_quotas,
	        table_2_3,
	        N,
	        table_1_3,
	    )
	    if segmentation == 'yes':
	        table_3_1, table_3_2, table_3_3, table_3_4 = representativity
	    else:
	        table_3_1 = table_3_2 = None
	        table_3_3, table_3_4 = representativity

	    # enumerator bias (only possible when enumerators are recorded)
	    if 'enumerator_name' in raw_data.columns:
	        table_4_1, _table_4_2 = f.enumerator_urgent_issues_report(raw_data, table_2_5)
	    else:
	        table_4_1 = []

	    # Hand the report the segmentation mode and tables that were actually
	    # computed, instead of a hard-coded 'no' / None.
	    report = f.generate_data_quality_report(
	        segmentation=segmentation,
	        table_1_1=table_1_1,
	        table_2_1=table_2_1,
	        table_2_3=table_2_3,
	        table_3_1=table_3_1,
	        table_3_2=table_3_2,
	        table_3_3=table_3_3,
	        table_3_4=table_3_4,
	        table_4_1=table_4_1,
	    )

	    print(report)
	    return report


	# Gradio front end: a title, a survey upload, a UUID field, a run button
	# and a results table.
	with gr.Blocks() as app:
	    gr.Markdown(value="## Survey Validation App")

	    survey_file = gr.File(label="Upload your survey (Excel or CSV)")
	    uuid_box = gr.Textbox(value="AGT.MHVL.0A.202505.0001", label="UUID")
	    run_btn = gr.Button(value="Run Validation")
	    # NOTE(review): this table is rendered, but no event writes to it.
	    output = gr.Dataframe(label="Validation Output")

	    # The handler is called with the uploaded file and the UUID text.
	    # ``outputs`` is empty, so nothing the handler returns is displayed.
	    run_btn.click(
	        fn=run_validation,
	        inputs=[survey_file, uuid_box],
	        outputs=[],
	    )

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()