Spaces:

saurabhharak
/

GovDataLabeler

Sleeping

App Files Files Community

GovDataLabeler / app.py

saurabhharak

rename the app name

e4230ac over 2 years ago

raw

history blame contribute delete

9.09 kB

	import base64
	import streamlit as st
	import pandas as pd

	# Data source codes: short code -> full name of the ministry / government body.
	# Insertion order is the order in which the names are offered to the user.
	data_source_codes = {
	    "CABSEC": "Cabinet Secretariat",
	    "CAG": "Comptroller & Auditor General",
	    "DAE": "Department of Atomic Energy",
	    "DOS": "Department of Space",
	    "ECI": "Election Commission of India",
	    "HCDELHI": "HIGH COURT OF DELHI",
	    "MOA": "Ministry of Agriculture",
	    "MCF": "Ministry of Chemicals & Fertilizers",
	    "MOCA": "Ministry of Civil Aviation",
	    "MOCOAL": "Ministry of Coal",
	    "MOCI": "Ministry of Commerce & Industry",
	    "MOCIT": "Ministry of Communications & Information Tech.",
	    "MOCF&PD": "Ministry of Consumer Aff., Food, & Public Dist.",
	    "MCA": "Ministry of Corporate Affairs",
	    "MOCULT": "Ministry of Culture",
	    "MOD": "Ministry of Defence",
	    "MDONER": "Ministry of Development of North Eastern Region",
	    "MDWS": "Ministry of Drinking Water and Sanitation",
	    "MOES": "Ministry of Earth Sciences",
	    "MOEF": "Ministry of Environment & Forests",
	    "MEA": "Ministry of External Affairs",
	    "MOF": "Ministry of Finance",
	    "MOFPI": "Ministry of Food Processing Industries",
	    "MOHFW": "Ministry of Health & Family Welfare",
	    "MHI&PE": "Ministry of Heavy Industry & Public Enterprises",
	    "MHA": "Ministry of Home Affairs",
	    "MOHUA": "Ministry of Housing & Urban Poverty Alleviation",
	    "MHRD": "Ministry of Human Resource Development",
	    "MOI&B": "Ministry of Information & Broadcasting",
	    "MOL&E": "Ministry of Labour & Employment",
	    "MOLJ": "Ministry of Law & Justice",
	    "MSME": "Ministry of Micro, Small and Medium Enterprises",
	    "MOM": "Ministry of Mines",
	    "MMA": "Ministry of Minority Affairs",
	    "MNRE": "Ministry of New & Renewable Energy",
	    "MPR": "Ministry of Panchayati Raj",
	    "MPA": "Ministry of Parliamentary Affairs",
	    "MPPP": "Ministry of Personnel, Public Grievances & Pensions",
	    "MPNG": "Ministry of Petroleum & Natural Gas",
	    "MP": "Ministry of Power",
	    "MR": "Ministry of Railways",
	    "MORTH": "Ministry of Road Transport & Highways",
	    "MRD": "Ministry of Rural Development",
	    "MST": "Ministry of Science & Technology",
	    "MS": "Ministry of Shipping",
	    "MSJE": "Ministry of Social Justice & Empowerment",
	    "MOSPI": "Ministry of Statistics & Programme Implementation",
	    "MSTL": "Ministry of Steel",
	    "MT": "Ministry of Textiles",
	    "MOT": "Ministry of Tourism",
	    "MTA": "Ministry of Tribal Affairs",
	    "MOUD": "Ministry of Urban Development",
	    "MWR": "Ministry of Water Resources",
	    "MWCD": "Ministry of Women & Child Development",
	    "MYAS": "Ministry of Youth Affairs & Sports",
	    "PC": "Planning Commission",
	    "PRES": "President",
	    "PMO": "Prime Minister's Office",
	    "VP": "Vice-President",
	}



	# Sector codes: short code -> sector name.
	# Insertion order is the order in which the sectors are offered to the user.
	sector_codes = {
	    "AGRI": "Agriculture",
	    "ANML": "Animal Husbandry and Fisheries",
	    "BNK": "Banking",
	    "CENS": "Census",
	    "CLMT": "Climate & Weather",
	    "CMDB": "Commodity Boards",
	    "COMR": "Commerce",
	    "CAFF": "Consumer Affairs",
	    "COVID": "Covid",
	    "CRIME": "Crime",
	    "CULT": "Culture and Tourism",
	    "DEMO": "Demographics",
	    "DIGINF": "Digital Infrastructure",
	    "ECON": "Economy",
	    "ELECT": "Elections",
	    "ENRG": "Energy",
	    "EXTAFF": "External Affairs",
	    "FINCL": "Financial Inclusion",
	    "FAGRI": "Food and Agriculture",
	    "FORWLD": "Forestry and Wildlife",
	    "GEN": "General",
	    "GOVSCM": "Government Schemes",
	    "HLTH": "Health",
	    "HSNG": "Housing",
	    "IND": "Industries",
	    "JUST": "Justice",
	    "NSS": "National Sample Survey",
	    "NATDIS": "Natural Disasters",
	    "OTHER": "Other",
	    "PETGAS": "Petroleum and Gas",
	    "RURALDEV": "Rural Development",
	    "SATIMG": "Satellite Imagery Data",
	    "SCI": "Science",
	    "SOCIOECO": "Socio Economic",
	    "TRANS": "Transportation",
	    "BUDGET": "Union Budget",
	    "WTR": "Water",
	}

	#Granularity_values = ["District","State","Tehsil","Other Level", "India","Assembly Constituency","Point Level","Gram Panchayat","Block","Sub-District","Village","Country"]




	# Granularity label (as shown to the user) -> abbreviation used in dataset names.
	granularity_short_codes = {
	    "District": "DIS",
	    "State": "STA",
	    "Tehsil": "TEH",
	    "Other Level": "OTH",
	    "India": "IND",
	    "Assembly Constituency": "AC",
	    "Point Level": "PL",
	    "Gram Panchayat": "GP",
	    "Block": "BL",
	    "Sub-District": "SD",
	    "Village": "VIL",
	    "Country": "CTRY",
	}
	# frequency_values = ['Yearly', 'Weekly', 'Quinquennial', 'Daily', 'Fortnightly', 'Monthly', 'Seasonally', 'Other / One Time']


	# Frequency label (as shown to the user) -> one-letter abbreviation used in dataset names.
	frequency_short_codes = {
	    "Yearly": "Y",
	    "Weekly": "W",
	    "Quinquennial": "Q",
	    "Daily": "D",
	    "Fortnightly": "F",
	    "Monthly": "M",
	    "Seasonally": "S",
	    "Other / One Time": "O",
	}


	# Read counter from file
	def read_counter():
	    """Return the dataset counter stored in 'counter.txt'.

	    Returns:
	        The integer stored in the file, or 1 (the starting counter value)
	        when the file does not exist or does not contain a valid integer.
	    """
	    try:
	        with open('counter.txt', 'r') as f:
	            return int(f.read())
	    except FileNotFoundError:
	        # First run: no counter has been persisted yet.
	        return 1
	    except ValueError:
	        # An empty or corrupt file would otherwise raise on every call;
	        # treat it like a missing file and restart from the initial value.
	        return 1

	# Persist the counter
	def update_counter(counter):
	    """Overwrite 'counter.txt' with the string form of *counter*."""
	    with open('counter.txt', mode='w') as counter_file:
	        counter_file.write(str(counter))

	# Build the dataset ID from the counter
	def generate_dataset_id(counter):
	    """Return 'DID' followed by *counter* zero-padded to at least three digits."""
	    padded_number = format(counter, '03')
	    return 'DID' + padded_number

	# Assemble the dataset name from its coded parts
	def generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency):
	    """Return '<source>-<sector>-<granularity>-<frequency>-<dataset id>'.

	    *granularity* and *frequency* are translated through
	    granularity_short_codes / frequency_short_codes; a label missing from
	    the respective table becomes 'UNK'. *start_year* and *end_year* are
	    accepted but do not appear in the generated name.
	    """
	    name_parts = (
	        data_source_code,
	        sector_code,
	        granularity_short_codes.get(granularity, 'UNK'),
	        frequency_short_codes.get(frequency, 'UNK'),
	        dataset_id,
	    )
	    return '{}-{}-{}-{}-{}'.format(*name_parts)

	# Dataset names that are already taken (starts out empty)
	existing_dataset_names = []


	# Uniqueness test against the names recorded above
	def check_dataset_name_uniqueness(dataset_name):
	    """Return True when *dataset_name* is not in existing_dataset_names."""
	    already_taken = dataset_name in existing_dataset_names
	    return not already_taken


	def generate_download_link(mapped_dataset):
	    """Render a success banner and an HTML download link for *mapped_dataset*.

	    The DataFrame is serialised to CSV (without the index), base64-encoded
	    and embedded in a data: URL that is written to the page via st.markdown.

	    Args:
	        mapped_dataset: object exposing ``to_csv(index=False)``; the caller
	            in this file passes a pandas DataFrame.
	    """
	    csv_text = mapped_dataset.to_csv(index=False)
	    b64 = base64.b64encode(csv_text.encode('utf-8')).decode()
	    # The payload is CSV text, so advertise it as CSV. Labelling it as
	    # file/xlsx with an .xlsx filename produced a download that spreadsheet
	    # applications reject or warn about, because the content is not a workbook.
	    href = f'<a href="data:text/csv;base64,{b64}" download="mapped_dataset.csv">Download</a>'
	    st.success('Download Mapped Dataset')
	    # unsafe_allow_html is needed for the raw <a> tag; the only interpolated
	    # value is base64 text, which cannot contain markup characters.
	    st.markdown(href, unsafe_allow_html=True)

	# Streamlit App
	def main():
	    """Render the dataset-naming form and offer the resulting record for download.

	    Reads the persisted counter, collects data source, sector, year range,
	    granularity, frequency and the original dataset name from Streamlit
	    widgets, and displays the generated dataset name. When 'Save to Excel'
	    is pressed and the name passed the uniqueness check, the record is put
	    in a one-row DataFrame, the incremented counter is written back, and a
	    download link for the record is rendered.
	    """
	    st.title('Dataset Naming App')

	    # The persisted counter supplies the numeric part of the dataset ID.
	    counter = read_counter()

	    # User input: Data Source (names are shown; map the choice back to its code)
	    data_source = st.selectbox('Select Data Source', list(data_source_codes.values()))
	    data_source_code = next(code for code, name in data_source_codes.items() if name == data_source)

	    # User input: Sector (same name -> code mapping)
	    sector = st.selectbox('Select Sector', list(sector_codes.values()))
	    sector_code = next(code for code, name in sector_codes.items() if name == sector)

	    # User input: Start Year
	    start_year = st.number_input('Enter Start Year', min_value=2000, max_value=2100, value=2022)

	    # User input: End Year. The default must not be below min_value, otherwise
	    # Streamlit raises as soon as a start year later than 2022 is chosen.
	    end_year = st.number_input(
	        'Enter End Year',
	        min_value=start_year,
	        max_value=2100,
	        value=max(start_year, 2022),
	    )

	    # User input: Granularity
	    granularity = st.selectbox('Select Granularity', list(granularity_short_codes.keys()))

	    # User input: Frequency
	    frequency = st.selectbox('Select Frequency', list(frequency_short_codes.keys()))

	    # Generate dataset ID and name (computed once, after all inputs are known)
	    dataset_id = generate_dataset_id(counter)
	    dataset_name = generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency)

	    # User input: Original Dataset Name
	    original_dataset_name = st.text_input('Enter Original Dataset Name')

	    # NOTE(review): nothing visible in this file adds entries to
	    # existing_dataset_names, so this check appears to always pass - confirm.
	    is_unique = check_dataset_name_uniqueness(dataset_name)

	    # Display generated dataset name
	    st.write('Gov Data Labeler')
	    st.write(dataset_name)

	    # Display warning if dataset name is not unique
	    if not is_unique:
	        st.warning('Dataset name is not unique. Please generate a new name.')

	    # The button is evaluated first so it is always rendered, even when the
	    # name is not unique.
	    if st.button('Save to Excel') and is_unique:
	        data = {
	            'Dataset Name': [dataset_name],
	            'Data Source': [data_source],
	            'Sector': [sector],
	            'Start Year': [start_year],
	            'End Year': [end_year],
	            'Granularity': [granularity],
	            'Frequency': [frequency],
	            'Original Dataset Name': [original_dataset_name],
	        }
	        df = pd.DataFrame(data)

	        # Advance and persist the counter so the next dataset gets a new ID.
	        update_counter(counter + 1)
	        generate_download_link(df)
	        st.success('Dataset information saved to Excel.')
	        # Reassigning the local input variables here would not reset the
	        # widgets, so no "clear inputs" step is attempted.

	# Start the app only when this file is the entry script.
	if __name__ == "__main__":
	    main()