Spaces:
Sleeping
Sleeping
| import base64 | |
| import streamlit as st | |
| import pandas as pd | |
| # Define Data Source Codes and Sector Codes | |
| data_source_codes = { | |
| 'CABSEC': 'Cabinet Secretariat', | |
| 'CAG': 'Comptroller & Auditor General', | |
| 'DAE': 'Department of Atomic Energy', | |
| 'DOS': 'Department of Space', | |
| 'ECI': 'Election Commission of India', | |
| 'HCDELHI': 'HIGH COURT OF DELHI', | |
| 'MOA': 'Ministry of Agriculture', | |
| 'MCF': 'Ministry of Chemicals & Fertilizers', | |
| 'MOCA': 'Ministry of Civil Aviation', | |
| 'MOCOAL': 'Ministry of Coal', | |
| 'MOCI': 'Ministry of Commerce & Industry', | |
| 'MOCIT': 'Ministry of Communications & Information Tech.', | |
| 'MOCF&PD': 'Ministry of Consumer Aff., Food, & Public Dist.', | |
| 'MCA': 'Ministry of Corporate Affairs', | |
| 'MOCULT': 'Ministry of Culture', | |
| 'MOD': 'Ministry of Defence', | |
| 'MDONER': 'Ministry of Development of North Eastern Region', | |
| 'MDWS': 'Ministry of Drinking Water and Sanitation', | |
| 'MOES': 'Ministry of Earth Sciences', | |
| 'MOEF': 'Ministry of Environment & Forests', | |
| 'MEA': 'Ministry of External Affairs', | |
| 'MOF': 'Ministry of Finance', | |
| 'MOFPI': 'Ministry of Food Processing Industries', | |
| 'MOHFW': 'Ministry of Health & Family Welfare', | |
| 'MHI&PE': 'Ministry of Heavy Industry & Public Enterprises', | |
| 'MHA': 'Ministry of Home Affairs', | |
| 'MOHUA': 'Ministry of Housing & Urban Poverty Alleviation', | |
| 'MHRD': 'Ministry of Human Resource Development', | |
| 'MOI&B': 'Ministry of Information & Broadcasting', | |
| 'MOL&E': 'Ministry of Labour & Employment', | |
| 'MOLJ': 'Ministry of Law & Justice', | |
| 'MSME': 'Ministry of Micro, Small and Medium Enterprises', | |
| 'MOM': 'Ministry of Mines', | |
| 'MMA': 'Ministry of Minority Affairs', | |
| 'MNRE': 'Ministry of New & Renewable Energy', | |
| 'MPR': 'Ministry of Panchayati Raj', | |
| 'MPA': 'Ministry of Parliamentary Affairs', | |
| 'MPPP': 'Ministry of Personnel, Public Grievances & Pensions', | |
| 'MPNG': 'Ministry of Petroleum & Natural Gas', | |
| 'MP': 'Ministry of Power', | |
| 'MR': 'Ministry of Railways', | |
| 'MORTH': 'Ministry of Road Transport & Highways', | |
| 'MRD': 'Ministry of Rural Development', | |
| 'MST': 'Ministry of Science & Technology', | |
| 'MS': 'Ministry of Shipping', | |
| 'MSJE': 'Ministry of Social Justice & Empowerment', | |
| 'MOSPI': 'Ministry of Statistics & Programme Implementation', | |
| 'MSTL': 'Ministry of Steel', | |
| 'MT': 'Ministry of Textiles', | |
| 'MOT': 'Ministry of Tourism', | |
| 'MTA': 'Ministry of Tribal Affairs', | |
| 'MOUD': 'Ministry of Urban Development', | |
| 'MWR': 'Ministry of Water Resources', | |
| 'MWCD': 'Ministry of Women & Child Development', | |
| 'MYAS': 'Ministry of Youth Affairs & Sports', | |
| 'PC': 'Planning Commission', | |
| 'PRES': 'President', | |
| 'PMO': "Prime Minister's Office", | |
| 'VP': 'Vice-President' | |
| } | |
| sector_codes = { | |
| 'AGRI': 'Agriculture', | |
| 'ANML': 'Animal Husbandry and Fisheries', | |
| 'BNK': 'Banking', | |
| 'CENS': 'Census', | |
| 'CLMT': 'Climate & Weather', | |
| 'CMDB': 'Commodity Boards', | |
| 'COMR': 'Commerce', | |
| 'CAFF': 'Consumer Affairs', | |
| 'COVID': 'Covid', | |
| 'CRIME': 'Crime', | |
| 'CULT': 'Culture and Tourism', | |
| 'DEMO': 'Demographics', | |
| 'DIGINF': 'Digital Infrastructure', | |
| 'ECON': 'Economy', | |
| 'ELECT': 'Elections', | |
| 'ENRG': 'Energy', | |
| 'EXTAFF': 'External Affairs', | |
| 'FINCL': 'Financial Inclusion', | |
| 'FAGRI': 'Food and Agriculture', | |
| 'FORWLD': 'Forestry and Wildlife', | |
| 'GEN': 'General', | |
| 'GOVSCM': 'Government Schemes', | |
| 'HLTH': 'Health', | |
| 'HSNG': 'Housing', | |
| 'IND': 'Industries', | |
| 'JUST': 'Justice', | |
| 'NSS': 'National Sample Survey', | |
| 'NATDIS': 'Natural Disasters', | |
| 'OTHER': 'Other', | |
| 'PETGAS': 'Petroleum and Gas', | |
| 'RURALDEV': 'Rural Development', | |
| 'SATIMG': 'Satellite Imagery Data', | |
| 'SCI': 'Science', | |
| 'SOCIOECO': 'Socio Economic', | |
| 'TRANS': 'Transportation', | |
| 'BUDGET': 'Union Budget', | |
| 'WTR': 'Water' | |
| } | |
| #Granularity_values = ["District","State","Tehsil","Other Level", "India","Assembly Constituency","Point Level","Gram Panchayat","Block","Sub-District","Village","Country"] | |
| # Short namings for Granularity_values | |
| granularity_short_codes = { | |
| 'District': 'DIS', | |
| 'State': 'STA', | |
| 'Tehsil': 'TEH', | |
| 'Other Level': 'OTH', | |
| 'India': 'IND', | |
| 'Assembly Constituency': 'AC', | |
| 'Point Level': 'PL', | |
| 'Gram Panchayat': 'GP', | |
| 'Block': 'BL', | |
| 'Sub-District': 'SD', | |
| 'Village': 'VIL', | |
| 'Country': 'CTRY' | |
| } | |
| # frequency_values = ['Yearly', 'Weekly', 'Quinquennial', 'Daily', 'Fortnightly', 'Monthly', 'Seasonally', 'Other / One Time'] | |
| # Short namings for frequency_values | |
| frequency_short_codes = { | |
| 'Yearly': 'Y', | |
| 'Weekly': 'W', | |
| 'Quinquennial': 'Q', | |
| 'Daily': 'D', | |
| 'Fortnightly': 'F', | |
| 'Monthly': 'M', | |
| 'Seasonally': 'S', | |
| 'Other / One Time': 'O' | |
| } | |
| # Read counter from file | |
| def read_counter(): | |
| try: | |
| with open('counter.txt', 'r') as f: | |
| counter = int(f.read()) | |
| except FileNotFoundError: | |
| counter = 1 # Starting counter value | |
| return counter | |
| # Update and save counter to file | |
| def update_counter(counter): | |
| with open('counter.txt', 'w') as f: | |
| f.write(str(counter)) | |
| # Generate unique dataset IDs | |
| def generate_dataset_id(counter): | |
| return f'DID{counter:03}' | |
| # Generate dataset names | |
| def generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency): | |
| granularity_short = granularity_short_codes.get(granularity, 'UNK') | |
| frequency_short = frequency_short_codes.get(frequency, 'UNK') | |
| return f'{data_source_code}-{sector_code}-{granularity_short}-{frequency_short}-{dataset_id}' | |
| # List to store existing dataset names | |
| existing_dataset_names = [] | |
| # Check if dataset name is unique | |
| def check_dataset_name_uniqueness(dataset_name): | |
| return dataset_name not in existing_dataset_names | |
| def generate_download_link(mapped_dataset): | |
| csv_file = mapped_dataset.to_csv(index=False) | |
| b64 = base64.b64encode(csv_file.encode()).decode() | |
| href = f'<a href="data:file/xlsx;base64,{b64}" download="mapped_dataset.xlsx">Download</a>' | |
| st.success('Download Mapped Dataset') | |
| st.markdown(href, unsafe_allow_html=True) | |
| # Streamlit App | |
| def main(): | |
| st.title('Dataset Naming App') | |
| # Read counter from file | |
| counter = read_counter() | |
| # User input: Data Source | |
| data_source = st.selectbox('Select Data Source', list(data_source_codes.values())) | |
| data_source_code = next(code for code, name in data_source_codes.items() if name == data_source) | |
| # Generate dataset name | |
| dataset_id = generate_dataset_id(counter) | |
| # User input: Sector | |
| sector = st.selectbox('Select Sector', list(sector_codes.values())) | |
| sector_code = next(code for code, name in sector_codes.items() if name == sector) | |
| # User input: Start Year | |
| start_year = st.number_input('Enter Start Year', min_value=2000, max_value=2100, value=2022) | |
| # User input: End Year | |
| end_year = st.number_input('Enter End Year', min_value=start_year, max_value=2100, value=2022) | |
| # User input: Granularity | |
| granularity = st.selectbox('Select Granularity', list(granularity_short_codes.keys())) | |
| # User input: Frequency | |
| frequency = st.selectbox('Select Frequency', list(frequency_short_codes.keys())) | |
| # Generate dataset name | |
| dataset_id = generate_dataset_id(counter) # Update with your actual counter | |
| dataset_name = generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency) | |
| # User input: Original Dataset Name | |
| original_dataset_name = st.text_input('Enter Original Dataset Name') | |
| # Check if the dataset name is unique | |
| is_unique = check_dataset_name_uniqueness(dataset_name) # Implement this function | |
| # Display generated dataset name | |
| st.write('Gov Data Labeler') | |
| st.write(dataset_name) | |
| # Display warning/error if dataset name is not unique | |
| if not is_unique: | |
| st.warning('Dataset name is not unique. Please generate a new name.') | |
| # Save dataset info to Excel | |
| if st.button('Save to Excel') and is_unique: | |
| data = { | |
| 'Dataset Name': [dataset_name], | |
| 'Data Source': [data_source], | |
| 'Sector': [sector], | |
| 'Start Year': [start_year], | |
| 'End Year': [end_year], | |
| 'Granularity': [granularity], | |
| 'Frequency': [frequency], | |
| 'Original Dataset Name': [original_dataset_name] | |
| } | |
| df = pd.DataFrame(data) | |
| #df.to_excel('dataset_info.xlsx', index=False) | |
| # Update and save counter to file | |
| counter += 1 | |
| update_counter(counter) | |
| generate_download_link(df) | |
| # Clear user inputs | |
| st.success('Dataset information saved to Excel.') | |
| data_source = '' | |
| sector = '' | |
| start_year = 2022 # Reset to default year | |
| end_year = 2022 | |
| granularity = '' | |
| frequency = '' | |
| original_dataset_name = '' | |
| if __name__ == '__main__': | |
| main() | |