# Page header captured with this file: "Spaces: Sleeping", "File size: 9,094 Bytes".
# Revision identifiers shown in the page gutter: 3a27418 e4230ac 3a27418.
import base64
import streamlit as st
import pandas as pd
# Data source code -> display name of the issuing body.
# Insertion order is the order in which the names are listed in the UI.
data_source_codes = {
    'CABSEC': 'Cabinet Secretariat',
    'CAG': 'Comptroller & Auditor General',
    'DAE': 'Department of Atomic Energy',
    'DOS': 'Department of Space',
    'ECI': 'Election Commission of India',
    'HCDELHI': 'HIGH COURT OF DELHI',
    'MOA': 'Ministry of Agriculture',
    'MCF': 'Ministry of Chemicals & Fertilizers',
    'MOCA': 'Ministry of Civil Aviation',
    'MOCOAL': 'Ministry of Coal',
    'MOCI': 'Ministry of Commerce & Industry',
    'MOCIT': 'Ministry of Communications & Information Tech.',
    'MOCF&PD': 'Ministry of Consumer Aff., Food, & Public Dist.',
    'MCA': 'Ministry of Corporate Affairs',
    'MOCULT': 'Ministry of Culture',
    'MOD': 'Ministry of Defence',
    'MDONER': 'Ministry of Development of North Eastern Region',
    'MDWS': 'Ministry of Drinking Water and Sanitation',
    'MOES': 'Ministry of Earth Sciences',
    'MOEF': 'Ministry of Environment & Forests',
    'MEA': 'Ministry of External Affairs',
    'MOF': 'Ministry of Finance',
    'MOFPI': 'Ministry of Food Processing Industries',
    'MOHFW': 'Ministry of Health & Family Welfare',
    'MHI&PE': 'Ministry of Heavy Industry & Public Enterprises',
    'MHA': 'Ministry of Home Affairs',
    'MOHUA': 'Ministry of Housing & Urban Poverty Alleviation',
    'MHRD': 'Ministry of Human Resource Development',
    'MOI&B': 'Ministry of Information & Broadcasting',
    'MOL&E': 'Ministry of Labour & Employment',
    'MOLJ': 'Ministry of Law & Justice',
    'MSME': 'Ministry of Micro, Small and Medium Enterprises',
    'MOM': 'Ministry of Mines',
    'MMA': 'Ministry of Minority Affairs',
    'MNRE': 'Ministry of New & Renewable Energy',
    'MPR': 'Ministry of Panchayati Raj',
    'MPA': 'Ministry of Parliamentary Affairs',
    'MPPP': 'Ministry of Personnel, Public Grievances & Pensions',
    'MPNG': 'Ministry of Petroleum & Natural Gas',
    'MP': 'Ministry of Power',
    'MR': 'Ministry of Railways',
    'MORTH': 'Ministry of Road Transport & Highways',
    'MRD': 'Ministry of Rural Development',
    'MST': 'Ministry of Science & Technology',
    'MS': 'Ministry of Shipping',
    'MSJE': 'Ministry of Social Justice & Empowerment',
    'MOSPI': 'Ministry of Statistics & Programme Implementation',
    'MSTL': 'Ministry of Steel',
    'MT': 'Ministry of Textiles',
    'MOT': 'Ministry of Tourism',
    'MTA': 'Ministry of Tribal Affairs',
    'MOUD': 'Ministry of Urban Development',
    'MWR': 'Ministry of Water Resources',
    'MWCD': 'Ministry of Women & Child Development',
    'MYAS': 'Ministry of Youth Affairs & Sports',
    'PC': 'Planning Commission',
    'PRES': 'President',
    'PMO': "Prime Minister's Office",
    'VP': 'Vice-President',
}
# Sector code -> display name of the sector.
# Insertion order is the order in which the names are listed in the UI.
sector_codes = {
    'AGRI': 'Agriculture',
    'ANML': 'Animal Husbandry and Fisheries',
    'BNK': 'Banking',
    'CENS': 'Census',
    'CLMT': 'Climate & Weather',
    'CMDB': 'Commodity Boards',
    'COMR': 'Commerce',
    'CAFF': 'Consumer Affairs',
    'COVID': 'Covid',
    'CRIME': 'Crime',
    'CULT': 'Culture and Tourism',
    'DEMO': 'Demographics',
    'DIGINF': 'Digital Infrastructure',
    'ECON': 'Economy',
    'ELECT': 'Elections',
    'ENRG': 'Energy',
    'EXTAFF': 'External Affairs',
    'FINCL': 'Financial Inclusion',
    'FAGRI': 'Food and Agriculture',
    'FORWLD': 'Forestry and Wildlife',
    'GEN': 'General',
    'GOVSCM': 'Government Schemes',
    'HLTH': 'Health',
    'HSNG': 'Housing',
    'IND': 'Industries',
    'JUST': 'Justice',
    'NSS': 'National Sample Survey',
    'NATDIS': 'Natural Disasters',
    'OTHER': 'Other',
    'PETGAS': 'Petroleum and Gas',
    'RURALDEV': 'Rural Development',
    'SATIMG': 'Satellite Imagery Data',
    'SCI': 'Science',
    'SOCIOECO': 'Socio Economic',
    'TRANS': 'Transportation',
    'BUDGET': 'Union Budget',
    'WTR': 'Water',
}
# Granularity label -> short code embedded in generated dataset names.
# The keys double as the list of granularity choices offered to the user.
granularity_short_codes = {
    'District': 'DIS',
    'State': 'STA',
    'Tehsil': 'TEH',
    'Other Level': 'OTH',
    'India': 'IND',
    'Assembly Constituency': 'AC',
    'Point Level': 'PL',
    'Gram Panchayat': 'GP',
    'Block': 'BL',
    'Sub-District': 'SD',
    'Village': 'VIL',
    'Country': 'CTRY',
}
# Frequency label -> one-letter code embedded in generated dataset names.
# The keys double as the list of frequency choices offered to the user.
frequency_short_codes = {
    'Yearly': 'Y',
    'Weekly': 'W',
    'Quinquennial': 'Q',
    'Daily': 'D',
    'Fortnightly': 'F',
    'Monthly': 'M',
    'Seasonally': 'S',
    'Other / One Time': 'O',
}
# Read counter from file
def read_counter(path='counter.txt'):
    """Return the dataset counter stored in ``path``.

    Args:
        path: File holding the counter as decimal text. Defaults to
            ``counter.txt`` in the current working directory.

    Returns:
        The stored integer, or 1 (the starting counter value) when the file
        is missing or does not contain a valid integer.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return int(f.read())
    except (FileNotFoundError, ValueError):
        # A missing file means no ID has been issued yet. An empty or corrupt
        # file (e.g. left behind by an interrupted write) is treated the same
        # way instead of crashing the app with an uncaught ValueError.
        return 1
# Persist the counter so the next run continues from it
def update_counter(counter):
    """Overwrite ``counter.txt`` with the text form of ``counter``."""
    with open('counter.txt', 'w') as counter_file:
        counter_file.write(f'{counter}')
# Build the dataset ID from the running counter
def generate_dataset_id(counter):
    """Return ``'DID'`` followed by ``counter`` zero-padded to at least three characters."""
    padded = format(counter, '03')
    return 'DID' + padded
# Compose the standardized dataset name
def generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency):
    """Return ``<source>-<sector>-<granularity code>-<frequency code>-<dataset id>``.

    ``granularity`` and ``frequency`` are translated through
    ``granularity_short_codes`` and ``frequency_short_codes``; a label missing
    from its table is rendered as ``'UNK'``. ``start_year`` and ``end_year``
    are accepted but do not appear in the generated name.
    """
    granularity_part = granularity_short_codes.get(granularity, 'UNK')
    frequency_part = frequency_short_codes.get(frequency, 'UNK')
    return '{}-{}-{}-{}-{}'.format(
        data_source_code,
        sector_code,
        granularity_part,
        frequency_part,
        dataset_id,
    )
# Dataset names considered already taken. No code shown in this file adds
# entries to it, so it starts (and, as far as visible, stays) empty.
existing_dataset_names = []


# Uniqueness test against existing_dataset_names
def check_dataset_name_uniqueness(dataset_name):
    """Return True when ``dataset_name`` is absent from ``existing_dataset_names``."""
    already_taken = dataset_name in existing_dataset_names
    return not already_taken
def generate_download_link(mapped_dataset):
    """Render a download link for ``mapped_dataset`` on the Streamlit page.

    The data is serialized to CSV (without the index column), base64-encoded
    and embedded in a ``data:`` URI, so nothing is written to disk.

    Args:
        mapped_dataset: Object exposing ``to_csv(index=False)``; ``main``
            passes a pandas DataFrame.
    """
    csv_text = mapped_dataset.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode()).decode()
    # The payload is CSV text, so label it as CSV. Advertising it as
    # "file/xlsx" with an ".xlsx" name produced a file that is not a valid
    # Excel workbook.
    href = (
        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
        'download="mapped_dataset.csv">Download</a>'
    )
    st.success('Download Mapped Dataset')
    # Raw HTML is required for the anchor's href/download attributes.
    st.markdown(href, unsafe_allow_html=True)
# Streamlit App
def main():
    """Render the dataset-naming form and handle the save action.

    Collects data source, sector, year range, granularity, frequency and the
    original dataset name, shows the generated dataset name, and on
    'Save to Excel' advances the persisted counter and offers the entered
    record through a download link.
    """
    st.title('Dataset Naming App')

    # The persisted counter supplies the numeric part of the dataset ID.
    counter = read_counter()

    # The select boxes list display names; map the choice back to its code.
    data_source = st.selectbox('Select Data Source', list(data_source_codes.values()))
    data_source_code = next(code for code, name in data_source_codes.items() if name == data_source)

    sector = st.selectbox('Select Sector', list(sector_codes.values()))
    sector_code = next(code for code, name in sector_codes.items() if name == sector)

    start_year = st.number_input('Enter Start Year', min_value=2000, max_value=2100, value=2022)
    # The end year may not precede the start year. Its default has to respect
    # that lower bound too: a fixed default of 2022 is below min_value as soon
    # as a later start year is chosen, which Streamlit rejects.
    end_year = st.number_input(
        'Enter End Year',
        min_value=start_year,
        max_value=2100,
        value=max(start_year, 2022),
    )

    granularity = st.selectbox('Select Granularity', list(granularity_short_codes))
    frequency = st.selectbox('Select Frequency', list(frequency_short_codes))

    dataset_id = generate_dataset_id(counter)
    dataset_name = generate_dataset_name(
        data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency
    )

    original_dataset_name = st.text_input('Enter Original Dataset Name')

    is_unique = check_dataset_name_uniqueness(dataset_name)

    # Show the generated name under its heading.
    st.write('Gov Data Labeler')
    st.write(dataset_name)

    if not is_unique:
        st.warning('Dataset name is not unique. Please generate a new name.')

    # The button is always rendered; the save only proceeds for a unique name.
    if st.button('Save to Excel') and is_unique:
        df = pd.DataFrame({
            'Dataset Name': [dataset_name],
            'Data Source': [data_source],
            'Sector': [sector],
            'Start Year': [start_year],
            'End Year': [end_year],
            'Granularity': [granularity],
            'Frequency': [frequency],
            'Original Dataset Name': [original_dataset_name],
        })

        # Advance the counter so the next dataset receives a fresh ID.
        update_counter(counter + 1)

        generate_download_link(df)
        # NOTE(review): no Excel file is written here; the record is only
        # offered through the download link. Consider rewording the button
        # label and this message.
        st.success('Dataset information saved to Excel.')
        # Rebinding the local input variables here would not reset the form,
        # because the widget values are held by Streamlit, so no reset is
        # attempted.
# Start the app only when this file is run as the main script, not on import.
if __name__ == "__main__":
    main()
# End of captured file.