File size: 9,094 Bytes
3a27418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4230ac
3a27418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
import base64
import streamlit as st
import pandas as pd

# Data source codes: short code -> display name of the government body.
# main() shows the display names in a selectbox (in this insertion order) and
# maps the chosen name back to its code by scanning for the first matching
# value, so display names should stay unique. The code becomes the first
# segment of the generated dataset name.
data_source_codes = {
    'CABSEC': 'Cabinet Secretariat',
    'CAG': 'Comptroller & Auditor General',
    'DAE': 'Department of Atomic Energy',
    'DOS': 'Department of Space',
    'ECI': 'Election Commission of India',
    'HCDELHI': 'HIGH COURT OF DELHI',
    'MOA': 'Ministry of Agriculture',
    'MCF': 'Ministry of Chemicals & Fertilizers',
    'MOCA': 'Ministry of Civil Aviation',
    'MOCOAL': 'Ministry of Coal',
    'MOCI': 'Ministry of Commerce & Industry',
    'MOCIT': 'Ministry of Communications & Information Tech.',
    'MOCF&PD': 'Ministry of Consumer Aff., Food, & Public Dist.',
    'MCA': 'Ministry of Corporate Affairs',
    'MOCULT': 'Ministry of Culture',
    'MOD': 'Ministry of Defence',
    'MDONER': 'Ministry of Development of North Eastern Region',
    'MDWS': 'Ministry of Drinking Water and Sanitation',
    'MOES': 'Ministry of Earth Sciences',
    'MOEF': 'Ministry of Environment & Forests',
    'MEA': 'Ministry of External Affairs',
    'MOF': 'Ministry of Finance',
    'MOFPI': 'Ministry of Food Processing Industries',
    'MOHFW': 'Ministry of Health & Family Welfare',
    'MHI&PE': 'Ministry of Heavy Industry & Public Enterprises',
    'MHA': 'Ministry of Home Affairs',
    'MOHUA': 'Ministry of Housing & Urban Poverty Alleviation',
    'MHRD': 'Ministry of Human Resource Development',
    'MOI&B': 'Ministry of Information & Broadcasting',
    'MOL&E': 'Ministry of Labour & Employment',
    'MOLJ': 'Ministry of Law & Justice',
    'MSME': 'Ministry of Micro, Small and Medium Enterprises',
    'MOM': 'Ministry of Mines',
    'MMA': 'Ministry of Minority Affairs',
    'MNRE': 'Ministry of New & Renewable Energy',
    'MPR': 'Ministry of Panchayati Raj',
    'MPA': 'Ministry of Parliamentary Affairs',
    'MPPP': 'Ministry of Personnel, Public Grievances & Pensions',
    'MPNG': 'Ministry of Petroleum & Natural Gas',
    'MP': 'Ministry of Power',
    'MR': 'Ministry of Railways',
    'MORTH': 'Ministry of Road Transport & Highways',
    'MRD': 'Ministry of Rural Development',
    'MST': 'Ministry of Science & Technology',
    'MS': 'Ministry of Shipping',
    'MSJE': 'Ministry of Social Justice & Empowerment',
    'MOSPI': 'Ministry of Statistics & Programme Implementation',
    'MSTL': 'Ministry of Steel',
    'MT': 'Ministry of Textiles',
    'MOT': 'Ministry of Tourism',
    'MTA': 'Ministry of Tribal Affairs',
    'MOUD': 'Ministry of Urban Development',
    'MWR': 'Ministry of Water Resources',
    'MWCD': 'Ministry of Women & Child Development',
    'MYAS': 'Ministry of Youth Affairs & Sports',
    'PC': 'Planning Commission',
    'PRES': 'President',
    'PMO': "Prime Minister's Office",
    'VP': 'Vice-President'
}



# Sector codes: short code -> display name of the sector.
# main() shows the display names in a selectbox (in this insertion order) and
# maps the chosen name back to its code by scanning for the first matching
# value, so display names should stay unique. The code becomes the second
# segment of the generated dataset name.
sector_codes = {
    'AGRI': 'Agriculture',
    'ANML': 'Animal Husbandry and Fisheries',
    'BNK': 'Banking',
    'CENS': 'Census',
    'CLMT': 'Climate & Weather',
    'CMDB': 'Commodity Boards',
    'COMR': 'Commerce',
    'CAFF': 'Consumer Affairs',
    'COVID': 'Covid',
    'CRIME': 'Crime',
    'CULT': 'Culture and Tourism',
    'DEMO': 'Demographics',
    'DIGINF': 'Digital Infrastructure',
    'ECON': 'Economy',
    'ELECT': 'Elections',
    'ENRG': 'Energy',
    'EXTAFF': 'External Affairs',
    'FINCL': 'Financial Inclusion',
    'FAGRI': 'Food and Agriculture',
    'FORWLD': 'Forestry and Wildlife',
    'GEN': 'General',
    'GOVSCM': 'Government Schemes',
    'HLTH': 'Health',
    'HSNG': 'Housing',
    'IND': 'Industries',
    'JUST': 'Justice',
    'NSS': 'National Sample Survey',
    'NATDIS': 'Natural Disasters',
    'OTHER': 'Other',
    'PETGAS': 'Petroleum and Gas',
    'RURALDEV': 'Rural Development',
    'SATIMG': 'Satellite Imagery Data',
    'SCI': 'Science',
    'SOCIOECO': 'Socio Economic',
    'TRANS': 'Transportation',
    'BUDGET': 'Union Budget',
    'WTR': 'Water'
}

#Granularity_values = ["District","State","Tehsil","Other Level", "India","Assembly Constituency","Point Level","Gram Panchayat","Block","Sub-District","Village","Country"]
 



# Granularity label -> short code used as the third segment of the dataset
# name. The keys (in this insertion order) are the options offered by the
# granularity selectbox in main(); generate_dataset_name() substitutes 'UNK'
# for any label that is not a key here.
granularity_short_codes = {
    'District': 'DIS',
    'State': 'STA',
    'Tehsil': 'TEH',
    'Other Level': 'OTH',
    'India': 'IND',
    'Assembly Constituency': 'AC',
    'Point Level': 'PL',
    'Gram Panchayat': 'GP',
    'Block': 'BL',
    'Sub-District': 'SD',
    'Village': 'VIL',
    'Country': 'CTRY'
}
# frequency_values = ['Yearly', 'Weekly', 'Quinquennial', 'Daily', 'Fortnightly', 'Monthly', 'Seasonally', 'Other / One Time']
 

# Frequency label -> one-letter code used as the fourth segment of the dataset
# name. The keys (in this insertion order) are the options offered by the
# frequency selectbox in main(); generate_dataset_name() substitutes 'UNK'
# for any label that is not a key here.
frequency_short_codes = {
    'Yearly': 'Y',
    'Weekly': 'W',
    'Quinquennial': 'Q',
    'Daily': 'D',
    'Fortnightly': 'F',
    'Monthly': 'M',
    'Seasonally': 'S',
    'Other / One Time': 'O'
}


# Read counter from file
def read_counter():
    """Return the dataset counter persisted in ``counter.txt``.

    Returns:
        int: The integer stored in ``counter.txt`` (surrounding whitespace
        is ignored), or ``1`` when the file does not exist yet or does not
        contain a valid integer (for example, when it is empty).
    """
    try:
        with open('counter.txt', 'r') as f:
            raw = f.read()
    except FileNotFoundError:
        return 1  # First run: nothing has been persisted yet.

    try:
        return int(raw)
    except ValueError:
        # An empty or corrupted file would otherwise crash the app on every
        # run; fall back to the starting value instead.
        return 1

# Persist the counter so a later read_counter() call can pick it up
def update_counter(counter):
    """Overwrite ``counter.txt`` with the text form of ``counter``."""
    with open('counter.txt', 'w') as counter_file:
        text = str(counter)
        counter_file.write(text)

# Build the dataset ID from the running counter
def generate_dataset_id(counter):
    """Return ``'DID'`` followed by ``counter`` zero-padded to at least 3 digits."""
    padded = format(counter, '03')
    return 'DID' + padded

# Assemble the dataset name from its code segments
def generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency):
    """Return ``<source>-<sector>-<granularity>-<frequency>-<dataset id>``.

    ``granularity`` and ``frequency`` are display labels; they are translated
    through ``granularity_short_codes`` and ``frequency_short_codes``, and a
    label missing from its table becomes ``'UNK'``. ``start_year`` and
    ``end_year`` are accepted but do not appear in the generated name.
    """
    segments = (
        data_source_code,
        sector_code,
        granularity_short_codes.get(granularity, 'UNK'),
        frequency_short_codes.get(frequency, 'UNK'),
        dataset_id,
    )
    return '{}-{}-{}-{}-{}'.format(*segments)

# Dataset names considered already taken. It starts empty, and nothing in the
# code visible here adds to it.
existing_dataset_names = []

# Uniqueness test against the in-memory list above
def check_dataset_name_uniqueness(dataset_name):
    """Return True when ``dataset_name`` is absent from ``existing_dataset_names``."""
    already_taken = dataset_name in existing_dataset_names
    return not already_taken


def generate_download_link(mapped_dataset):
    """Render a download link for ``mapped_dataset`` on the Streamlit page.

    The data is serialised to CSV (without the index), base64-encoded and
    embedded in a ``data:`` URI inside an ``<a>`` tag, preceded by a success
    banner.

    Args:
        mapped_dataset: Object exposing ``to_csv(index=False)``; main() passes
            a pandas DataFrame.
    """
    csv_text = mapped_dataset.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode()).decode()
    # The payload is CSV text, so label it as CSV. Advertising it as an .xlsx
    # file produces a download that is not a valid Excel workbook.
    href = f'<a href="data:text/csv;base64,{b64}" download="mapped_dataset.csv">Download</a>'
    st.success('Download Mapped Dataset')
    # unsafe_allow_html lets the raw <a> tag render as a clickable link.
    st.markdown(href, unsafe_allow_html=True)

# Streamlit App
def main():
    """Render the dataset-naming form and handle the save action.

    Collects data source, sector, year range, granularity, frequency and the
    original dataset name, shows the generated dataset name, and on
    'Save to Excel' (when the name passes the uniqueness check) increments the
    persisted counter and offers the record as a download.
    """
    st.title('Dataset Naming App')

    # Read counter from file
    counter = read_counter()

    # User input: Data Source (shown by display name, mapped back to its code)
    data_source = st.selectbox('Select Data Source', list(data_source_codes.values()))
    data_source_code = next(code for code, name in data_source_codes.items() if name == data_source)

    # User input: Sector (shown by display name, mapped back to its code)
    sector = st.selectbox('Select Sector', list(sector_codes.values()))
    sector_code = next(code for code, name in sector_codes.items() if name == sector)

    # User input: Start Year
    start_year = st.number_input('Enter Start Year', min_value=2000, max_value=2100, value=2022)

    # User input: End Year
    # The default may not be below min_value: a fixed default of 2022 makes
    # Streamlit raise as soon as a start year after 2022 is chosen.
    end_year = st.number_input(
        'Enter End Year',
        min_value=start_year,
        max_value=2100,
        value=max(start_year, 2022),
    )

    # User input: Granularity
    granularity = st.selectbox('Select Granularity', list(granularity_short_codes.keys()))

    # User input: Frequency
    frequency = st.selectbox('Select Frequency', list(frequency_short_codes.keys()))

    # Generate dataset ID and name from the current counter and selections
    dataset_id = generate_dataset_id(counter)
    dataset_name = generate_dataset_name(data_source_code, sector_code, start_year, end_year, dataset_id, granularity, frequency)

    # User input: Original Dataset Name
    original_dataset_name = st.text_input('Enter Original Dataset Name')

    # Check if the dataset name is unique
    is_unique = check_dataset_name_uniqueness(dataset_name)

    # Display generated dataset name
    st.write('Gov Data Labeler')
    st.write(dataset_name)

    # Display warning if dataset name is not unique
    if not is_unique:
        st.warning('Dataset name is not unique. Please generate a new name.')

    # Offer the dataset info as a download
    if st.button('Save to Excel') and is_unique:
        data = {
            'Dataset Name': [dataset_name],
            'Data Source': [data_source],
            'Sector': [sector],
            'Start Year': [start_year],
            'End Year': [end_year],
            'Granularity': [granularity],
            'Frequency': [frequency],
            'Original Dataset Name': [original_dataset_name]
        }
        df = pd.DataFrame(data)

        # Advance the persisted counter so the next dataset gets a new ID
        update_counter(counter + 1)
        generate_download_link(df)
        st.success('Dataset information saved to Excel.')
        # NOTE: the widgets are not reset here; rebinding the local variables
        # that hold their values would have no effect on the widgets.
    
# Run the app only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()