CodeYatra / mapping.py
saurabhharak's picture
Upload 7 files
e433a21
import sqlite3
import streamlit as st
import pandas as pd
from collections import defaultdict
import base64
import multiprocessing
from pg_utils_fn import create_mapped_dataset, get_state_mappings
def create_gp_mapped_dataset(dataset, mapping):
    """
    Add a 'gp_code' column to the dataset by looking up each panchayat name.

    The 'panchayat_name' column is stripped of surrounding whitespace, then its
    lower-cased form is looked up in ``mapping``. Names without an entry in
    ``mapping`` receive the sentinel code -2.

    Args:
        dataset: DataFrame with a string 'panchayat_name' column. It is
            modified in place.
        mapping: Dict from lower-cased gp name to gp code.

    Returns:
        The same DataFrame object, now carrying the 'gp_code' column.
    """
    cleaned_names = dataset['panchayat_name'].str.strip()
    dataset['panchayat_name'] = cleaned_names

    # Lookup is case-insensitive: keys in ``mapping`` are expected lower-cased.
    lookup_keys = dataset['panchayat_name'].str.lower()
    dataset['gp_code'] = lookup_keys.map(mapping)

    # Flag every name that had no match so callers can filter on -2.
    unmatched = dataset['gp_code'].isnull()
    dataset.loc[unmatched, 'gp_code'] = -2
    return dataset
def fetch_gp_mapping():
    """
    Fetch the gp mapping from the SQLite database.

    Opens 'lgd_database.db', reads every row of the 'gp' table and always
    closes the connection again, even when the query fails.

    Returns:
        A list of tuples (entityName, entityLGDCode, entityNameVariants,
        entityParent), one per row of the 'gp' table.

    Raises:
        sqlite3.Error: If the query cannot be executed (for example when the
            'gp' table does not exist).
    """
    # Connect to the SQLite database
    conn = sqlite3.connect('lgd_database.db')
    try:
        cursor = conn.cursor()
        # Retrieve gp data from the 'gp' table
        cursor.execute("SELECT entityName, entityLGDCode, entityNameVariants, entityParent FROM gp")
        return cursor.fetchall()
    finally:
        # Release the connection on both the success and the error path.
        conn.close()
def populate_gp_mapping():
"""
Build a lookup from lower-cased gp names to their codes.

Reads 'data.csv' with pandas and the rows returned by fetch_gp_mapping(),
then fills a plain dict keyed by lower-cased name (and name variants).

Returns:
A dict (not a defaultdict) mapping lower-cased names to entity codes.
"""
# NOTE(review): indentation was lost in this copy of the file, so the nesting
# of the statements below cannot be recovered with certainty -- in particular
# whether the variant handling runs for every database row or only when a
# name/parent match was found. Confirm against the original before re-indenting.
state_dataset = pd.read_csv('data.csv')
data = fetch_gp_mapping()
# Keep one row per distinct 'panchayat_name' value.
unique_rows = state_dataset.drop_duplicates(subset=['panchayat_name'])
# Pair each stripped, lower-cased panchayat name with that row's 'block_code'.
unique_rows_lower = unique_rows.apply(lambda x: (x['panchayat_name'].strip().lower(), x['block_code']), axis=1).tolist()
entity_mapping = {}
# NOTE(review): edname is not referenced again in this function.
edname = "Not Available"
# Each database row is unpacked as (name, code, variants, parent code).
for entity_name, entity_code, entity_variants, parent_code in data:
for row in unique_rows_lower:
entity_name_lower = row[0]
# Despite its name, this holds the CSV row's 'block_code'.
state_code = row[1]
if entity_name_lower == entity_name.lower() :
# Both sides are coerced to int before the parent code is compared.
if int(parent_code) == int(state_code) :
# The extra .lower() is redundant: the name was lower-cased above.
entity_mapping[entity_name_lower.lower()] = entity_code
if entity_variants:
# Variants are a comma-separated string; each is stripped and lower-cased.
for variant in entity_variants.split(','):
entity_mapping[variant.strip().lower()] = entity_code
return entity_mapping
import pandas as pd
def main_gp():
    """
    Map gp codes onto the rows of 'data.csv' and report the unmatched names.

    Returns:
        A tuple (unmatched_names, mapped_dataset): the 'panchayat_name' values
        whose 'gp_code' equals -2, and the full dataset as returned by
        create_gp_mapped_dataset.
    """
    source_frame = pd.read_csv('data.csv')
    name_to_code = populate_gp_mapping()
    mapped_dataset = create_gp_mapped_dataset(source_frame, name_to_code)

    # -2 is the sentinel used for names that have no code.
    is_unmatched = mapped_dataset['gp_code'] == -2
    unmatched_names = mapped_dataset[is_unmatched]['panchayat_name']
    return unmatched_names, mapped_dataset
def main_state(dataset):
    """
    Map state codes onto the given dataset and report the unmatched names.

    Args:
        dataset: The dataset handed to create_mapped_dataset together with
            the mapping from get_state_mappings().

    Returns:
        A tuple (unmatched_names, mapped_dataset): the 'state_name' values
        whose 'state_code' equals -2, and the mapped dataset itself.
    """
    mapped_dataset = create_mapped_dataset(dataset, get_state_mappings())

    # -2 marks rows for which no state code was found.
    is_unmatched = mapped_dataset['state_code'] == -2
    unmatched_names = mapped_dataset[is_unmatched]['state_name']
    return unmatched_names, mapped_dataset