# Page-scrape residue (not part of the module): pujithapsx's picture / initial push / e9084d7
import configparser
import os
import ast
import pandas as pd
import logging
# Module-level logger used by everything in this config module.
logger = logging.getLogger("config")

# Initialize config: a single parser holds every value read from
# common.properties.
config = configparser.ConfigParser()

# Two directory levels above the directory containing this file.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Try backend/config first, then config/ as fallback; the first candidate
# that exists on disk wins.
_conf_candidates = (
    os.path.join(project_root, "backend", "config", "common.properties"),
    os.path.join(project_root, "config", "common.properties"),
)
conf_file_path = next(
    (candidate for candidate in _conf_candidates if os.path.exists(candidate)),
    None,
)
if conf_file_path is None:
    raise FileNotFoundError(f"common.properties not found. Searched in backend/config/ and config/ under {project_root}")

# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it actually parsed, so an empty result means the file exists
# but was unreadable. Fail here rather than later with a NoSectionError.
if not config.read(conf_file_path):
    raise OSError(f"common.properties exists but could not be read: {conf_file_path}")
logger.info(f"Config loaded from: {conf_file_path}")
# Temporary debug output (disabled); `files` presumably held the return value of config.read().
# print("CONFIG FILES LOADED:", files)
# print("SECTIONS FOUND:", config.sections())
# Constants exported from common.properties for use by other modules.
_IDENTIFIERS_SECTION = "IDENTIFIERS"
_MAPPING_SECTION = "MAPPING_DICT"
_MATCHING_SECTION = "MATCHING_LOGIC"


def _literal_option(section, option):
    """Return the option's raw text evaluated as a Python literal."""
    raw_text = config.get(section, option)
    return ast.literal_eval(raw_text)


# Values used exactly as written in the [IDENTIFIERS] section.
APARTMENT_IDENTIFIER = config.get(_IDENTIFIERS_SECTION, "APARTMENT_IDENTIFIER")
FLAT_NUMBER_IDENTIFIER = config.get(_IDENTIFIERS_SECTION, "FLAT_NUMBER_IDENTIFIER")
HOUSE_NUMBER_IDENTIFIER = config.get(_IDENTIFIERS_SECTION, "HOUSE_NUMBER_IDENTIFIER")
STREET_KEYWORD = config.get(_IDENTIFIERS_SECTION, "STREET_KEYWORD")
# Exported as FLOOR_NO_KEYWORD but stored under the FLOOR_KEYWORD option.
FLOOR_NO_KEYWORD = config.get(_IDENTIFIERS_SECTION, "FLOOR_KEYWORD")

# Values written in the config as Python literals and parsed on load.
SURNAME_IDENTIFIER = _literal_option(_IDENTIFIERS_SECTION, "INDIAN_SURNAMES")
STATE_MAPPING = _literal_option(_MAPPING_SECTION, "STATE_MAPPING")
CITY_MAPPING = _literal_option(_MAPPING_SECTION, "CITY_MAPPING")
ADDRESS_MAPPING = _literal_option(_MAPPING_SECTION, "ADDRESS_MAPPING")
MODEL_WEIGHTS = _literal_option(_MATCHING_SECTION, "MODEL_WEIGHTS")
MATCHING_RULES = _literal_option(_MATCHING_SECTION, "MATCHING_RULES")
# Name-specific weights (embedding 0.7 + fuzz 0.2 + phonetic 0.1).
# Both options are optional. Each is read in its own try block so that a
# missing or malformed value for one does not discard a valid value for the
# other, and every fallback is logged instead of being applied silently.
try:
    NAME_MODEL_WEIGHTS = ast.literal_eval(config.get("NAME_MATCHING", "NAME_MODEL_WEIGHTS"))
except Exception as e:
    # Best-effort: reuse the generic weights (same object as MODEL_WEIGHTS).
    logger.warning(f"NAME_MODEL_WEIGHTS not loaded from [NAME_MATCHING], falling back to MODEL_WEIGHTS: {e}")
    NAME_MODEL_WEIGHTS = MODEL_WEIGHTS
try:
    NAME_MATCH_ADJUSTMENTS = ast.literal_eval(config.get("NAME_MATCHING", "NAME_MATCH_ADJUSTMENTS"))
except Exception as e:
    logger.warning(f"NAME_MATCH_ADJUSTMENTS not loaded from [NAME_MATCHING], using built-in defaults: {e}")
    NAME_MATCH_ADJUSTMENTS = {"surname_penalty": -30, "initial_boost": 30, "subset_boost": 40}
# Address-specific weights (embedding + fuzz, no phonetic).
# Both options are optional. Each is read in its own try block so that a
# missing or malformed value for one does not discard a valid value for the
# other, and every fallback is logged instead of being applied silently.
try:
    ADDRESS_MODEL_WEIGHTS = ast.literal_eval(config.get("ADDRESS_MATCHING", "ADDRESS_MODEL_WEIGHTS"))
except Exception as e:
    # Best-effort: reuse the generic weights (same object as MODEL_WEIGHTS).
    logger.warning(f"ADDRESS_MODEL_WEIGHTS not loaded from [ADDRESS_MATCHING], falling back to MODEL_WEIGHTS: {e}")
    ADDRESS_MODEL_WEIGHTS = MODEL_WEIGHTS
try:
    ADDRESS_MATCH_ADJUSTMENTS = ast.literal_eval(config.get("ADDRESS_MATCHING", "ADDRESS_MATCH_ADJUSTMENTS"))
except Exception as e:
    logger.warning(f"ADDRESS_MATCH_ADJUSTMENTS not loaded from [ADDRESS_MATCHING], using built-in defaults: {e}")
    ADDRESS_MATCH_ADJUSTMENTS = {"house_match_boost": 30, "house_mismatch_penalty": 70}
# Embedding model names from the optional [EMBEDDING_MODELS] section, with
# surrounding whitespace stripped. Each name falls back to its own built-in
# default independently, and the fallback is logged instead of being silent.
try:
    MODEL_1_NAME = config.get("EMBEDDING_MODELS", "MODEL_1_NAME").strip()
except Exception as e:
    logger.warning(f"MODEL_1_NAME not loaded from [EMBEDDING_MODELS], using built-in default: {e}")
    MODEL_1_NAME = "sentence-transformers/all-mpnet-base-v2"
try:
    MODEL_2_NAME = config.get("EMBEDDING_MODELS", "MODEL_2_NAME").strip()
except Exception as e:
    logger.warning(f"MODEL_2_NAME not loaded from [EMBEDDING_MODELS], using built-in default: {e}")
    MODEL_2_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# =========================================================
# CSV DATA LOADING (replacing MySQL)
# =========================================================
def load_csv_file(csv_path: str, file_name: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame without ever raising.

    Args:
        csv_path: Path to the CSV. A relative path is joined onto
            ``project_root``; the result is then made absolute and normalized.
        file_name: Label used only in log messages.

    Returns:
        The parsed DataFrame, or an empty ``pd.DataFrame()`` when the path
        does not exist or any exception occurs while resolving or reading it.
    """
    try:
        # Anchor relative paths at the project root; abspath also collapses
        # any ./ or ../ segments.
        if os.path.isabs(csv_path):
            resolved_path = csv_path
        else:
            resolved_path = os.path.join(project_root, csv_path)
        resolved_path = os.path.abspath(resolved_path)

        # Guard clause: a missing file is reported and yields an empty frame.
        if not os.path.exists(resolved_path):
            logger.warning(f"CSV file not found: {resolved_path}")
            return pd.DataFrame()

        frame = pd.read_csv(resolved_path)
        logger.info(f"Loaded {file_name}: {len(frame)} rows from {resolved_path}")
        return frame
    except Exception as exc:
        logger.error(f"Failed to load {file_name}: {exc}")
        return pd.DataFrame()
# Load CSV reference tables.
def _load_reference_table(option: str) -> pd.DataFrame:
    """Load the CSV whose path is stored under ``option`` in the [csv] section.

    Returns an empty, column-less DataFrame when the option cannot be read
    from the config; otherwise returns whatever load_csv_file returns for the
    configured path.
    """
    try:
        csv_path = config.get("csv", option)
    except Exception as e:
        logger.warning(f"Some CSV files may not have loaded: {e}")
        return pd.DataFrame()
    return load_csv_file(csv_path, option)


# Each table is loaded independently, so one missing config option no longer
# throws away tables that were already loaded.
name_variation_df = _load_reference_table("name_variation_standard")
hno_variation_df = _load_reference_table("hno_variation_standard")
city_prev_pres_df = _load_reference_table("city_prev_pres")
state_name_standard_df = _load_reference_table("state_name_standard")
sur_comm_names_df = _load_reference_table("sur_comm_names")
pin_city_state_df = _load_reference_table("pin_city_state")

# A frame with no columns is the pd.DataFrame() fallback, so report it instead
# of claiming success. NOTE(review): this assumes a successfully parsed CSV
# always has at least one column - confirm against the data files.
_unloaded_tables = [
    table_name
    for table_name, table in (
        ("name_variation_standard", name_variation_df),
        ("hno_variation_standard", hno_variation_df),
        ("city_prev_pres", city_prev_pres_df),
        ("state_name_standard", state_name_standard_df),
        ("sur_comm_names", sur_comm_names_df),
        ("pin_city_state", pin_city_state_df),
    )
    if len(table.columns) == 0
]
if _unloaded_tables:
    logger.warning(f"Some CSV files may not have loaded: {', '.join(_unloaded_tables)}")
else:
    logger.info("All CSV files loaded successfully")
# Legacy string exports for backward compatibility: each name below is bound
# to a string equal to its own identifier. Listed in the same order as the
# CSV reference tables are loaded.
name_variation_standard = "name_variation_standard"
hno_variation_standard = "hno_variation_standard"
city_prev_pres = "city_prev_pres"
state_name_standard = "state_name_standard"
sur_comm_names = "sur_comm_names"
pin_city_state = "pin_city_state"