"""Shared configuration constants and CSV reference tables.

Importing this module:

1. locates and reads ``common.properties`` (``backend/config/`` first, then
   ``config/`` under the project root),
2. exposes the values it contains as module-level constants, and
3. loads the CSV files listed in the ``[csv]`` section as pandas DataFrames.

Required settings raise on import when missing; optional settings and CSV
tables fall back to defaults / empty DataFrames and log the reason.
"""
import ast
import configparser
import logging
import os

import pandas as pd

logger = logging.getLogger("config")

# =========================================================
# CONFIG FILE DISCOVERY
# =========================================================
config = configparser.ConfigParser()
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Try backend/config first, then config/ as fallback
conf_file_path = os.path.join(project_root, "backend", "config", "common.properties")
if not os.path.exists(conf_file_path):
    conf_file_path = os.path.join(project_root, "config", "common.properties")

if not os.path.exists(conf_file_path):
    raise FileNotFoundError(
        f"common.properties not found. Searched in backend/config/ and config/ under {project_root}"
    )

config.read(conf_file_path)
logger.info(f"Config loaded from: {conf_file_path}")


def _optional_setting(section, option, default, parse=ast.literal_eval):
    """Return ``parse(config[section][option])``, or *default* if unavailable.

    Each key falls back independently, so one missing or malformed option
    does not discard a valid sibling option from the same section.

    Args:
        section: Section name in ``common.properties``.
        option: Option name inside *section*.
        default: Value returned when the option is absent or cannot be parsed.
        parse: Callable applied to the raw string value.

    Returns:
        The parsed value, or *default*.
    """
    try:
        return parse(config.get(section, option))
    except (configparser.NoSectionError, configparser.NoOptionError):
        logger.info("[%s] %s not configured; using default", section, option)
    except Exception as exc:
        # Deliberately broad: a bad optional value must never abort the
        # import, but it must be visible in the logs.
        logger.warning("Ignoring invalid [%s] %s (%s); using default", section, option, exc)
    return default


# =========================================================
# REQUIRED SETTINGS (import fails if any of these is missing)
# =========================================================
APARTMENT_IDENTIFIER = config.get("IDENTIFIERS", "APARTMENT_IDENTIFIER")
FLAT_NUMBER_IDENTIFIER = config.get("IDENTIFIERS", "FLAT_NUMBER_IDENTIFIER")
HOUSE_NUMBER_IDENTIFIER = config.get("IDENTIFIERS", "HOUSE_NUMBER_IDENTIFIER")
STREET_KEYWORD = config.get("IDENTIFIERS", "STREET_KEYWORD")
# NOTE: exported as FLOOR_NO_KEYWORD but stored under the FLOOR_KEYWORD option.
FLOOR_NO_KEYWORD = config.get("IDENTIFIERS", "FLOOR_KEYWORD")
SURNAME_IDENTIFIER = ast.literal_eval(config.get("IDENTIFIERS", "INDIAN_SURNAMES"))

STATE_MAPPING = ast.literal_eval(config.get("MAPPING_DICT", "STATE_MAPPING"))
CITY_MAPPING = ast.literal_eval(config.get("MAPPING_DICT", "CITY_MAPPING"))
ADDRESS_MAPPING = ast.literal_eval(config.get("MAPPING_DICT", "ADDRESS_MAPPING"))

MODEL_WEIGHTS = ast.literal_eval(config.get("MATCHING_LOGIC", "MODEL_WEIGHTS"))
MATCHING_RULES = ast.literal_eval(config.get("MATCHING_LOGIC", "MATCHING_RULES"))

# =========================================================
# OPTIONAL SETTINGS (fall back to defaults)
# =========================================================
# Name-specific weights (embedding 0.7 + fuzz 0.2 + phonetic 0.1)
NAME_MODEL_WEIGHTS = _optional_setting("NAME_MATCHING", "NAME_MODEL_WEIGHTS", MODEL_WEIGHTS)
NAME_MATCH_ADJUSTMENTS = _optional_setting(
    "NAME_MATCHING",
    "NAME_MATCH_ADJUSTMENTS",
    {"surname_penalty": -30, "initial_boost": 30, "subset_boost": 40},
)

# Address-specific weights (embedding + fuzz, no phonetic)
ADDRESS_MODEL_WEIGHTS = _optional_setting("ADDRESS_MATCHING", "ADDRESS_MODEL_WEIGHTS", MODEL_WEIGHTS)
ADDRESS_MATCH_ADJUSTMENTS = _optional_setting(
    "ADDRESS_MATCHING",
    "ADDRESS_MATCH_ADJUSTMENTS",
    {"house_match_boost": 30, "house_mismatch_penalty": 70},
)

# Embedding model names are plain strings, so they are stripped, not evaluated.
MODEL_1_NAME = _optional_setting(
    "EMBEDDING_MODELS", "MODEL_1_NAME", "sentence-transformers/all-mpnet-base-v2", parse=str.strip
)
MODEL_2_NAME = _optional_setting(
    "EMBEDDING_MODELS", "MODEL_2_NAME", "sentence-transformers/all-MiniLM-L6-v2", parse=str.strip
)


# =========================================================
# CSV DATA LOADING (replacing MySQL)
# =========================================================
def load_csv_file(csv_path: str, file_name: str) -> pd.DataFrame:
    """Load a CSV file as a DataFrame, never raising on failure.

    Args:
        csv_path: Path to the CSV. Relative paths are resolved against
            ``project_root``; the result is normalised to an absolute path.
        file_name: Label used in log messages only.

    Returns:
        The parsed DataFrame, or an empty ``pd.DataFrame()`` when the file
        does not exist or any error occurs while resolving/reading it.
    """
    try:
        # Convert to absolute path relative to project root
        if not os.path.isabs(csv_path):
            csv_path = os.path.join(project_root, csv_path)
        # Normalize path and resolve any ../ or ./ references
        csv_path = os.path.abspath(csv_path)

        if not os.path.exists(csv_path):
            logger.warning(f"CSV file not found: {csv_path}")
            return pd.DataFrame()

        df = pd.read_csv(csv_path)
        logger.info(f"Loaded {file_name}: {len(df)} rows from {csv_path}")
        return df
    except Exception as e:
        # Best-effort loader: callers rely on always getting a DataFrame back.
        logger.error(f"Failed to load {file_name}: {e}")
        return pd.DataFrame()


def _load_reference_table(table_key: str) -> pd.DataFrame:
    """Load the CSV whose path is stored under ``[csv] <table_key>``.

    A missing section/option only affects this one table: it yields an empty
    DataFrame and a warning instead of blanking every other table.
    """
    try:
        csv_path = config.get("csv", table_key)
    except configparser.Error as exc:
        logger.warning(f"Some CSV files may not have loaded: {exc}")
        return pd.DataFrame()
    return load_csv_file(csv_path, table_key)


# Load CSV reference tables
name_variation_df = _load_reference_table("name_variation_standard")
hno_variation_df = _load_reference_table("hno_variation_standard")
city_prev_pres_df = _load_reference_table("city_prev_pres")
state_name_standard_df = _load_reference_table("state_name_standard")
sur_comm_names_df = _load_reference_table("sur_comm_names")
pin_city_state_df = _load_reference_table("pin_city_state")

# Only report success when every table actually contains rows; an empty frame
# means the file was missing, unreadable, unconfigured, or had no data.
_empty_tables = [
    table_name
    for table_name, table_df in (
        ("name_variation_standard", name_variation_df),
        ("hno_variation_standard", hno_variation_df),
        ("city_prev_pres", city_prev_pres_df),
        ("state_name_standard", state_name_standard_df),
        ("sur_comm_names", sur_comm_names_df),
        ("pin_city_state", pin_city_state_df),
    )
    if table_df.empty
]
if _empty_tables:
    logger.warning(
        "Reference tables with no rows (missing, unreadable, or empty CSV): %s",
        ", ".join(_empty_tables),
    )
else:
    logger.info("All CSV files loaded successfully")

# Legacy string exports for backward compatibility
pin_city_state = "pin_city_state"
sur_comm_names = "sur_comm_names"
city_prev_pres = "city_prev_pres"
state_name_standard = "state_name_standard"
hno_variation_standard = "hno_variation_standard"
name_variation_standard = "name_variation_standard"