| import configparser |
| import os |
| import ast |
| import pandas as pd |
| import logging |
|
|
# Module-wide logger, registered under the fixed name "config".
logger = logging.getLogger("config")

# Parser that holds every value read from common.properties.
config = configparser.ConfigParser()

# Directory two levels above this file.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Candidate locations for common.properties, in priority order.
_conf_file_candidates = (
    os.path.join(project_root, "backend", "config", "common.properties"),
    os.path.join(project_root, "config", "common.properties"),
)
conf_file_path = next(
    (path for path in _conf_file_candidates if os.path.exists(path)),
    None,
)
if conf_file_path is None:
    raise FileNotFoundError(f"common.properties not found. Searched in backend/config/ and config/ under {project_root}")

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means nothing was loaded.
# Fail here instead of with a confusing NoSectionError on the first lookup.
if not config.read(conf_file_path):
    raise OSError(f"common.properties exists but could not be read: {conf_file_path}")

logger.info(f"Config loaded from: {conf_file_path}")
| |
| |
| |
|
|
| |
def _identifier(option):
    """Return the raw string stored under *option* in the IDENTIFIERS section."""
    return config.get("IDENTIFIERS", option)


def _literal(section, option):
    """Return the config value at *section*/*option* parsed as a Python literal."""
    raw_value = config.get(section, option)
    return ast.literal_eval(raw_value)


# Plain-string settings from the [IDENTIFIERS] section.
APARTMENT_IDENTIFIER = _identifier("APARTMENT_IDENTIFIER")
FLAT_NUMBER_IDENTIFIER = _identifier("FLAT_NUMBER_IDENTIFIER")
HOUSE_NUMBER_IDENTIFIER = _identifier("HOUSE_NUMBER_IDENTIFIER")
STREET_KEYWORD = _identifier("STREET_KEYWORD")
# The option is named FLOOR_KEYWORD in the file; the constant keeps its own name.
FLOOR_NO_KEYWORD = _identifier("FLOOR_KEYWORD")
# Stored as a Python literal under the INDIAN_SURNAMES option.
SURNAME_IDENTIFIER = _literal("IDENTIFIERS", "INDIAN_SURNAMES")

# Python-literal settings from the [MAPPING_DICT] section.
STATE_MAPPING = _literal("MAPPING_DICT", "STATE_MAPPING")
CITY_MAPPING = _literal("MAPPING_DICT", "CITY_MAPPING")
ADDRESS_MAPPING = _literal("MAPPING_DICT", "ADDRESS_MAPPING")

# Python-literal settings from the [MATCHING_LOGIC] section.
MODEL_WEIGHTS = _literal("MATCHING_LOGIC", "MODEL_WEIGHTS")
MATCHING_RULES = _literal("MATCHING_LOGIC", "MATCHING_RULES")
|
|
| |
# Optional settings. Each group below is best-effort: if either value of a
# group is missing or cannot be parsed, BOTH values of that group fall back to
# the defaults. The broad `except Exception` is deliberate so that any config
# problem degrades to defaults, but the cause is logged rather than swallowed.

# [NAME_MATCHING]
try:
    NAME_MODEL_WEIGHTS = ast.literal_eval(config.get("NAME_MATCHING", "NAME_MODEL_WEIGHTS"))
    NAME_MATCH_ADJUSTMENTS = ast.literal_eval(config.get("NAME_MATCHING", "NAME_MATCH_ADJUSTMENTS"))
except Exception as exc:
    logger.warning("NAME_MATCHING settings unavailable (%s); using defaults", exc)
    # Same object as MODEL_WEIGHTS, not a copy.
    NAME_MODEL_WEIGHTS = MODEL_WEIGHTS
    NAME_MATCH_ADJUSTMENTS = {"surname_penalty": -30, "initial_boost": 30, "subset_boost": 40}

# [ADDRESS_MATCHING]
try:
    ADDRESS_MODEL_WEIGHTS = ast.literal_eval(config.get("ADDRESS_MATCHING", "ADDRESS_MODEL_WEIGHTS"))
    ADDRESS_MATCH_ADJUSTMENTS = ast.literal_eval(config.get("ADDRESS_MATCHING", "ADDRESS_MATCH_ADJUSTMENTS"))
except Exception as exc:
    logger.warning("ADDRESS_MATCHING settings unavailable (%s); using defaults", exc)
    # Same object as MODEL_WEIGHTS, not a copy.
    ADDRESS_MODEL_WEIGHTS = MODEL_WEIGHTS
    ADDRESS_MATCH_ADJUSTMENTS = {"house_match_boost": 30, "house_mismatch_penalty": 70}

# [EMBEDDING_MODELS]
try:
    MODEL_1_NAME = config.get("EMBEDDING_MODELS", "MODEL_1_NAME").strip()
    MODEL_2_NAME = config.get("EMBEDDING_MODELS", "MODEL_2_NAME").strip()
except Exception as exc:
    logger.warning("EMBEDDING_MODELS settings unavailable (%s); using defaults", exc)
    MODEL_1_NAME = "sentence-transformers/all-mpnet-base-v2"
    MODEL_2_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
|
| |
| |
| |
|
|
def load_csv_file(csv_path: str, file_name: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame, returning an empty frame on failure.

    Args:
        csv_path: Path to the CSV file. A relative path is resolved against
            ``project_root``.
        file_name: Label used only in the log messages.

    Returns:
        The parsed DataFrame, or an empty ``pd.DataFrame()`` when the path is
        not an existing regular file or when reading it raises.
    """
    try:
        if not os.path.isabs(csv_path):
            csv_path = os.path.join(project_root, csv_path)
        csv_path = os.path.abspath(csv_path)

        # isfile rather than exists: a directory (e.g. a blank config value
        # that resolves to project_root) is reported as missing instead of
        # failing inside read_csv.
        if not os.path.isfile(csv_path):
            logger.warning(f"CSV file not found: {csv_path}")
            return pd.DataFrame()

        frame = pd.read_csv(csv_path)
        logger.info(f"Loaded {file_name}: {len(frame)} rows from {csv_path}")
        return frame
    except Exception as e:
        # Best-effort loader: never propagate, but keep the traceback in the log.
        logger.exception(f"Failed to load {file_name}: {e}")
        return pd.DataFrame()
|
|
| |
def _load_configured_csv(option: str) -> pd.DataFrame:
    """Load the CSV whose path is stored under *option* in the [csv] section.

    Returns an empty DataFrame when the option (or the section) is missing, so
    one unconfigured file does not affect the others.
    """
    try:
        csv_path = config.get("csv", option)
    except configparser.Error as exc:
        logger.warning(f"CSV path for {option} is not configured: {exc}")
        return pd.DataFrame()
    return load_csv_file(csv_path, option)


# Each frame is resolved independently: load_csv_file never raises, so the only
# failure here is a missing config entry, and that must not discard the frames
# that did load.
name_variation_df = _load_configured_csv("name_variation_standard")
hno_variation_df = _load_configured_csv("hno_variation_standard")
city_prev_pres_df = _load_configured_csv("city_prev_pres")
state_name_standard_df = _load_configured_csv("state_name_standard")
sur_comm_names_df = _load_configured_csv("sur_comm_names")
pin_city_state_df = _load_configured_csv("pin_city_state")

# Report success only when every frame has data; an empty frame is what the
# loaders return for a missing or unreadable file.
_empty_csv_names = [
    label
    for label, frame in (
        ("name_variation_standard", name_variation_df),
        ("hno_variation_standard", hno_variation_df),
        ("city_prev_pres", city_prev_pres_df),
        ("state_name_standard", state_name_standard_df),
        ("sur_comm_names", sur_comm_names_df),
        ("pin_city_state", pin_city_state_df),
    )
    if frame.empty
]
if _empty_csv_names:
    logger.warning(f"Some CSV files may not have loaded: {', '.join(_empty_csv_names)}")
else:
    logger.info("All CSV files loaded successfully")

# String names of the CSV datasets (identical to the [csv] option names above).
pin_city_state = "pin_city_state"
sur_comm_names = "sur_comm_names"
city_prev_pres = "city_prev_pres"
state_name_standard = "state_name_standard"
hno_variation_standard = "hno_variation_standard"
name_variation_standard = "name_variation_standard"
|
|
|
|
|
|