import re import pandas as pd TECH_SHEET_KEYWORDS: dict[str, tuple[str, ...]] = { "2g": ("2g", "gsm", "geran"), "3g": ("3g", "wcdma", "umts", "utra"), "4g": ("4g", "lte", "eutran"), } def _normalize_sheet_name(sheet_name: object) -> tuple[str, str]: normalized = re.sub(r"[^a-z0-9]+", " ", str(sheet_name).strip().lower()).strip() collapsed = normalized.replace(" ", "") return normalized, collapsed def _score_sheet_name(sheet_name: str, technology: str) -> int: keywords = TECH_SHEET_KEYWORDS.get(technology.lower(), (technology.lower(),)) normalized, collapsed = _normalize_sheet_name(sheet_name) tokens = normalized.split() best_score = 0 for keyword in keywords: if normalized == keyword: best_score = max(best_score, 100) elif keyword in tokens: best_score = max(best_score, 80) elif collapsed.endswith(keyword) or collapsed.startswith(keyword): best_score = max(best_score, 60) elif keyword in collapsed: best_score = max(best_score, 40) return best_score def _get_workbook_sheet_names(ciq_file) -> list[str]: if hasattr(ciq_file, "seek"): ciq_file.seek(0) workbook = pd.ExcelFile(ciq_file, engine="calamine") return list(workbook.sheet_names) def get_ciq_sheet_selection_details(ciq_file, technology: str) -> tuple[str, list[str]]: sheet_names = _get_workbook_sheet_names(ciq_file) if not sheet_names: raise ValueError("The uploaded Excel file does not contain any sheet.") if len(sheet_names) == 1: return sheet_names[0], sheet_names scored_matches = [] for idx, sheet_name in enumerate(sheet_names): score = _score_sheet_name(sheet_name, technology) if score > 0: scored_matches.append((score, idx, sheet_name)) if not scored_matches: joined = ", ".join(str(name) for name in sheet_names) raise ValueError( f"Unable to detect the {technology.upper()} sheet automatically. " f"Available sheets: {joined}" ) scored_matches.sort(key=lambda item: (-item[0], item[1])) return scored_matches[0][2], sheet_names def select_ciq_sheet_name(ciq_file, technology: str) -> str: selected_sheet, _sheet_names = get_ciq_sheet_selection_details( ciq_file, technology=technology ) return selected_sheet def read_ciq_excel( ciq_file, technology: str, sheet_name: str | None = None ) -> pd.DataFrame: if sheet_name is None: sheet_name = select_ciq_sheet_name(ciq_file, technology=technology) else: sheet_names = _get_workbook_sheet_names(ciq_file) if sheet_name not in sheet_names: joined = ", ".join(str(name) for name in sheet_names) raise ValueError( f"Sheet '{sheet_name}' does not exist in the uploaded Excel file. " f"Available sheets: {joined}" ) if hasattr(ciq_file, "seek"): ciq_file.seek(0) return pd.read_excel(ciq_file, sheet_name=sheet_name, engine="calamine")