| import re |
|
|
| import pandas as pd |
|
|
|
|
| TECH_SHEET_KEYWORDS: dict[str, tuple[str, ...]] = { |
| "2g": ("2g", "gsm", "geran"), |
| "3g": ("3g", "wcdma", "umts", "utra"), |
| "4g": ("4g", "lte", "eutran"), |
| } |
|
|
|
|
| def _normalize_sheet_name(sheet_name: object) -> tuple[str, str]: |
| normalized = re.sub(r"[^a-z0-9]+", " ", str(sheet_name).strip().lower()).strip() |
| collapsed = normalized.replace(" ", "") |
| return normalized, collapsed |
|
|
|
|
| def _score_sheet_name(sheet_name: str, technology: str) -> int: |
| keywords = TECH_SHEET_KEYWORDS.get(technology.lower(), (technology.lower(),)) |
| normalized, collapsed = _normalize_sheet_name(sheet_name) |
| tokens = normalized.split() |
|
|
| best_score = 0 |
| for keyword in keywords: |
| if normalized == keyword: |
| best_score = max(best_score, 100) |
| elif keyword in tokens: |
| best_score = max(best_score, 80) |
| elif collapsed.endswith(keyword) or collapsed.startswith(keyword): |
| best_score = max(best_score, 60) |
| elif keyword in collapsed: |
| best_score = max(best_score, 40) |
|
|
| return best_score |
|
|
|
|
| def _get_workbook_sheet_names(ciq_file) -> list[str]: |
| if hasattr(ciq_file, "seek"): |
| ciq_file.seek(0) |
|
|
| workbook = pd.ExcelFile(ciq_file, engine="calamine") |
| return list(workbook.sheet_names) |
|
|
|
|
| def get_ciq_sheet_selection_details(ciq_file, technology: str) -> tuple[str, list[str]]: |
| sheet_names = _get_workbook_sheet_names(ciq_file) |
|
|
| if not sheet_names: |
| raise ValueError("The uploaded Excel file does not contain any sheet.") |
|
|
| if len(sheet_names) == 1: |
| return sheet_names[0], sheet_names |
|
|
| scored_matches = [] |
| for idx, sheet_name in enumerate(sheet_names): |
| score = _score_sheet_name(sheet_name, technology) |
| if score > 0: |
| scored_matches.append((score, idx, sheet_name)) |
|
|
| if not scored_matches: |
| joined = ", ".join(str(name) for name in sheet_names) |
| raise ValueError( |
| f"Unable to detect the {technology.upper()} sheet automatically. " |
| f"Available sheets: {joined}" |
| ) |
|
|
| scored_matches.sort(key=lambda item: (-item[0], item[1])) |
| return scored_matches[0][2], sheet_names |
|
|
|
|
| def select_ciq_sheet_name(ciq_file, technology: str) -> str: |
| selected_sheet, _sheet_names = get_ciq_sheet_selection_details( |
| ciq_file, technology=technology |
| ) |
| return selected_sheet |
|
|
|
|
| def read_ciq_excel( |
| ciq_file, technology: str, sheet_name: str | None = None |
| ) -> pd.DataFrame: |
| if sheet_name is None: |
| sheet_name = select_ciq_sheet_name(ciq_file, technology=technology) |
| else: |
| sheet_names = _get_workbook_sheet_names(ciq_file) |
| if sheet_name not in sheet_names: |
| joined = ", ".join(str(name) for name in sheet_names) |
| raise ValueError( |
| f"Sheet '{sheet_name}' does not exist in the uploaded Excel file. " |
| f"Available sheets: {joined}" |
| ) |
|
|
| if hasattr(ciq_file, "seek"): |
| ciq_file.seek(0) |
|
|
| return pd.read_excel(ciq_file, sheet_name=sheet_name, engine="calamine") |