Spaces:
Sleeping
Sleeping
| import re | |
| import pandas as pd | |
| TECH_SHEET_KEYWORDS: dict[str, tuple[str, ...]] = { | |
| "2g": ("2g", "gsm", "geran"), | |
| "3g": ("3g", "wcdma", "umts", "utra"), | |
| "4g": ("4g", "lte", "eutran"), | |
| } | |
| def _normalize_sheet_name(sheet_name: object) -> tuple[str, str]: | |
| normalized = re.sub(r"[^a-z0-9]+", " ", str(sheet_name).strip().lower()).strip() | |
| collapsed = normalized.replace(" ", "") | |
| return normalized, collapsed | |
| def _score_sheet_name(sheet_name: str, technology: str) -> int: | |
| keywords = TECH_SHEET_KEYWORDS.get(technology.lower(), (technology.lower(),)) | |
| normalized, collapsed = _normalize_sheet_name(sheet_name) | |
| tokens = normalized.split() | |
| best_score = 0 | |
| for keyword in keywords: | |
| if normalized == keyword: | |
| best_score = max(best_score, 100) | |
| elif keyword in tokens: | |
| best_score = max(best_score, 80) | |
| elif collapsed.endswith(keyword) or collapsed.startswith(keyword): | |
| best_score = max(best_score, 60) | |
| elif keyword in collapsed: | |
| best_score = max(best_score, 40) | |
| return best_score | |
| def _get_workbook_sheet_names(ciq_file) -> list[str]: | |
| if hasattr(ciq_file, "seek"): | |
| ciq_file.seek(0) | |
| workbook = pd.ExcelFile(ciq_file, engine="calamine") | |
| return list(workbook.sheet_names) | |
| def get_ciq_sheet_selection_details(ciq_file, technology: str) -> tuple[str, list[str]]: | |
| sheet_names = _get_workbook_sheet_names(ciq_file) | |
| if not sheet_names: | |
| raise ValueError("The uploaded Excel file does not contain any sheet.") | |
| if len(sheet_names) == 1: | |
| return sheet_names[0], sheet_names | |
| scored_matches = [] | |
| for idx, sheet_name in enumerate(sheet_names): | |
| score = _score_sheet_name(sheet_name, technology) | |
| if score > 0: | |
| scored_matches.append((score, idx, sheet_name)) | |
| if not scored_matches: | |
| joined = ", ".join(str(name) for name in sheet_names) | |
| raise ValueError( | |
| f"Unable to detect the {technology.upper()} sheet automatically. " | |
| f"Available sheets: {joined}" | |
| ) | |
| scored_matches.sort(key=lambda item: (-item[0], item[1])) | |
| return scored_matches[0][2], sheet_names | |
| def select_ciq_sheet_name(ciq_file, technology: str) -> str: | |
| selected_sheet, _sheet_names = get_ciq_sheet_selection_details( | |
| ciq_file, technology=technology | |
| ) | |
| return selected_sheet | |
| def read_ciq_excel( | |
| ciq_file, technology: str, sheet_name: str | None = None | |
| ) -> pd.DataFrame: | |
| if sheet_name is None: | |
| sheet_name = select_ciq_sheet_name(ciq_file, technology=technology) | |
| else: | |
| sheet_names = _get_workbook_sheet_names(ciq_file) | |
| if sheet_name not in sheet_names: | |
| joined = ", ".join(str(name) for name in sheet_names) | |
| raise ValueError( | |
| f"Sheet '{sheet_name}' does not exist in the uploaded Excel file. " | |
| f"Available sheets: {joined}" | |
| ) | |
| if hasattr(ciq_file, "seek"): | |
| ciq_file.seek(0) | |
| return pd.read_excel(ciq_file, sheet_name=sheet_name, engine="calamine") |