import pandas as pd

# Characters deleted from every column name: non-breaking space, regular
# space and underscore. Applied in one pass with ``str.translate``.
_COLUMN_NAME_DELETIONS = str.maketrans("", "", "\xa0 _")


def standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize the column names of *df* in place and return it.

    Each string column name has surrounding whitespace stripped and every
    non-breaking space (``\\xa0``), regular space and underscore removed.
    Column labels that are not strings (for example integer labels) are left
    untouched instead of raising ``AttributeError``.

    Args:
        df: The frame whose ``columns`` attribute is rewritten.

    Returns:
        The same ``df`` object, to allow call chaining.
    """
    df.columns = [
        name.strip().translate(_COLUMN_NAME_DELETIONS)
        if isinstance(name, str)
        else name
        for name in df.columns
    ]
    return df


def load_file(file) -> pd.DataFrame:
    """Read a CSV or ``.xlsx`` file object into a frame with normalized columns.

    The reader is chosen from the extension of ``file.name``; the comparison
    is case-insensitive so ``DATA.CSV`` is treated like ``data.csv``.

    Args:
        file: An object exposing a ``name`` attribute and accepted by
            ``pandas.read_csv`` / ``pandas.read_excel``.

    Returns:
        The loaded frame after ``standardize_columns`` has been applied.

    Raises:
        ValueError: If the name ends with neither ``.csv`` nor ``.xlsx``.
    """
    filename = file.name.lower()
    if filename.endswith(".csv"):
        df = pd.read_csv(file)
    elif filename.endswith(".xlsx"):
        df = pd.read_excel(file)
    else:
        raise ValueError("File type not supported, only CSV or Excel are accepted")
    return standardize_columns(df)


def recommend_buildings(
    building_list, query, scorer, limit, threshold, index_builder, fuzzy_engine
):
    """Return the buildings whose index key fuzzily matches *query*.

    Args:
        building_list: Collection handed unchanged to ``index_builder``.
        query: Search text; it is lower-cased before matching. A falsy value
            (``None`` or ``""``) yields an empty result.
        scorer: Scoring callable forwarded to ``fuzzy_engine`` as ``scorer=``.
        limit: Maximum number of candidates, forwarded as ``limit=``.
        threshold: Minimum score (inclusive) a candidate needs to be kept.
        index_builder: Callable mapping ``building_list`` to a dict of
            ``search key -> value to return``.
        fuzzy_engine: Callable invoked as
            ``fuzzy_engine(query, keys, scorer=..., limit=...)`` that returns
            an iterable of 3-tuples ``(key, score, _)``.

    Returns:
        A list of the ``index_builder`` values for every match scoring at
        least ``threshold``, in the order ``fuzzy_engine`` produced them.
    """
    if not query:
        return []

    idx_map = index_builder(building_list)
    keys = list(idx_map)
    matches = fuzzy_engine(query.lower(), keys, scorer=scorer, limit=limit)
    return [idx_map[key] for key, score, _ in matches if score >= threshold]