File size: 944 Bytes
6782585
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import pandas as pd

def standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Standardize the column labels of ``df`` in place and return it.

    Each string label has leading/trailing whitespace stripped, then every
    space, non-breaking space (U+00A0) and underscore removed. Labels that
    are not strings (for example integer headers) are left unchanged rather
    than raising ``AttributeError``.

    Args:
        df: Frame whose ``columns`` attribute is rewritten.

    Returns:
        The same ``df`` object, with cleaned column labels.
    """
    # A single translate pass deletes all three characters at once.
    removed = str.maketrans("", "", "\xa0 _")
    df.columns = [
        label.strip().translate(removed) if isinstance(label, str) else label
        for label in df.columns
    ]
    return df

def load_file(file) -> pd.DataFrame:
    """
    Read a CSV or Excel (.xlsx) source into a DataFrame with standardized columns.

    The format is chosen from the file name's extension, compared
    case-insensitively so names such as ``REPORT.CSV`` are accepted.

    Args:
        file: An object exposing a ``name`` attribute (such as an open or
            uploaded file), or a plain path string. It is handed to
            ``pd.read_csv`` / ``pd.read_excel`` unchanged.

    Returns:
        The loaded frame after being passed through ``standardize_columns``.

    Raises:
        ValueError: If the name ends in neither ``.csv`` nor ``.xlsx``.
    """
    # Fall back to the argument itself so plain path strings work too.
    filename = str(getattr(file, "name", file)).lower()
    if filename.endswith(".csv"):
        df = pd.read_csv(file)
    elif filename.endswith(".xlsx"):
        df = pd.read_excel(file)
    else:
        raise ValueError("File type not supported, only CSV or Excel are accepted")
    return standardize_columns(df)

def recommend_buildings(building_list, query, scorer, limit, threshold, index_builder, fuzzy_engine):
    """
    Return the indexed entries whose keys fuzzily match ``query``.

    Args:
        building_list: Passed as-is to ``index_builder``.
        query: Search text; it is lower-cased before matching. A falsy
            value (``None`` or ``""``) yields an empty list.
        scorer: Forwarded to ``fuzzy_engine`` as the ``scorer`` keyword.
        limit: Forwarded to ``fuzzy_engine`` as the ``limit`` keyword.
        threshold: Minimum score (inclusive) a match needs to be kept.
        index_builder: Callable returning a mapping from match key to the
            value that should be returned for that key.
        fuzzy_engine: Callable invoked as
            ``fuzzy_engine(text, keys, scorer=..., limit=...)`` that yields
            3-tuples whose first two items are ``(key, score)``.

    Returns:
        A list of mapped values, in the order the engine produced them.
    """
    if query:
        lookup = index_builder(building_list)
        candidates = list(lookup.keys())
        hits = fuzzy_engine(query.lower(), candidates, scorer=scorer, limit=limit)

        selected = []
        for key, score, _ in hits:
            # Keep only matches that reach the caller's cut-off.
            if score >= threshold:
                selected.append(lookup[key])
        return selected
    return []