from tqdm import tqdm
from transliterate import translit, detect_language
import pandas as pd
from rapidfuzz import fuzz, process


def normalize_name(name):
    """
    Normalize a name for comparison.

    If the detected language of ``name`` is Russian, the name is transliterated
    to Latin script and lowercased. In every other case, including any error
    raised while detecting the language or transliterating, the name is simply
    lowercased.

    :param name: Name to normalize.
    :return: Lowercased (and possibly transliterated) name.
    """
    transliterated = None
    try:
        if detect_language(name) == 'ru':
            transliterated = translit(name, 'ru', reversed=True).lower()
    except Exception:
        # Best effort: fall back to plain lowercasing below.
        transliterated = None

    if transliterated is None:
        return name.lower()
    return transliterated

def prepare_groups_with_ids(items_df):
    """
    Pre-group items by (new_brand, type, volume, new_type_wine, sour).

    A 'norm_name' column is added to a copy of the input so that every item
    name is normalized only once, ahead of matching. The input frame itself
    is not modified.

    :param items_df: DataFrame with columns 'new_brand', 'type', 'name', 'id',
        'volume', 'new_type_wine', 'sour', 'year'.
    :return: Dict {(new_brand, type, volume, new_type_wine, sour):
        [(id, name, norm_name, volume, new_type_wine, sour, year), ...]}.
        An empty input yields an empty dict.
    """
    group_columns = ['new_brand', 'type', 'volume', 'new_type_wine', 'sour']

    items_df = items_df.copy()
    items_df['norm_name'] = items_df['name'].apply(normalize_name)

    # Iterate over the groups instead of using GroupBy.apply: each group frame
    # here still contains the grouping columns (volume, new_type_wine, sour)
    # that the tuples need, and an empty frame simply produces no groups.
    return {
        key: list(zip(
            group['id'],
            group['name'],
            group['norm_name'],
            group['volume'],
            group['new_type_wine'],
            group['sour'],
            group['year'],
        ))
        for key, group in items_df.groupby(group_columns)
    }

def prepare_groups_by_alternative_keys(items_df):
    """
    Group items by (new_type_wine, new_type, volume, sour).

    Each group keeps the item id, its brand, and both the original and the
    normalized name. Normalization is done on a copy, so the input frame is
    not modified.

    :param items_df: DataFrame with columns 'new_brand', 'new_type_wine',
        'new_type', 'volume', 'name', 'id', 'sour', 'year'.
    :return: Dict {(new_type_wine, new_type, volume, sour):
        [(id, new_brand, name, norm_name, volume, new_type_wine, sour, year), ...]}.
        An empty input yields an empty dict.
    """
    group_columns = ['new_type_wine', 'new_type', 'volume', 'sour']

    items_df = items_df.copy()
    items_df['norm_name'] = items_df['name'].apply(normalize_name)

    # Iterate over the groups instead of using GroupBy.apply: each group frame
    # here still contains the grouping columns (volume, new_type_wine, sour)
    # that the tuples need, and an empty frame simply produces no groups.
    return {
        key: list(zip(
            group['id'],
            group['new_brand'],
            group['name'],
            group['norm_name'],
            group['volume'],
            group['new_type_wine'],
            group['sour'],
            group['year'],
        ))
        for key, group in items_df.groupby(group_columns)
    }

def _collect_fuzzy_matches(norm_query, candidates, name_threshold):
    """
    Fuzzy-match a normalized product name against candidate items.

    :param norm_query: Normalized product name to look up.
    :param candidates: Sequence of tuples
        (id, name, norm_name, volume, new_type_wine, sour, year).
    :param name_threshold: Score cutoff passed to the fuzzy matcher.
    :return: List of dicts with keys 'item_id', 'item_name', 'score', 'volume',
        'color', 'sour', 'year', one per candidate returned by the matcher.
    """
    if not candidates:
        return []

    norm_names = [candidate[2] for candidate in candidates]
    # NOTE(review): no explicit `limit` is passed, so the library default
    # applies; confirm that truncating the result list is intended.
    hits = process.extract(
        norm_query, norm_names, scorer=fuzz.ratio, score_cutoff=name_threshold
    )

    matched = []
    for _, score, position in hits:
        item_id, item_name, _, volume, type_wine, sour, year = candidates[position]
        matched.append({
            'item_id': item_id,
            'item_name': item_name,
            'score': score,
            'volume': volume,
            'color': type_wine,
            'sour': sour,
            'year': year,
        })
    return matched


def new_find_matches_with_ids(products_df, items_groups, items_df, name_threshold=85):
    """
    Find matching items for every product, keeping the ids of the matched items.

    Two passes are made:
    - First: look up candidates in ``items_groups`` by
      (brand, type, volume, new_type_wine, sour).
    - Second: for products with no match in the first pass, look up candidates
      by (new_type_wine, new_type, volume, sour), excluding items whose brand
      equals the product's brand.

    Names are compared through their normalized form; the original item name
    is what gets reported in the output.

    :param products_df: DataFrame with columns 'id', 'brand', 'type', 'name',
        'volume', 'new_type_wine', 'sour', 'new_type'.
    :param items_groups: Dict produced by prepare_groups_with_ids.
    :param items_df: DataFrame of items with columns 'id', 'new_brand',
        'new_type_wine', 'new_type', 'volume', 'name', 'sour', 'year'.
    :param name_threshold: Similarity threshold for fuzzy matching.
    :return: A new DataFrame with one row per product, in the same order as
        ``products_df`` and with a fresh 0..n-1 index, extended with the columns
        'matched_items' (first-pass matches) and 'alternative' (second-pass
        matches).
    """
    results = []
    # (position in `results`, product row, normalized product name)
    unmatched = []

    # First pass: search within (brand, type, volume, new_type_wine, sour) groups.
    # The position comes from enumerate, not from the DataFrame index label,
    # so a non-default index on products_df cannot misaddress `results`.
    rows = tqdm(products_df.iterrows(), total=len(products_df))
    for position, (_, product) in enumerate(rows):
        key = (
            product['brand'],
            product['type'],
            product['volume'],
            product['new_type_wine'],
            product['sour'],
        )
        norm_product_name = normalize_name(product['name'])
        matched_items = _collect_fuzzy_matches(
            norm_product_name, items_groups.get(key, []), name_threshold
        )

        if not matched_items:
            unmatched.append((position, product, norm_product_name))

        results.append({
            'matched_items': matched_items,
            'alternative': [],  # Filled in during the second pass.
        })

    # Second pass: only build the alternative grouping when it is needed,
    # since it normalizes every item name again.
    if unmatched:
        groups_by_alternative_keys = prepare_groups_by_alternative_keys(items_df)

        for position, product, norm_product_name in tqdm(unmatched):
            alt_key = (
                product['new_type_wine'],
                product['new_type'],
                product['volume'],
                product['sour'],
            )
            product_brand = product['brand']
            # Drop items of the product's own brand and strip the brand field
            # so the tuples have the layout the matching helper expects.
            candidates = [
                (item[0],) + item[2:]
                for item in groups_by_alternative_keys.get(alt_key, [])
                if item[1] != product_brand
            ]
            results[position]['alternative'] = _collect_fuzzy_matches(
                norm_product_name, candidates, name_threshold
            )

    # Attach results positionally rather than merging on 'id': this keeps one
    # row per product even when ids repeat and works for an empty input.
    results_df = pd.DataFrame(results, columns=['matched_items', 'alternative'])
    return pd.concat([products_df.reset_index(drop=True), results_df], axis=1)