import json
import pandas as pd
from multiprocessing import Process, Queue
from preprocess.utils.common.utils import get_delimiter
from rapidfuzz import process
from preprocess.utils.products.products import *


OUTPUT_COUNTS = True
OUTPUT_CSV = True

INCORRECT_MANUAL_MATCHINGS = {"47":"64088", "50":"", "59":"", "133":"77024", "207":"", "252":"94238", "367":"104051",
                              "674":"", "2686":"", "7986":"", "21204":"", "2007154":"15248", "2007498":"108089", "2007609":"61397",
                              "2007652":"2383", "2008041":"", "2008052":"", "2008131":"", "2008606":"", "2008647":"2036",
                              "2009069":"97208", "2009093": "81511", "2009521":"34044", "2010101":"107433", "2010586":"98170",
                              "2017376":"", "2018418":"", "2033420":"15745", "2038482":"", "2051521":"", "1261":"", "2214":"",
                              "142744":"88696", "142748":"", "142757":"7770", "142760":"13584", "4665045":"116169",
                              "22845":"", "105736":"102244", "106425":"", "106539":"61254"}

COMPLEX_MANUAL_MATCHINGS = {"22918":"", "22938":"", "22973":"", "22978":""}


def score_correct_item_to_product(item, product):
    item_to_compare = item['name']
    if 'brand' in item.keys() and item['brand'] and item['brand'] not in item['name']:
        item_to_compare = item['brand'] + " " + item['name']

    match, score, _ = process.extractOne(item_to_compare, product['name_with_brand'])
    match2, score2, _ = process.extractOne(item_to_compare, product['name_2'])
    if score2 > score:
        score = score2

    return score


def compare_matching_with_correct_func(data, qresult):
    job_index = data["index"]
    items_df = data["items_df"]
    products_df = data["products_df"]
    match_df = data["match_df"]
    manual_df = data["manual_df"]
    results = data["initial_results"]

    row_index = 0
    row_count = int(items_df.count()[0])

    result_list = []

    for index, row in items_df.iterrows():
        result_data = {}

        row_index = row_index + 1
        #print("Processing row #" + str(job_index) + "-" + str(row_index) + "/" + str(row_count))
        #if row["id"] == 25197:
        #    row_index == row_index

        result_data["id"] = row["id"]
        result_data["match_side"] = "no_match"
        result_data["match_score"] = "N"
        result_data["best_score_ex"] = ""
        result_data["manual_match_score"] = -1

        auto_match = match_df[match_df['id'] == row["id"]]
        if len(auto_match) == 0:
            print("Auto matched for item id=" + str(row["id"]) + " not found")
            results["auto_match_count_no_products"] = results["auto_match_count_no_products"] + 1
            auto_match = ""
        else:
            result_data["best_score_ex"] = auto_match["best_score_ex"].values[0]
            auto_match = auto_match["matched_items"].values[0]

            if (auto_match is not None) and isinstance(auto_match, str) and len(auto_match) > 2:
                results["auto_match_total_count"] = results["auto_match_total_count"] + 1

        manual_match = None
        manual = manual_df[manual_df['item_id'] == row["id"]]['state']
        if (len(manual) > 0) and (manual.values[0] == 1):
            p = products_df[products_df["id"] == manual_df.iloc[int(manual.index[0])]["product_id"]]

            if len(p.values) > 0:
                manual_match = p
                results["manual_match_total_count"] = results["manual_match_total_count"] + 1
                result_data["manual_match_score"] = score_correct_item_to_product(row, p)
            else:
                print("Manually matched product id=" + str(manual_df.iloc[int(manual.index[0])]["product_id"]) + " for item=" + str(row["id"]) + " not found")
                results["manual_match_count_no_products"] = results["manual_match_count_no_products"] + 1

        if (auto_match is not None) and isinstance(auto_match, str) and len(auto_match) > 2 and (manual_match is not None):
            result_data["match_side"] = "both"
            results["both_match_count"] = results["both_match_count"] + 1

            manual_id = int(manual_match["id"].values[0])
            auto_match_ns = auto_match.replace(" ", "")
            i1 = auto_match_ns.find('"id":')
            i2 = auto_match_ns.find('"id":' + str(manual_id))

            if i1 == i2 and i1 != -1 and i2 != -1:
                result_data["match_score"] = 'E'
                results["equal_match_count"] = results["equal_match_count"] + 1
            elif i2 > i1:
                pos = 0
                partial_index = 0
                result_data["match_score"] = 'P'
                while pos != -1:
                    pos = auto_match_ns.find('"id":', pos+1)
                    if pos == i2:
                        if partial_index < 5:
                            result_data["match_score"] = 'P5'
                            results["partial5_match_count"] = results["partial5_match_count"] + 1
                        elif partial_index < 10:
                            result_data["match_score"] = 'P10'
                            results["partial10_match_count"] = results["partial10_match_count"] + 1
                        else:
                            result_data["match_score"] = 'P100'
                            results["partial100_match_count"] = results["partial100_match_count"] + 1

                        break

                    partial_index = partial_index + 1

                #result_data["match_score"] = 'P'
                results["partial_match_count"] = results["partial_match_count"] + 1
            else:
                result_data["match_score"] = 'D'
        elif (auto_match is not None) and isinstance(auto_match, str) and len(auto_match) > 2:
            result_data["match_score"] = 'A'
            result_data["match_side"] = "only_auto"
            results["only_auto_match_count"] = results["only_auto_match_count"] + 1
        elif manual_match is not None:
            result_data["match_score"] = 'M'
            result_data["match_side"] = "only_manual"
            results["only_manual_match_count"] = results["only_manual_match_count"] + 1

        result_data["item"] = row["attrs"]

        result_data["item_id"] = row["id"]
        if row["orig_brand"]:
            result_data["item_name"] = row["orig_brand"] + " " + row["orig_name"]
        else:
            result_data["item_name"] = row["orig_name"]


        result_data["auto_match"] = auto_match

        manual_string = ""
        if (manual_match is not None):
            manual_string = '{' + \
                        '"id": ' + str(manual_match["id"].values[0]) + ',' + \
                        '"brand": "' + str(manual_match["brand"].values[0]) + '",' + \
                        '"name": "' + str(manual_match["orig_name"].values[0]) + '",' + \
                        '"volume": ' + str(manual_match["volume"].values[0]) + '",' + \
                        '"year": ' + str(manual_match["year"].values[0]) + '"}'

        result_data["manual_match"] = manual_string

        if manual_match is not None:
            result_data["product_id"] = manual_match["id"].values[0]
            result_data["product_name"] = manual_match["orig_name"].values[0]
        else:
            result_data["product_id"] = ""
            result_data["product_name"] = ""

        result_list.append(result_data)

    qresult.put([results, result_list])


def compare_matching_with_correct(products_file, items_file, match_result_file, manual_result_file, processor, csv_result_file, counts_result_file):

    #csv_delimiter = get_delimiter(products_file)
    #products_df = pd.read_csv(products_file, sep=csv_delimiter, on_bad_lines='skip')

    prods_data = get_latest_products()
    if not prods_data or not os.path.isfile(prods_data["path"]):
        raise Exception("Actual products data not found")

    products_df = prods_data["df_products"]

    csv_delimiter = get_delimiter(items_file)
    items_df_raw = pd.read_csv(items_file, sep=csv_delimiter, on_bad_lines='skip')

    csv_delimiter = get_delimiter(match_result_file)
    match_df = pd.read_csv(match_result_file, sep=csv_delimiter)

    csv_delimiter = get_delimiter(manual_result_file)
    manual_df = pd.read_csv(manual_result_file, sep=csv_delimiter)

    #items_df_raw = items_df_raw[0:100]
    items_df = processor.preprocessor.process_items(items_df_raw.copy())
    items_df["attrs"] = items_df_raw["attrs"]

    results = {
        "item_count" : int(items_df.count()[0]),
        "product_count" : int(products_df.count()[0]),

        "total_cm_match_count_percent": 0,
        "equal_cm_match_count_percent": 0,
        "partial_cm_match_count_percent": 0,
        "partial5_cm_match_count_percent": 0,
        "partial10_cm_match_count_percent": 0,
        "partial100_cm_match_count_percent": 0,

        "total_match_count": 0,
        "total_match_count_percent": 0,

        "equal_match_count": 0,
        "equal_match_count_percent": 0,

        "partial_match_count": 0,
        "partial_match_count_percent": 0,

        "partial5_match_count": 0,
        "partial10_match_count": 0,
        "partial100_match_count": 0,

        "only_auto_match_count": 0,
        "only_auto_match_percent": 0,

        "only_manual_match_count": 0,
        "only_manual_match_percent": 0,

        "auto_match_total_count": 0,
        "auto_match_total_percent": 0,

        "manual_match_total_count" : 0,
        "manual_match_total_percent": 0,
        "manual_match_count_no_products": 0,

        "auto_match_count_no_products": 0,
        "both_match_count": 0,
    }

    result_list = []

    threads_data = list()
    chunk_size = len(items_df) // 2 + 1
    num_chunks = len(items_df) // chunk_size + 1
    for i in range(num_chunks):
        chunk = items_df[i * chunk_size:(i + 1) * chunk_size]
        data = {"index": i, "items_df": chunk, "products_df": products_df, "match_df":match_df, "manual_df":manual_df, "initial_results":results }

        q = Queue()
        p = Process(target=compare_matching_with_correct_func, args=(data, q,))
        p.start()

        threads_data.append({"index": i, "q": q})


    for td in threads_data:
        td["result"] = td["q"].get()

    for td in threads_data:
        t_result = td["result"][0]
        t_result_list = td["result"][1]
        result_list.extend(t_result_list)

        results["total_match_count"] = results["total_match_count"] + t_result["total_match_count"]
        results["equal_match_count"] = results["equal_match_count"] + t_result["equal_match_count"]
        results["partial_match_count"] = results["partial_match_count"] + t_result["partial_match_count"]
        results["partial5_match_count"] = results["partial5_match_count"] + t_result["partial5_match_count"]
        results["partial10_match_count"] = results["partial10_match_count"] + t_result["partial10_match_count"]
        results["partial100_match_count"] = results["partial100_match_count"] + t_result["partial100_match_count"]
        results["only_auto_match_count"] = results["only_auto_match_count"] + t_result["only_auto_match_count"]
        results["only_manual_match_count"] = results["only_manual_match_count"] + t_result["only_manual_match_count"]
        results["auto_match_total_count"] = results["auto_match_total_count"] + t_result["auto_match_total_count"]
        results["manual_match_total_count"] = results["manual_match_total_count"] + t_result["manual_match_total_count"]
        results["manual_match_count_no_products"] = results["manual_match_count_no_products"] + t_result["manual_match_count_no_products"]
        results["auto_match_count_no_products"] = results["auto_match_count_no_products"] + t_result["auto_match_count_no_products"]
        results["both_match_count"] = results["both_match_count"] + t_result["both_match_count"]


    results['manual_match_total_percent'] = round(results["manual_match_total_count"] * 100 / results["item_count"], 2)
    results['auto_match_total_percent'] = round(results["auto_match_total_count"] * 100 / results["item_count"], 2)

    results["only_auto_match_percent"] = round(results["only_auto_match_count"] * 100 / results["item_count"], 2)
    results["only_manual_match_percent"] = round(results["only_manual_match_count"] * 100 / results["item_count"], 2)

    results['equal_match_count_percent'] = round(results["equal_match_count"] * 100 / results["item_count"], 2)
    results['partial_match_count_percent'] = round(results["partial_match_count"] * 100 / results["item_count"], 2)


    results['total_match_count'] = results['equal_match_count'] + results['partial_match_count']
    results['total_match_count_percent'] = results['equal_match_count_percent'] + results['partial_match_count_percent']

    results['total_cm_match_count_percent'] = round(results['total_match_count'] * 100 / results["manual_match_total_count"], 2)
    results['equal_cm_match_count_percent'] = round(results['equal_match_count'] * 100 / results["manual_match_total_count"], 2)
    results['partial_cm_match_count_percent'] = round(results['partial_match_count'] * 100 / results["manual_match_total_count"], 2)

    results['partial5_cm_match_count_percent'] = round(results['partial5_match_count'] * 100 / results["manual_match_total_count"], 2)
    results['partial10_cm_match_count_percent'] = round(results['partial10_match_count'] * 100 / results["manual_match_total_count"], 2)
    results['partial100_cm_match_count_percent'] = round(results['partial100_match_count'] * 100 / results["manual_match_total_count"], 2)

    if OUTPUT_CSV:
        results_df = pd.DataFrame(result_list)
        results_df.to_csv(csv_result_file, float_format='%.2f')

    if OUTPUT_COUNTS:
        with open(counts_result_file, 'w') as fn:
            fn.write(json.dumps(results, indent=4))

    print(results)

    return results