import os
import time
import easyocr
import cv2
import io
import re
import pandas as pd
import pytesseract

from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential


# ---------------- OCR MODELS ------------------------------
def run_ocr(ocr_model: str, image_path, api_key=None, endpoint=None):
    """
    Dispatch an image to the OCR backend selected by name.

    Args:
        - ocr_model: one of "Azure", "EasyOCR" or "Pytesseract" (case-sensitive).
        - image_path: path to the image file, forwarded to the backend.
        - api_key: forwarded to the Azure backend only.
        - endpoint: forwarded to the Azure backend only.
    Returns:
        - Whatever the selected backend returns, or None when ocr_model is
          not one of the recognised names.
    """
    if ocr_model == "Azure":
        return azure_ocr(image_path, api_key, endpoint)

    if ocr_model == "EasyOCR":
        return easy_ocr_detection(image_path)

    if ocr_model == "Pytesseract":
        return pytesseract_ocr_detection(image_path)

    # Unrecognised backend name: no OCR is run.
    return None
    
def azure_ocr(image_path, api_key, endpoint):
    """
    Run the Azure AI Vision "Read" feature on an image file.

    The image is loaded with OpenCV, re-encoded as JPEG in memory and sent to
    the Image Analysis service.

    Args:
        - image_path: path to the image file on disk.
        - api_key: Azure AI Vision subscription key.
        - endpoint: Azure AI Vision endpoint URL.
    Returns:
        - A list of (text, (x1, y1, x2, y2)) tuples, one per detected line.
          The box is the axis-aligned rectangle enclosing the line's bounding
          polygon. The list is empty when the service reports no text.
    """
    image = cv2.imread(image_path)

    client = ImageAnalysisClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(api_key),
    )

    # The service is sent encoded image bytes, not the decoded pixel array.
    _, buffer = cv2.imencode('.jpg', image)
    image_data = io.BytesIO(buffer.tobytes())

    result = client.analyze(
        image_data=image_data,
        visual_features=[VisualFeatures.READ],
    )

    detected_text = []
    if result.read is None:
        return detected_text

    # Iterate over every block instead of indexing blocks[0]: an empty block
    # list must not raise IndexError, and text in later blocks must not be lost.
    for block in result.read.blocks:
        for line in block.lines:
            x_coords = [point['x'] for point in line.bounding_polygon]
            y_coords = [point['y'] for point in line.bounding_polygon]
            rect_bbox = (min(x_coords), min(y_coords), max(x_coords), max(y_coords))
            detected_text.append((line.text, rect_bbox))

    return detected_text

def easy_ocr_detection(image_path):
    """
    Detect text in an image with EasyOCR, using the language list ['no'].

    width_ths (float, default = 0.5) - Maximum horizontal distance to merge boxes.
    This function calls readtext with width_ths=0.6.

    Args:
        - image_path: path to the image file on disk.
    Returns:
        - A list of (text, (x1, y1, x2, y2)) tuples. The box is the
          axis-aligned rectangle enclosing the detected quadrilateral, with
          coordinates converted to plain Python ints.
    """
    image = cv2.imread(image_path)
    reader = easyocr.Reader(['no'])
    results = reader.readtext(image, width_ths=0.6)
    detected_text = []

    for bbox, text, _prob in results:
        # bbox: [[x1,y1],[x2,y2], [x3,y3], [x4,y4]]
        x_coords = [point[0] for point in bbox]
        y_coords = [point[1] for point in bbox]

        # Cast to built-in ints so the result does not depend on whatever
        # numeric types EasyOCR uses for coordinates. Previously this cast
        # was computed and then overwritten by the uncast tuple.
        rect_bbox = (
            int(min(x_coords)),
            int(min(y_coords)),
            int(max(x_coords)),
            int(max(y_coords)),
        )
        detected_text.append((text, rect_bbox))

    return detected_text

def pytesseract_ocr_detection(image_path):
    """
    Detect text in an image with Tesseract (via pytesseract).

    The image is converted to grayscale and binarised with an inverted
    binary + Otsu threshold before being passed to Tesseract.

    Args:
        - image_path: path to the image file on disk.
    Returns:
        - A list of (text, (x1, y1, x2, y2)) tuples, one per Tesseract entry
          whose text is non-empty after stripping whitespace.
    """
    bgr = cv2.imread(image_path)
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # NOTE(review): with THRESH_OTSU the threshold is presumably chosen by
    # OpenCV, making the 180 below unused -- confirm against the OpenCV docs.
    binarized = cv2.threshold(
        grayscale, 180, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]
    ocr = pytesseract.image_to_data(binarized, output_type=pytesseract.Output.DICT)

    detected_text = []
    entries = zip(ocr['text'], ocr['left'], ocr['top'], ocr['width'], ocr['height'])
    for raw_text, left, top, width, height in entries:
        word = raw_text.strip()
        if word == "":
            continue
        # Convert (left, top, width, height) to corner coordinates.
        detected_text.append((word, (left, top, left + width, top + height)))

    return detected_text

def plot_text_bboxes(image_path, detected_text):
    """
    Unfinished stub: loads the image but does not draw anything yet.

    Args:
        - image_path: path to the image file on disk.
        - detected_text: currently unused.
    Returns:
        - None
    """
    _image = cv2.imread(image_path)
    return None


# ------------ REGEX POST-PROCESSING OF TEXT ----------------------------------
def ocr_to_pandas(detected_text):
    """
    Stores results from OCR in Pandas Dataframe

    Args:
        - detected_text: A List with tuples containing OCR text and bounding boxes.
        Ex.: [("sov", (x1,y1,x2,y2))]
    Returns:
        - Pandas Dataframe with columns "text" and "box". The columns are
          present even when detected_text is empty.
    """
    records = [{'text': text, 'box': box} for text, box in detected_text]
    # Pin the columns explicitly: without this, an empty OCR result produces a
    # frame with no columns and later df["text"] lookups raise KeyError.
    return pd.DataFrame(records, columns=['text', 'box'])
        
def regex_from_list(df, text_list, ignore_case = True):
    """
    Use regex to find text in dataframe.

    The entries of text_list are joined with "|" into one alternation pattern
    and matched from the start of each value in the "text" column
    (Series.str.match semantics). Entries are used as regex fragments, not
    escaped literals.

    Args:
        - df: dataframe containing column "text" from OCR
        - text_list: a list with strings we want to match with. Ex: ["sov", "stue", "kjøkken"]
        - ignore_case: bool. Accept both lower and upper case
    Returns:
        - The filtered dataframe with matched text. No rows are returned when
          text_list is empty, and rows with missing text never match.

    """
    text_column = df["text"]

    alternatives = list(text_list)
    if not alternatives:
        # An empty alternation would compile to "" and match every row.
        return df.iloc[0:0]

    flags = re.IGNORECASE if ignore_case else 0
    # na=False keeps the mask strictly boolean so rows with missing text are
    # filtered out instead of breaking the boolean indexing below.
    match = text_column.str.match("|".join(alternatives), flags=flags, na=False)
    return df[match]

def regex_from_pandas(df, pattern):
    """
    Filter a dataframe by matching a regex against lower-cased OCR text.

    Args:
        - df: dataframe containing column "text" from OCR
        - pattern: regex matched from the start of each lower-cased text value
    Returns:
        - The rows of df (with their original, non-lower-cased text) whose
          text matched.
    """
    is_match = df["text"].str.lower().str.match(pattern)
    return df[is_match]

def drop_duplicate_boxes(df, box_col="box"):
    """
    Remove rows whose bounding box duplicates an earlier row's box.

    Args:
        - df: dataframe with one bounding box per row (may be None).
        - box_col: name of the column holding the boxes. Each box must be
          iterable; it is converted to a tuple for comparison.
    Returns:
        - None when df is None.
        - A copy of df when it is empty or has no box_col column.
        - Otherwise a new dataframe keeping the first row for each distinct
          box, with the index reset to 0..n-1.
    """
    if df is None:
        # Nothing to copy; previously this path crashed on None.copy().
        return None
    if df.empty or box_col not in df.columns:
        return df.copy()

    # Tuples are hashable, so boxes stored as lists can be compared too.
    # Working on a separate Series avoids adding a temporary column that
    # could clash with an existing column name.
    box_keys = df[box_col].apply(tuple)
    first_seen = ~box_keys.duplicated(keep="first")
    return df[first_seen.to_numpy()].reset_index(drop=True)

# -------- NOTE: legacy helpers below -- candidates for removal? ---------

def _load_txt_files(file_path):
    """
    Read a text file into a list of stripped, non-empty lines.

    Args:
        - file_path: path to a UTF-8 encoded text file with one entry per line.
    Returns:
        - A list of strings with surrounding whitespace removed. Blank lines
          are skipped.
    """
    # Decode explicitly instead of relying on the platform default, so
    # non-ASCII entries read the same everywhere. "utf-8-sig" also accepts
    # files that start with a byte-order mark.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        stripped = (line.strip() for line in f)
        # Skip blank lines: an empty entry would become an empty alternative
        # in a regex built from this list and match everything.
        return [entry for entry in stripped if entry]

def _find_matches(target_text, ocr_text):
    """
    Keep the OCR results whose text contains one of the target words.

    Targets are matched case-insensitively as whole words (wrapped in \\b)
    anywhere in the OCR text. They are inserted into the pattern as-is, so
    regex metacharacters in a target keep their regex meaning.

    Args:
        - target_text: iterable of strings to look for. Empty strings are ignored.
        - ocr_text: A List with tuples containing OCR text and bounding boxes.
        Ex.: [("sov", (x1,y1,x2,y2))]
    Returns:
        - A list of the (text, box) tuples from ocr_text that matched, in
          their original order. Empty when there are no usable targets.
    """
    # Longest first, so a longer target is tried before a shorter one that is
    # its prefix.
    targets = sorted((t for t in target_text if t), key=len, reverse=True)
    if not targets:
        # An empty alternation would match almost any text.
        return []

    pattern = re.compile(r'\b(' + '|'.join(targets) + r')\b', re.IGNORECASE)

    # Append each pair as one tuple. extend() would flatten it into two
    # separate list items.
    return [(text, box) for text, box in ocr_text if pattern.search(text)]

def get_rooms_text(ocr_results, file_path):
    """
    Filter OCR results against a list of room names stored in a text file.

    Args:
        - ocr_results: OCR output passed on to _find_matches.
        - file_path: path of the room-name file, resolved relative to the
          directory containing this module.
    Returns:
        - The result of _find_matches for the loaded names and ocr_results.
    """
    module_dir = os.path.dirname(__file__)
    room_names = _load_txt_files(os.path.join(module_dir, file_path))
    return _find_matches(room_names, ocr_results)


def get_byggarealer(byggareal_text, arealer_text):
    """
    Placeholder that is not implemented yet.

    Args:
        - byggareal_text: currently unused.
        - arealer_text: currently unused.
    Returns:
        - None
    """
    return None