import json
import re
from datetime import datetime, timezone

from rapidfuzz import fuzz


def log_event(LOG_FILE, event_type: str, data: dict):
    """Append one event record to a JSON Lines log file.

    Each call writes a single line containing a JSON object with the keys
    ``timestamp`` (naive UTC time in ISO 8601 format), ``event_type`` and
    ``data``.

    Args:
        LOG_FILE: Path of the log file. Must expose ``.parent`` with a
            ``mkdir`` method (e.g. a ``pathlib.Path``) and be accepted by
            ``open``.
        event_type: Short label identifying the kind of event.
        data: JSON-serializable payload stored under ``data``.

    Raises:
        TypeError: If ``data`` contains values ``json.dumps`` cannot encode.
        OSError: If the directory or file cannot be created or written.
    """
    # parents=True: the log directory may sit under directories that do not
    # exist yet; without it mkdir raises FileNotFoundError.
    LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
    entry = {
        # Same naive-UTC text as the deprecated datetime.utcnow().isoformat().
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
        "event_type": event_type,
        "data": data,
    }
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform's locale default.
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")


def display_to_llm_format(physician_points: list[dict]) -> list[dict]:
    """Convert point dicts carrying offset records into quote-list dicts.

    Each input dict must provide ``offsets`` (an iterable of dicts that each
    have a ``quote`` key) together with ``category``, ``label`` and
    ``reasoning``. The output dict for a point holds the quote texts under
    ``quotes`` and copies the other three fields unchanged; any additional
    keys on the input are dropped.

    Args:
        physician_points: Point dicts in the offset-bearing format.

    Returns:
        A new list with one converted dict per input point, in input order.

    Raises:
        KeyError: If a point or offset record lacks a required key.
    """
    llm_points = []
    for entry in physician_points:
        # Keep only the quote text from each offset record.
        quote_texts = []
        for span in entry["offsets"]:
            quote_texts.append(span["quote"])
        llm_points.append({
            "quotes": quote_texts,
            "category": entry["category"],
            "label": entry["label"],
            "reasoning": entry["reasoning"],
        })
    return llm_points


def normalize(text: str) -> str:
    """Collapse every whitespace run in ``text`` to one space and trim the ends."""
    # str.split() with no separator splits on whitespace runs and discards
    # leading/trailing whitespace, so re-joining with single spaces gives the
    # collapsed, trimmed form.
    words = text.split()
    return " ".join(words)

def find_quote_offset(document: str, quote: str, threshold: int = 80) -> tuple[int, int] | None:
    norm_doc   = re.sub(r'\s+', ' ', document)
    norm_quote = re.sub(r'\s+', ' ', quote).strip()

    norm_to_orig = []
    orig_i = 0
    in_ws = False
    for ch in document:
        if re.match(r'\s', ch):
            if not in_ws:
                norm_to_orig.append(orig_i) 
                in_ws = True
        else:
            norm_to_orig.append(orig_i)
            in_ws = False
        orig_i += 1

    q_len = len(norm_quote)
    best_score = 0
    best_start = -1

    for i in range(len(norm_doc) - q_len + 1):
        window = norm_doc[i:i + q_len]
        score  = fuzz.ratio(norm_quote, window)
        if score > best_score:
            best_score = score
            best_start = i

    if best_score < threshold or best_start == -1:
        return None

    orig_start = norm_to_orig[best_start]
    orig_end   = norm_to_orig[min(best_start + q_len - 1, len(norm_to_orig) - 1)] + 1
    return (orig_start, orig_end)


def find_all_offsets(document: str, quotes: list[str], threshold: int = 80) -> list[dict]:
    """Find the offsets of several quotes in one document.

    Args:
        document: Text to search in.
        quotes: Quotes to locate; each is looked up independently with
            ``find_quote_offset``.
        threshold: Minimum similarity score passed through to
            ``find_quote_offset`` for every quote.

    Returns:
        One dict per quote, in input order, with the keys ``quote`` (the
        quote as given), ``start`` and ``end`` (the span returned by
        ``find_quote_offset``, or ``None`` for both when no match was found)
        and ``found`` (whether a span was returned).
    """
    results = []
    for quote in quotes:
        offset = find_quote_offset(document, quote, threshold)
        start, end = offset if offset is not None else (None, None)
        results.append({
            "quote": quote,
            "start": start,
            "end": end,
            "found": offset is not None,
        })
    return results