import os
import json
import argparse

from sklearn.metrics import f1_score


def load_jsonl(path):
    """Load a JSONL file as a list of dicts."""
    data = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            data.append(json.loads(line))
    return data


class Scorer:
    """Compute per-patient macro-F1 and aggregate TP/FP/FN diagnostics."""

    def __init__(self, not_available_string: str, language: str):
        self.not_available_string = not_available_string
        # Value returned by f1_score when a class has no true or predicted samples.
        self.return_value_for_zero_division = 0
        if language not in ["en", "it"]:
            raise ValueError(
                f"Unsupported language: {language}. Supported languages are 'en' and 'it'."
            )
        self.language = language

    def calculate_score(self, reference, submission):
        """Average the per-patient macro-F1 scores over all patients."""
        scores = []
        self.TP = 0
        self.FP = 0
        self.FN = 0
        for ref_one_patient, sub_one_patient in zip(reference, submission):
            # Submission document IDs are expected in the form "<id>_<lang>".
            sub_one_patient_id, lang = sub_one_patient["document_id"].split("_", 1)
            if ref_one_patient["document_id"] != sub_one_patient_id:
                raise ValueError(
                    f"Document ID mismatch: reference {ref_one_patient['document_id']} "
                    f"vs submission {sub_one_patient['document_id']}"
                )
            if lang != self.language:
                raise ValueError(
                    f"Language mismatch: expected {self.language} but got {lang} in submission"
                )
            score_one_patient = self.calculate_score_one_patient(
                ref_one_patient,
                sub_one_patient,
            )
            scores.append(score_one_patient)
        if not scores:
            return 0.0
        print(f"TP: {self.TP}, FP: {self.FP}, FN: {self.FN}")
        return sum(scores) / len(scores)

    def calculate_score_one_patient(self, reference_one_patient, submission_one_patient):
        """Return the macro-F1 for one patient and update the TP/FP/FN counters."""
        # Expected structure:
        # reference_one_patient["annotations"] = [{"ground_truth": ...}, ...]
        # submission_one_patient["predictions"] = [{"prediction": ...}, ...]
        y_true = [item["ground_truth"] for item in reference_one_patient["annotations"]]
        y_pred = [item["prediction"] for item in submission_one_patient["predictions"]]
        # Diagnostic counts, skipping pairs where both sides are the "not available" string.
        # A mismatch between two regular labels is not counted here; these counters are only
        # printed, while the returned score comes from f1_score below.
        for t, p in zip(y_true, y_pred):
            if t != self.not_available_string or p != self.not_available_string:
                if t == p:
                    self.TP += 1
                elif t == self.not_available_string and p != self.not_available_string:
                    self.FP += 1
                elif t != p and p == self.not_available_string:
                    self.FN += 1
        f1 = f1_score(
            y_true,
            y_pred,
            average="macro",
            zero_division=self.return_value_for_zero_division,
        )
        return f1


def main(your_submission_path: str, language: str, test_or_dev: str) -> None:
    print("\n=== Scoring program starting ===")
    output_dir = "your_submission_scores"
    if test_or_dev == "test":
        # The test ground truth is not available yet, so this currently points at the development file.
        ref_path = "development_data/dev_gt.jsonl"
    elif test_or_dev == "development":
        ref_path = "development_data/dev_gt.jsonl"
    else:
        raise ValueError("test_or_dev must be either 'test' or 'development'")
    sub_path = your_submission_path
    if not os.path.exists(ref_path):
        raise FileNotFoundError(f"Reference file not found at {ref_path}")
    if not os.path.exists(sub_path):
        raise FileNotFoundError(f"Submission predictions not found at {sub_path}")
    print(f"Loading reference from {ref_path}")
    try:
        reference = load_jsonl(ref_path)
    except Exception:
        if test_or_dev == "test":
            raise ValueError("Test data has not been released yet.")
        raise
    print(f"Loading submission from {sub_path}")
    submission = load_jsonl(sub_path)
    scorer = Scorer(not_available_string="unknown", language=language)
    score = scorer.calculate_score(reference, submission)
    print(f"Final macro-F1 = {score}")
    os.makedirs(output_dir, exist_ok=True)
    # Codabench reads scores.json (or scores.txt). Let's use JSON:
    scores_path = os.path.join(output_dir, "scores.json")
    with open(scores_path, "w", encoding="utf-8") as f:
        json.dump({"f1_macro": float(score)}, f)
    print(f"Scores written to {scores_path}")
    print("=== Scoring program finished successfully ===\n")


if __name__ == "__main__":
    # Parse the command-line arguments: the submission path and its language.
    parser = argparse.ArgumentParser(description="Score submission")
    parser.add_argument("--submission_path", type=str, help="Path to the submission JSONL")
    parser.add_argument("--language", type=str, help="Language of the submission (en or it)")
    args = parser.parse_args()
    your_submission_path = args.submission_path
    language = args.language
    main(your_submission_path, language, test_or_dev="development")
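
# ---------------------------------------------------------------------------
# Illustrative usage (a sketch, not part of the scoring program itself).
# The script name, file names, and label values below are placeholders; only
# the field names, the "unknown" marker, and the "<id>_<lang>" document_id
# convention come from the code above.
#
#   python scoring.py --submission_path my_predictions.jsonl --language en
#
# Expected reference line (one JSON object per line in dev_gt.jsonl):
#   {"document_id": "42",
#    "annotations": [{"ground_truth": "some_label"}, {"ground_truth": "unknown"}]}
#
# Expected submission line (document_id carries the language suffix):
#   {"document_id": "42_en",
#    "predictions": [{"prediction": "some_label"}, {"prediction": "unknown"}]}
# ---------------------------------------------------------------------------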