Spaces:
Runtime error
Runtime error
| import os | |
| import sys | |
| import json | |
| from sklearn.metrics import f1_score | |
| import argparse | |
def load_jsonl(path):
    """Parse *path* as JSON Lines and return a list of records.

    Blank lines are ignored; every remaining line must be a valid
    JSON document.
    """
    with open(path, "r", encoding="utf-8") as handle:
        return [
            json.loads(stripped)
            for raw in handle
            if (stripped := raw.strip())
        ]
class Scorer:
    """Macro-F1 scorer for per-patient prediction submissions."""

    def __init__(self, not_available_string: str, language: str):
        """Create a scorer.

        Args:
            not_available_string: sentinel value marking an absent answer.
            language: submission language; must be 'en' or 'it'.

        Raises:
            ValueError: if *language* is unsupported.
        """
        self.not_available_string = not_available_string
        # Reported by f1_score for labels that have no support at all.
        self.return_value_for_zero_division = 0
        if language not in ["en", "it"]:
            raise ValueError(f"Unsupported language: {language}. Supported languages are 'en' and 'it'.")
        self.language = language

    def calculate_score(self, reference, submission):
        """Return the mean per-patient macro-F1 over aligned record pairs.

        Also accumulates TP/FP/FN diagnostic counters on the instance.

        Raises:
            ValueError: on length, document-ID, or language mismatch.
        """
        # BUG FIX: zip() silently truncated mismatched inputs, producing a
        # score over a partial submission; fail loudly instead.
        if len(reference) != len(submission):
            raise ValueError(
                f"Length mismatch: {len(reference)} reference vs {len(submission)} submission records"
            )
        scores = []
        self.TP = 0
        self.FP = 0
        self.FN = 0
        for ref_one_patient, sub_one_patient in zip(reference, submission):
            # Submission IDs look like "<document_id>_<language>".
            sub_one_patient_id, lang = sub_one_patient["document_id"].split("_", 1)
            if ref_one_patient["document_id"] != sub_one_patient_id:
                raise ValueError(
                    f"Document ID mismatch: reference {ref_one_patient['document_id']} vs submission {sub_one_patient['document_id']}"
                )
            if lang != self.language:
                raise ValueError(
                    f"Language mismatch: expected {self.language} but got {lang} in submission"
                )
            score_one_patient = self.calculate_score_one_patient(
                ref_one_patient,
                sub_one_patient,
            )
            scores.append(score_one_patient)
        if not scores:
            return 0.0
        print(f"TP: {self.TP}, FP: {self.FP}, FN: {self.FN}")
        return sum(scores) / len(scores)

    def calculate_score_one_patient(self, reference_one_patient, submission_one_patient):
        """Return macro-F1 for one patient and update TP/FP/FN counters.

        Expected structure:
            reference_one_patient["annotations"]  = [{"ground_truth": ...}, ...]
            submission_one_patient["predictions"] = [{"prediction": ...}, ...]
        """
        y_true = [item["ground_truth"] for item in reference_one_patient["annotations"]]
        y_pred = [item["prediction"] for item in submission_one_patient["predictions"]]
        na = self.not_available_string
        # FIX: dropped the unused range() index from the original zip.
        for t, p in zip(y_true, y_pred):
            if t == na and p == na:
                continue  # both marked "not available": nothing to count
            if t == p:
                self.TP += 1
            elif t == na:  # p != na here
                self.FP += 1
            elif p == na:  # t != na and t != p here
                self.FN += 1
            # NOTE(review): the case t != na, p != na, t != p is counted
            # nowhere — presumably intentional (wrong label rather than a
            # detection error); confirm with the task definition.
        # BUG FIX: self.return_value_for_zero_division was defined but never
        # used; pass it through so degenerate labels score 0 instead of
        # triggering sklearn's zero-division warning/default.
        f1 = f1_score(
            y_true,
            y_pred,
            average="macro",
            zero_division=self.return_value_for_zero_division,
        )
        return f1
def main(your_submission_path: str, language: str, test_or_dev: str) -> None:
    """Score a submission JSONL against the reference and write scores.json.

    Args:
        your_submission_path: path to the submission JSONL file.
        language: submission language ('en' or 'it').
        test_or_dev: split to score against ('test' or 'development').

    Raises:
        ValueError: for an unknown split, or when test data is unavailable.
        FileNotFoundError: when the reference or submission file is missing.
    """
    print("\n=== Scoring program starting ===")
    # BUG FIX: output directory was misspelled "your_sumbmission_scores".
    output_dir = "your_submission_scores"
    if test_or_dev == "test":
        # NOTE(review): both splits currently point at the dev ground truth;
        # presumably a placeholder until the test set is released — confirm.
        ref_path = 'development_data/dev_gt.jsonl'
    elif test_or_dev == "development":
        ref_path = 'development_data/dev_gt.jsonl'
    else:
        raise ValueError("test_or_dev must be either 'test' or 'development'")
    sub_path = your_submission_path
    if not os.path.exists(ref_path):
        raise FileNotFoundError(f"Reference file not found at {ref_path}")
    if not os.path.exists(sub_path):
        raise FileNotFoundError(f"Submission predictions not found at {sub_path}")
    print(f"Loading reference from {ref_path}")
    try:
        reference = load_jsonl(ref_path)
    except Exception as err:
        # BUG FIX: the original bare `except:` swallowed every error and, on
        # the "development" branch, fell through with `reference` unbound
        # (a NameError later). Re-raise instead of continuing.
        if test_or_dev == "test":
            raise ValueError("Test data has not been released yet.") from err
        raise
    print(f"Loading submission from {sub_path}")
    submission = load_jsonl(sub_path)
    scorer = Scorer(not_available_string="unknown", language=language)
    score = scorer.calculate_score(reference, submission)
    print(f"Final macro-F1 = {score}")
    os.makedirs(output_dir, exist_ok=True)
    # Codabench reads scores.json (or scores.txt). Let's use JSON:
    scores_path = os.path.join(output_dir, "scores.json")
    with open(scores_path, "w", encoding="utf-8") as f:
        json.dump({"f1_macro": float(score)}, f)
    print(f"Scores written to {scores_path}")
    print("=== Scoring program finished successfully ===\n")
if __name__ == "__main__":
    # Parse the submission path and language from the command line.
    # FIX: the original rebound the `argparse` MODULE name to the parser
    # instance (`argparse = argparse.ArgumentParser(...)`); use `parser`.
    parser = argparse.ArgumentParser(description="Score submission")
    # FIX: flags are required — previously a missing flag passed None
    # downstream and failed with a confusing error.
    parser.add_argument("--submission_path", type=str, required=True, help="Path to the submission JSONL")
    parser.add_argument("--language", type=str, required=True, help="Language of the submission (en or it)")
    args = parser.parse_args()
    your_submission_path = args.submission_path
    language = args.language
    main(your_submission_path, language, test_or_dev="development")