"""
MODEL METADATA EXTRACTOR

This script processes model evaluation output files (input_folder) from the
lm-eval-harness library, extracts model identifiers, retrieves detailed
metadata from HuggingFace and saves the information as structured JSON files
(output_folder).

Input: Directory containing .out files from lm-eval-harness
Output: Directory with JSON files containing model metadata

Example input file format (lm-eval-harness output):

    hf (pretrained=swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA,trust_remote_code=True), gen_kwargs: (None), limit: None, num_fewshot: 5, batch_size: 1
    | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
    |------------------------|------:|------|-----:|--------|---|-----:|---|------|
    |evalita-mp | 1|none | |acc |↑ |0.5605|± |0.0052|
    ...
    Job completed

Example output JSON format:

    {
        "model": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA",
        "base_model": "LlamaForCausalLM",
        "revision": "2b6e46e4c9d341dc8bf8350a167492c880116b66",
        "submitted_time": "2024-04-29 09:34:12+00:00",
        "num_params_billion": 8.030261248,
        "language": "en_it"
    }
"""

import os
import re
import json
from huggingface_hub import HfApi

# Configures the Hugging Face token (if needed)
# TOKEN = "YOUR_HUGGINGFACE_API_TOKEN"
api = HfApi()

# Directory containing the output files of the lm-eval-harness library,
# including model accuracy metrics.
input_folder = "/home/sfarzi/leaderboard/evalita_llm_leaderboard/task_result/"
# Directory where JSON files with model characteristics will be saved.
output_folder = "/home/sfarzi/leaderboard/evalita_llm_leaderboard/e3c_llm_requests/"

# Captures the repository id that follows "pretrained=" in the harness header.
model_pattern = re.compile(r"pretrained=([\w\-./]+)")


def _join_languages(card_data):
    """Return the model-card languages joined with "_" (e.g. "en_it").

    Returns "" when there is no card data or no language entry. A card may
    declare a single language as a plain string; joining that directly would
    interleave "_" between its characters, so it is returned unchanged.
    """
    if not card_data:
        return ""
    languages = card_data.get("language", [])
    if not languages:
        return ""
    if isinstance(languages, str):
        return languages
    return "_".join(str(lang) for lang in languages)


def _num_params_billion(safetensors):
    """Return the parameter count in billions, or None when unavailable.

    The BF16 count is preferred (the historical behaviour of this script).
    When the checkpoint is stored in other dtypes, the sum over all dtypes is
    used instead of reporting nothing.
    """
    if not safetensors:
        return None
    parameters = safetensors.parameters or {}
    if "BF16" in parameters:
        return parameters["BF16"] / 1e9
    if parameters:
        return sum(parameters.values()) / 1e9
    return None


def _first_architecture(config):
    """Return the first entry of config["architectures"], or "" if absent/empty."""
    architectures = (config or {}).get("architectures") or [""]
    return architectures[0]


def _output_path(model_name):
    """Return the JSON path for a model: <output_folder>/<org>/<name>.json.

    A model name without "/" uses the same string for folder and file name.
    """
    if "/" in model_name:
        dir_name, file_name = model_name.split("/", 1)
    else:
        dir_name, file_name = model_name, model_name
    return os.path.join(output_folder, dir_name, f"{file_name}.json")


def _build_model_data(model_name, model_info):
    """Build the metadata dictionary that is written to the JSON file."""
    return {
        "model": model_name,
        "base_model": _first_architecture(model_info.config),
        "revision": model_info.sha,
        "submitted_time": str(model_info.created_at),
        "num_params_billion": _num_params_billion(model_info.safetensors),
        "language": _join_languages(model_info.card_data),
    }


def main():
    """Scan input_folder for .out files and write one metadata JSON per model.

    Files without a "pretrained=<id>" header are ignored. Models whose JSON
    file already exists are skipped before any network request is made.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith('.out'):
            continue
        file_path = os.path.join(input_folder, filename)

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        match = model_pattern.search(content)
        if not match:
            continue
        model_name = match.group(1)
        print(f"Processing model: {model_name}")

        # The output path only depends on the model name, so the existence
        # check can run before the (slow) HuggingFace request.
        output_file = _output_path(model_name)
        if os.path.exists(output_file):
            print(f"File {output_file} already exists. Skipping...")
            continue

        try:
            model_info = api.model_info(model_name)
            model_data = _build_model_data(model_name, model_info)

            # dirname() rather than <output_folder>/<org>: a nested repo id
            # would otherwise point at a directory that was never created.
            os.makedirs(os.path.dirname(output_file), exist_ok=True)
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(model_data, f, indent=4)

            print(f"Saved metadata for {model_name} in {output_file}")
        except Exception as e:
            # Deliberate best-effort boundary: one failing model (missing
            # repo, network error, ...) must not abort the whole batch.
            print(f"Error retrieving info for {model_name}: {e}")

    print("Process finished!")


if __name__ == "__main__":
    main()
"""
EVALITA LLM EVALUATION PROCESSOR

Transforms raw model evaluation outputs into structured performance reports
for leaderboard integration.

Inputs
    - Evaluation results: .out files in lm-eval-harness table format, e.g.

        hf (pretrained=model-org/model-name), num_fewshot: 5, batch_size: 1
        | Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
        |-------|-------|------|------|--------|---|------|---|------|
        | - sub-task | | | |acc | |0.4640| |0.0088|
        |  - prompt-1 | | | |acc | |0.3720| |0.0216|

      A task row is nested with " - "; its prompt rows are nested one level
      deeper with "  - ".
    - Model metadata: optional <org>/<name>.json files whose keys are merged
      into the report's "config" section.

Output (one JSON file per .out file)
    {
        "average_CPS": <mean CPS over the tasks that have one>,
        "config": {"model_name": ..., "num_fewshot": ..., "batch_size": ...,
                   <metadata keys, when a metadata file exists>},
        "tasks": {
            "<task>": {
                "prompts": [{"prompt": ..., "metric": ..., "value": <0-100>,
                             "stderr": ...}],
                "average_accuracy": ...,
                "best_prompt": ...,
                "prompt_id": ...,
                "CPS": ...
            }
        }
    }
"""

import json
import os
import re

# Repository id that follows "pretrained=" in the harness header line.
_MODEL_PATTERN = re.compile(r"pretrained=([\w\-./]+)")

# Row prefixes inside the first table column: tasks are nested one level,
# prompts two levels (one extra leading space).
_TASK_PREFIX = " - "
_PROMPT_PREFIX = "  - "

# A result row split on "|" yields 11 fields (9 cells plus two empty edges).
_EXPECTED_COLUMNS = 11

_NER_TASK = "evalita NER"
_NER_WEIGHTS = {"ADG prompt-1": 521, "ADG prompt-2": 521,
                "FIC prompt-1": 1517, "FIC prompt-2": 1517,
                "WN prompt-1": 2088, "WN prompt-2": 2088}


def safe_float(value):
    """Safely converts a value to float, returning None if the conversion fails."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def calculate_task_metrics(task_info):
    """Calculates average accuracy, best prompt accuracy, and CPS for a given task.

    Mutates task_info in place, filling 'average_accuracy', 'best_prompt',
    'prompt_id' and 'CPS'. Prompts whose value is None are ignored; when no
    prompt has a value the task is left untouched.
    """
    scored = [prompt for prompt in task_info['prompts'] if prompt['value'] is not None]
    if not scored:
        return None

    task_info['average_accuracy'] = sum(prompt['value'] for prompt in scored) / len(scored)
    # Only scored prompts are compared: None cannot be ordered against floats.
    best_prompt_data = max(scored, key=lambda prompt: prompt['value'])
    task_info['best_prompt'] = best_prompt_data['value']
    task_info['prompt_id'] = best_prompt_data['prompt']

    # CPS: the best score, discounted by its gap to the average (scores are 0-100).
    avg_acc = task_info['average_accuracy']
    best_acc = task_info['best_prompt']
    task_info['CPS'] = (1 - (best_acc - avg_acc) / 100) * best_acc


def _parse_header(line):
    """Return (model_name, num_fewshot, batch_size) parsed from the "hf (...)" line.

    Any element is None when it cannot be found. The model name is matched
    with a pattern rather than cut at the next comma, so a header written as
    "pretrained=org/name ), ..." does not leak " )" into the name.
    """
    model_match = _MODEL_PATTERN.search(line)
    pretrained_model = model_match.group(1) if model_match else None

    num_fewshot_match = re.search(r"num_fewshot:\s*([\w\d]+)", line)
    num_fewshot = num_fewshot_match.group(1) if num_fewshot_match else None

    batch_size_match = re.search(r"batch_size:\s*(\d+)", line)
    batch_size = int(batch_size_match.group(1)) if batch_size_match else None

    return pretrained_model, num_fewshot, batch_size


def _aggregate_ner_prompts(prompts):
    """Collapse the per-dataset NER prompts into two weighted prompt scores.

    Each of "prompt-1"/"prompt-2" becomes the weighted mean of its ADG/FIC/WN
    entries, normalised by the weights of the entries actually aggregated
    (dividing by the weights of both prompts together would halve the score).
    A prompt with no usable entry keeps the value 0.0.
    """
    weighted_sum = {"prompt-1": 0.0, "prompt-2": 0.0}
    weight_total = {"prompt-1": 0, "prompt-2": 0}

    for prompt in prompts:
        weight = _NER_WEIGHTS.get(prompt['prompt'])
        if weight is None or prompt['value'] is None:
            continue
        key = "prompt-1" if "prompt-1" in prompt['prompt'] else "prompt-2"
        weighted_sum[key] += weight * prompt['value']
        weight_total[key] += weight

    return [
        {"prompt": key, "metric": "acc",
         "value": weighted_sum[key] / weight_total[key] if weight_total[key] else 0.0,
         'stderr': None}
        for key in ("prompt-1", "prompt-2")
    ]


def extract_data_from_file(file_path):
    """Extracts task and prompt data from a specified file.

    Returns a dict with 'average_CPS', 'config' (model_name, num_fewshot,
    batch_size; each None when the header line is missing) and 'tasks'.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    tasks_data = {}
    current_task = None
    pretrained_model = num_fewshot = batch_size = None

    for raw_line in lines:
        # rstrip only: the nesting prefix lives inside the first cell, after
        # the leading "|", so leading whitespace of the line is irrelevant.
        line = raw_line.strip()

        # Skips empty lines
        if not line:
            continue

        # Skips header lines
        if line.startswith("| Tasks"):
            continue

        # Extracts model configuration details
        if line.startswith("hf (pretrained="):
            pretrained_model, num_fewshot, batch_size = _parse_header(line)
            continue

        columns = line.split('|')
        if len(columns) != _EXPECTED_COLUMNS:
            continue

        task_name = columns[1]
        metric = columns[5].strip()

        # Skips normalized accuracy metrics
        if metric == "acc_norm":
            continue

        value = safe_float(columns[7])
        stderr = safe_float(columns[9])

        # Identifies task and prompt sections in the file
        if task_name.startswith(_TASK_PREFIX):
            current_task = task_name[len(_TASK_PREFIX):].strip()
            tasks_data.setdefault(current_task,
                                  {'prompts': [], 'average_accuracy': 0, 'best_prompt': None,
                                   'prompt_id': None, 'CPS': None})

        elif task_name.startswith(_PROMPT_PREFIX) and current_task:
            prompt_name = task_name[len(_PROMPT_PREFIX):].strip()
            tasks_data[current_task]['prompts'].append(
                {'prompt': prompt_name, 'metric': metric,
                 # Scores are stored on a 0-100 scale; unparsable cells stay None.
                 'value': value * 100 if value is not None else None,
                 'stderr': stderr})

    # Special handling for evalita NER task to calculate weighted prompt averages
    if _NER_TASK in tasks_data:
        ner_info = tasks_data[_NER_TASK]
        ner_info['prompts'] = _aggregate_ner_prompts(ner_info['prompts'])

    # Calculates task metrics for each task
    for task_info in tasks_data.values():
        calculate_task_metrics(task_info)

    # Calculates the average CPS across all tasks
    tasks_with_cps = [task['CPS'] for task in tasks_data.values() if task['CPS'] is not None]
    average_CPS = sum(tasks_with_cps) / len(tasks_with_cps) if tasks_with_cps else 0

    config = {
        "model_name": pretrained_model,
        "num_fewshot": num_fewshot,
        "batch_size": batch_size
    }

    return {'average_CPS': average_CPS, 'config': config, 'tasks': tasks_data}


# Raw evaluation results (.out files).
directory_in_path = '/home/sfarzi/leaderboard/evalita_llm_leaderboard/task_result/'
# Model metadata JSON files, laid out as <org>/<name>.json.
# NOTE(review): the metadata extractor in this repository writes to
# ".../e3c_llm_requests/" — confirm this folder is the intended one.
directory_in_requests_path = '/home/sfarzi/leaderboard/evalita_llm_leaderboard/evalita_llm_requests/'
# Destination of the structured JSON reports, laid out as <org>/<name>_<fewshot>.json.
directory_out_results_path = '/home/sfarzi/leaderboard/evalita_llm_leaderboard/evalita_llm_results/'


def main():
    """Run the processing pipeline over every .out file in directory_in_path.

    For each file: parse the evaluation table, merge the model metadata when
    a matching JSON file exists, and write the report under
    directory_out_results_path/<org>/<name>_<num_fewshot>.json.
    """
    for filename in os.listdir(directory_in_path):
        if not filename.endswith('.out'):
            continue
        file_path = os.path.join(directory_in_path, filename)
        json_output = extract_data_from_file(file_path)

        full_model_name = json_output['config']['model_name']
        if not full_model_name:
            print(f"File {filename} skipped: no 'hf (pretrained=...' header found")
            continue

        # Same convention as the metadata extractor: split on the first "/",
        # and reuse the whole name when there is no organisation part.
        if "/" in full_model_name:
            model_org_name, model_name = full_model_name.split("/", 1)
        else:
            model_org_name, model_name = full_model_name, full_model_name

        config_file_path = os.path.join(directory_in_requests_path, model_org_name, f"{model_name}.json")

        if os.path.exists(config_file_path):
            with open(config_file_path, 'r', encoding='utf-8') as config_file:
                additional_config = json.load(config_file)
            json_output['config'].update(additional_config)

        org_folder_path = os.path.join(directory_out_results_path, model_org_name)
        file_suffix = f"{json_output['config']['num_fewshot']}"
        output_file_path = os.path.join(org_folder_path, f"{model_name}_{file_suffix}.json")
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

        with open(output_file_path, 'w', encoding='utf-8', newline="\n") as outfile:
            json.dump(json_output, outfile, indent=4)

        print(f"File {filename} processed and saved to {output_file_path}")


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Build lm-eval-harness style score tables from three prompt Excel files.

Usage:
    python Gen_llm_eval_output.py --p1 csv_files/llm_scores_p1.xlsx \
        --p2 csv_files/llm_scores_p2.xlsx --p3 csv_files/llm_scores_p3.xlsx \
        --output-dir csv_files/outputs

One text file is written per (model, language, configuration) group.
"""
import argparse
import os
import re
import math
import pandas as pd
import numpy as np

REQUIRED_COLS = ["model", "task", "language", "configuration", "prompts", "f1"]


def read_scores(path: str) -> pd.DataFrame:
    """Load one score sheet and return only the required columns.

    Column names are stripped and lower-cased; a "prompt" column is accepted
    as an alias for "prompts". Rows whose f1 is not numeric are dropped.

    Raises:
        ValueError: if a required column is missing from the sheet.
    """
    df = pd.read_excel(path)
    # str() guards against non-string headers (e.g. a numeric column title).
    df.columns = [str(c).strip().lower() for c in df.columns]
    if "prompts" not in df.columns and "prompt" in df.columns:
        df["prompts"] = df["prompt"]
    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"{path} is missing required columns: {missing}")
    df = df[REQUIRED_COLS].copy()
    df["f1"] = pd.to_numeric(df["f1"], errors="coerce")
    return df.dropna(subset=["f1"])


def sanitize_filename(s: str) -> str:
    """Replace every run of characters outside [0-9A-Za-z._+-] with "_"."""
    return re.sub(r"[^0-9A-Za-z._\-+]+", "_", str(s).strip())


def format_float(x):
    """Format a number with four decimals; "nan" for None/NaN/inf/non-numeric."""
    try:
        value = float(x)
    except (TypeError, ValueError):
        return "nan"
    # float() first so numpy scalars that are not `float` subclasses
    # (e.g. float32 NaN) are caught as well.
    if not math.isfinite(value):
        return "nan"
    return f"{value:.4f}"


def prompt_order_key(label: str):
    """Sort key: labels containing a number sort first, by that number."""
    m = re.search(r"(\d+)", str(label))
    return (0, int(m.group(1))) if m else (1, str(label))


def render_group_table(g: pd.DataFrame, model: str, language: str, configuration: str) -> str:
    """Render one group's scores as an lm-eval-harness style table.

    Args:
        g: rows of a single (model, language, configuration) group; the
            "task", "prompts" and "f1" columns are read.
        model: model identifier with "__" between organisation and name
            (e.g. "org__name"); a name without "__" is used as is.
        language: unused here; kept for the caller's signature.
        configuration: few-shot setting such as "0shot" or "10shot"; an
            "<N>shot" value is written as "N", anything else unchanged.

    Returns:
        The header line, the table header and, per task, one task row (mean
        f1 over its prompts) followed by its prompt rows, newline-terminated.
    """
    shot_match = re.fullmatch(r"(\d+)shot", str(configuration))
    if shot_match:
        configuration = shot_match.group(1)

    # Only the first "__" separates organisation from model name; replace()
    # also leaves names without any "__" intact instead of raising.
    model = str(model).replace("__", "/", 1)

    header = f"hf (pretrained={model} ), num_fewshot: {configuration}, batch_size: 1"
    lines = [
        "|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr|",
        "|-------|-------|------|------|------|----|------|---|------|",
    ]

    # For each task, add task row (mean over prompts) then prompt rows
    for task, df_task in g.groupby("task", sort=False):
        f1s = df_task["f1"].to_numpy(dtype=float)
        task_mean = float(np.mean(f1s)) if f1s.size else float("nan")
        lines.append(f"| - {task} | | | |f1 | | {format_float(task_mean)} | |0 |")

        # Prompt rows are nested one level deeper than task rows ("  - " vs
        # " - "), as in lm-eval-harness tables, so a reader of the file can
        # tell a prompt row from a task row.
        prompt_rows = sorted(zip(df_task["prompts"], df_task["f1"]),
                             key=lambda row: prompt_order_key(row[0]))
        for prompt_label, f1 in prompt_rows:
            lines.append(f"|  - {prompt_label} | | | |f1 | | {format_float(f1)} | | 0 |")

    return header + "\n" + "\n".join(lines) + "\n"


def main():
    """Parse the command line and write one table file per group."""
    ap = argparse.ArgumentParser(description="Build per-(model,language,configuration) summaries from three prompt Excel files.")
    ap.add_argument("--p1", required=True, help="Path to llm_scores_p1.xlsx")
    ap.add_argument("--p2", required=True, help="Path to llm_scores_p2.xlsx")
    ap.add_argument("--p3", required=True, help="Path to llm_scores_p3.xlsx")
    ap.add_argument("--output-dir", required=True, help="Directory to write output files")
    args = ap.parse_args()

    os.makedirs(args.output_dir, exist_ok=True)

    df = pd.concat([read_scores(args.p1), read_scores(args.p2), read_scores(args.p3)], ignore_index=True)

    # One file per (model, language, configuration)
    for (model, language, config), g in df.groupby(["model", "language", "configuration"], sort=False):
        content = render_group_table(g, model, language, config)
        fname = f"{sanitize_filename(model)}__{sanitize_filename(language)}__{sanitize_filename(config)}.txt"
        out_path = os.path.join(args.output_dir, fname)
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(content)


if __name__ == "__main__":
    main()
0000000000000000000000000000000000000000..a6d5790fd6e7b7ce5f7d819925710d57f375f3ce Binary files /dev/null and b/csv_files/llm_scores_p1.xlsx differ diff --git a/csv_files/llm_scores_p2.xlsx b/csv_files/llm_scores_p2.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..26af80a6cd45b0ec82097dafc6d4b8105945ecda Binary files /dev/null and b/csv_files/llm_scores_p2.xlsx differ diff --git a/csv_files/llm_scores_p3.xlsx b/csv_files/llm_scores_p3.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bfc0bdc185caa4e7f0530b277867503d7df485fc Binary files /dev/null and b/csv_files/llm_scores_p3.xlsx differ diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e05928730646b5bbe21f9238db4c5f0a0d80aeb1 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0918 | |0 | +| - p1 | | | |f1 | | 0.0629 | | 0 | +| - p2 | | | |f1 | | 0.1041 | | 0 | +| - p3 | | | |f1 | | 0.1083 | | 0 | +| - re | | | |f1 | | 0.2604 | |0 | +| - p1 | | | |f1 | | 0.1287 | | 0 | +| - p2 | | | |f1 | | 0.3394 | | 0 | +| - p3 | | | |f1 | | 0.3131 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8524c34316a59100e3a1b7fe29f11627397324e2 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| 
+|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2142 | |0 | +| - p1 | | | |f1 | | 0.2189 | | 0 | +| - p2 | | | |f1 | | 0.2243 | | 0 | +| - p3 | | | |f1 | | 0.1994 | | 0 | +| - re | | | |f1 | | 0.1429 | |0 | +| - p1 | | | |f1 | | 0.1189 | | 0 | +| - p2 | | | |f1 | | 0.1668 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..060db5c37f5e74766dafb07e5bd52433091e0e1f --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0611 | |0 | +| - p1 | | | |f1 | | 0.0620 | | 0 | +| - p2 | | | |f1 | | 0.0592 | | 0 | +| - p3 | | | |f1 | | 0.0620 | | 0 | +| - re | | | |f1 | | 0.0863 | |0 | +| - p1 | | | |f1 | | 0.1017 | | 0 | +| - p2 | | | |f1 | | 0.0506 | | 0 | +| - p3 | | | |f1 | | 0.1065 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..11f686a3d7c0dcd6950a0fb8f89dd76050e16c83 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1474 | |0 | +| - p1 | | | |f1 | | 0.1667 | | 0 | +| - p2 | | | |f1 | | 0.1089 | | 0 | +| - p3 | | | |f1 | | 0.1667 | | 0 | +| - re | | | |f1 | | 0.0937 | |0 | +| - p1 | | | |f1 | | 0.0821 | | 0 | +| - p2 | | | |f1 | | 0.1053 | | 0 | diff --git 
a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..79142bf6a355f4c1b35bed6c13b7f881099a0dc1 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0416 | |0 | +| - p1 | | | |f1 | | 0.0435 | | 0 | +| - p2 | | | |f1 | | 0.0429 | | 0 | +| - p3 | | | |f1 | | 0.0384 | | 0 | +| - re | | | |f1 | | 0.1413 | |0 | +| - p1 | | | |f1 | | 0.0672 | | 0 | +| - p2 | | | |f1 | | 0.2266 | | 0 | +| - p3 | | | |f1 | | 0.1300 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6f2990ce3c55074691eb8bd620702df9f4473b4 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3753 | |0 | +| - p1 | | | |f1 | | 0.3299 | | 0 | +| - p2 | | | |f1 | | 0.4023 | | 0 | +| - p3 | | | |f1 | | 0.3938 | | 0 | +| - re | | | |f1 | | 0.1102 | |0 | +| - p1 | | | |f1 | | 0.0977 | | 0 | +| - p2 | | | |f1 | | 0.1226 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f77ba6027c7c615df2d261d1d686f39a2c89c34d --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__0shot.txt @@ -0,0 +1,11 @@ +hf 
(pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0379 | |0 | +| - p1 | | | |f1 | | 0.0379 | | 0 | +| - p2 | | | |f1 | | 0.0378 | | 0 | +| - p3 | | | |f1 | | 0.0379 | | 0 | +| - re | | | |f1 | | 0.0891 | |0 | +| - p1 | | | |f1 | | 0.0602 | | 0 | +| - p2 | | | |f1 | | 0.1293 | | 0 | +| - p3 | | | |f1 | | 0.0778 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0c3def3445572250475395490871abca32b311e --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3966 | |0 | +| - p1 | | | |f1 | | 0.3992 | | 0 | +| - p2 | | | |f1 | | 0.3916 | | 0 | +| - p3 | | | |f1 | | 0.3992 | | 0 | +| - re | | | |f1 | | 0.1026 | |0 | +| - p1 | | | |f1 | | 0.0998 | | 0 | +| - p2 | | | |f1 | | 0.1055 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..77e42bb661f69d4f83bdff576a0a1fd1ce9f6b66 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0385 | |0 | +| - p1 | | | |f1 | | 0.0387 | | 0 | +| - p2 | | | |f1 | | 0.0380 | | 0 | +| - p3 | | | |f1 | | 0.0387 | | 0 | +| - re | | | |f1 | | 0.0174 
| |0 | +| - p1 | | | |f1 | | 0.0121 | | 0 | +| - p2 | | | |f1 | | 0.0280 | | 0 | +| - p3 | | | |f1 | | 0.0121 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..15a0ace9ef2a7ac547c13784312b2d11cbb1972c --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3507 | |0 | +| - p1 | | | |f1 | | 0.3444 | | 0 | +| - p2 | | | |f1 | | 0.3632 | | 0 | +| - p3 | | | |f1 | | 0.3444 | | 0 | +| - re | | | |f1 | | 0.0889 | |0 | +| - p1 | | | |f1 | | 0.0734 | | 0 | +| - p2 | | | |f1 | | 0.1045 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..45005591fb2829f67890b8537514d09a7dc4dbd9 --- /dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0438 | |0 | +| - p1 | | | |f1 | | 0.0429 | | 0 | +| - p2 | | | |f1 | | 0.0456 | | 0 | +| - p3 | | | |f1 | | 0.0429 | | 0 | +| - re | | | |f1 | | 0.1278 | |0 | +| - p1 | | | |f1 | | 0.0967 | | 0 | +| - p2 | | | |f1 | | 0.1900 | | 0 | +| - p3 | | | |f1 | | 0.0967 | | 0 | diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f374b71d20e644f0671d320fec3ebab424a9b3c --- 
/dev/null +++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3720 | |0 | +| - p1 | | | |f1 | | 0.3558 | | 0 | +| - p2 | | | |f1 | | 0.4045 | | 0 | +| - p3 | | | |f1 | | 0.3558 | | 0 | +| - re | | | |f1 | | 0.0784 | |0 | +| - p1 | | | |f1 | | 0.0787 | | 0 | +| - p2 | | | |f1 | | 0.0781 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__en__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..94e2aee8ae1f1577bf03eb67d3b30e97a960a4cf --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0578 | |0 | +| - p1 | | | |f1 | | 0.0940 | | 0 | +| - p2 | | | |f1 | | 0.0331 | | 0 | +| - p3 | | | |f1 | | 0.0464 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4f9360b72c56f3110b77d1b06a158c3c6ecca91c --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1317 | |0 | +| - p1 | | | |f1 | | 0.1215 | | 0 | +| - p2 | | | |f1 | | 0.1415 
| | 0 | +| - p3 | | | |f1 | | 0.1322 | | 0 | +| - re | | | |f1 | | 0.0022 | |0 | +| - p1 | | | |f1 | | 0.0028 | | 0 | +| - p2 | | | |f1 | | 0.0016 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__gr__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1f9afed0d2736add07bd96741b281b9502a70539 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0769 | |0 | +| - p1 | | | |f1 | | 0.0859 | | 0 | +| - p2 | | | |f1 | | 0.0591 | | 0 | +| - p3 | | | |f1 | | 0.0859 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2e844447cbaeb4035cc3afc6dbb0324986c8d42f --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1448 | |0 | +| - p1 | | | |f1 | | 0.1455 | | 0 | +| - p2 | | | |f1 | | 0.1434 | | 0 | +| - p3 | | | |f1 | | 0.1455 | | 0 | +| - re | | | |f1 | | 0.0015 | |0 | +| - p1 | | | |f1 | | 0.0024 | | 0 | +| - p2 | | | |f1 | | 0.0007 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__it__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__it__0shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..822472aab17e5a54c699eb0299168e25ce68eff7 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__it__0shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0812 | |0 | +| - p1 | | | |f1 | | 0.0770 | | 0 | +| - p2 | | | |f1 | | 0.0920 | | 0 | +| - p3 | | | |f1 | | 0.0747 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1af9a4668db160d1341a685de4bbe1e2ba89c092 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1694 | |0 | +| - p1 | | | |f1 | | 0.1616 | | 0 | +| - p2 | | | |f1 | | 0.1774 | | 0 | +| - p3 | | | |f1 | | 0.1690 | | 0 | +| - re | | | |f1 | | 0.0050 | |0 | +| - p1 | | | |f1 | | 0.0035 | | 0 | +| - p2 | | | |f1 | | 0.0064 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__pl__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..372f91a2ce5382b434fbeee549424ceb5717daf6 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0308 | |0 | +| - p1 | | | |f1 | | 
0.0244 | | 0 | +| - p2 | | | |f1 | | 0.0436 | | 0 | +| - p3 | | | |f1 | | 0.0244 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d6a4ccbeef1fef677e1a9104fd7232605eb8113 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1516 | |0 | +| - p1 | | | |f1 | | 0.1500 | | 0 | +| - p2 | | | |f1 | | 0.1548 | | 0 | +| - p3 | | | |f1 | | 0.1500 | | 0 | +| - re | | | |f1 | | 0.0031 | |0 | +| - p1 | | | |f1 | | 0.0040 | | 0 | +| - p2 | | | |f1 | | 0.0023 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sk__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d8451e3fdfc0093ad807d5613fddf15e4d63446 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0712 | |0 | +| - p1 | | | |f1 | | 0.0880 | | 0 | +| - p2 | | | |f1 | | 0.0375 | | 0 | +| - p3 | | | |f1 | | 0.0880 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt new file mode 100644 
index 0000000000000000000000000000000000000000..2b69b27b4369aa5ddf3032f60ca53684cd2ce4f4 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1444 | |0 | +| - p1 | | | |f1 | | 0.1485 | | 0 | +| - p2 | | | |f1 | | 0.1360 | | 0 | +| - p3 | | | |f1 | | 0.1485 | | 0 | +| - re | | | |f1 | | 0.0031 | |0 | +| - p1 | | | |f1 | | 0.0038 | | 0 | +| - p2 | | | |f1 | | 0.0024 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sl__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..6279ec191b39a30feefe58b94e3b6aecdb33d129 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0711 | |0 | +| - p1 | | | |f1 | | 0.0777 | | 0 | +| - p2 | | | |f1 | | 0.0579 | | 0 | +| - p3 | | | |f1 | | 0.0777 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..abeb73b685970065381ae98fa4cb8ba945ebb468 --- /dev/null +++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | 
|f1 | | 0.1422 | |0 | +| - p1 | | | |f1 | | 0.1470 | | 0 | +| - p2 | | | |f1 | | 0.1325 | | 0 | +| - p3 | | | |f1 | | 0.1470 | | 0 | +| - re | | | |f1 | | 0.0073 | |0 | +| - p1 | | | |f1 | | 0.0073 | | 0 | +| - p2 | | | |f1 | | 0.0074 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ea6919fab287107f11b133a6f4f393a8742e6e5 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2500 | |0 | +| - p1 | | | |f1 | | 0.3425 | | 0 | +| - p2 | | | |f1 | | 0.1181 | | 0 | +| - p3 | | | |f1 | | 0.2893 | | 0 | +| - re | | | |f1 | | 0.4075 | |0 | +| - p1 | | | |f1 | | 0.4135 | | 0 | +| - p2 | | | |f1 | | 0.3917 | | 0 | +| - p3 | | | |f1 | | 0.4172 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8271ede54c280963d5edbe250ee11c0bb7b431fb --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5993 | |0 | +| - p1 | | | |f1 | | 0.6091 | | 0 | +| - p2 | | | |f1 | | 0.5646 | | 0 | +| - p3 | | | |f1 | | 0.6243 | | 0 | +| - re | | | |f1 | | 0.6179 | |0 | +| - p1 | | | |f1 | | 0.6332 | | 0 | +| - p2 | | | |f1 | | 0.6025 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt 
b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a72d0a6eb08700f9441ae748027f10d73015687 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1290 | |0 | +| - p1 | | | |f1 | | 0.1339 | | 0 | +| - p2 | | | |f1 | | 0.1191 | | 0 | +| - p3 | | | |f1 | | 0.1339 | | 0 | +| - re | | | |f1 | | 0.3957 | |0 | +| - p1 | | | |f1 | | 0.3796 | | 0 | +| - p2 | | | |f1 | | 0.4266 | | 0 | +| - p3 | | | |f1 | | 0.3810 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce16010a359dbdd63efc64adc978c235ecc87764 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6028 | |0 | +| - p1 | | | |f1 | | 0.6119 | | 0 | +| - p2 | | | |f1 | | 0.5847 | | 0 | +| - p3 | | | |f1 | | 0.6119 | | 0 | +| - re | | | |f1 | | 0.5993 | |0 | +| - p1 | | | |f1 | | 0.5962 | | 0 | +| - p2 | | | |f1 | | 0.6024 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0564d5bfe6a8a54aec02c9377516537b66eff33e --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, 
batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2137 | |0 | +| - p1 | | | |f1 | | 0.2467 | | 0 | +| - p2 | | | |f1 | | 0.1709 | | 0 | +| - p3 | | | |f1 | | 0.2234 | | 0 | +| - re | | | |f1 | | 0.4016 | |0 | +| - p1 | | | |f1 | | 0.4173 | | 0 | +| - p2 | | | |f1 | | 0.3770 | | 0 | +| - p3 | | | |f1 | | 0.4106 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1ee4c0b5a7e89753a9acf1141ebf450a99b7c92b --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6569 | |0 | +| - p1 | | | |f1 | | 0.6719 | | 0 | +| - p2 | | | |f1 | | 0.6327 | | 0 | +| - p3 | | | |f1 | | 0.6661 | | 0 | +| - re | | | |f1 | | 0.5882 | |0 | +| - p1 | | | |f1 | | 0.5767 | | 0 | +| - p2 | | | |f1 | | 0.5998 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..ae1fccd474c06d066ad5f6113cf27772e8cf3e7d --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0586 | |0 | +| - p1 | | | |f1 | | 0.0697 | | 0 | +| - p2 | | | |f1 | | 0.0364 | | 0 | +| - p3 | | | |f1 | | 0.0697 | | 0 | +| - re | | | |f1 | | 0.4022 | |0 | +| - p1 | | | |f1 | | 
0.3803 | | 0 | +| - p2 | | | |f1 | | 0.4464 | | 0 | +| - p3 | | | |f1 | | 0.3800 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7d08d53f7f8da84ee76b1ccef550be7d0137bacc --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6092 | |0 | +| - p1 | | | |f1 | | 0.6226 | | 0 | +| - p2 | | | |f1 | | 0.5824 | | 0 | +| - p3 | | | |f1 | | 0.6226 | | 0 | +| - re | | | |f1 | | 0.5729 | |0 | +| - p1 | | | |f1 | | 0.5991 | | 0 | +| - p2 | | | |f1 | | 0.5466 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4e8edce7f1cc06218baf35e09e089db7a3aac7c --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0955 | |0 | +| - p1 | | | |f1 | | 0.1220 | | 0 | +| - p2 | | | |f1 | | 0.0426 | | 0 | +| - p3 | | | |f1 | | 0.1220 | | 0 | +| - re | | | |f1 | | 0.4116 | |0 | +| - p1 | | | |f1 | | 0.4027 | | 0 | +| - p2 | | | |f1 | | 0.4294 | | 0 | +| - p3 | | | |f1 | | 0.4027 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..55c60843461a0fb906dded1f9d91cbd69d051d89 
--- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6419 | |0 | +| - p1 | | | |f1 | | 0.6386 | | 0 | +| - p2 | | | |f1 | | 0.6486 | | 0 | +| - p3 | | | |f1 | | 0.6386 | | 0 | +| - re | | | |f1 | | 0.5869 | |0 | +| - p1 | | | |f1 | | 0.5894 | | 0 | +| - p2 | | | |f1 | | 0.5845 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..91942eaddebd802b373ccbcc131ea59cecb06f2a --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3398 | |0 | +| - p1 | | | |f1 | | 0.3910 | | 0 | +| - p2 | | | |f1 | | 0.2375 | | 0 | +| - p3 | | | |f1 | | 0.3910 | | 0 | +| - re | | | |f1 | | 0.3777 | |0 | +| - p1 | | | |f1 | | 0.3775 | | 0 | +| - p2 | | | |f1 | | 0.3783 | | 0 | +| - p3 | | | |f1 | | 0.3775 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..d29c5ef26ed259e4a5c4f37fe042a47532bf0bca --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6371 | |0 | +| - p1 
| | | |f1 | | 0.6467 | | 0 | +| - p2 | | | |f1 | | 0.6178 | | 0 | +| - p3 | | | |f1 | | 0.6467 | | 0 | +| - re | | | |f1 | | 0.5865 | |0 | +| - p1 | | | |f1 | | 0.5949 | | 0 | +| - p2 | | | |f1 | | 0.5782 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..67cd229daf9590c732ecee234ea4ff29e1fb915a --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3279 | |0 | +| - p1 | | | |f1 | | 0.3804 | | 0 | +| - p2 | | | |f1 | | 0.3068 | | 0 | +| - p3 | | | |f1 | | 0.2964 | | 0 | +| - re | | | |f1 | | 0.4658 | |0 | +| - p1 | | | |f1 | | 0.4734 | | 0 | +| - p2 | | | |f1 | | 0.4649 | | 0 | +| - p3 | | | |f1 | | 0.4591 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..98c263a0ce8135387f875e16815b08f66bc70bc9 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5895 | |0 | +| - p1 | | | |f1 | | 0.5970 | | 0 | +| - p2 | | | |f1 | | 0.5602 | | 0 | +| - p3 | | | |f1 | | 0.6113 | | 0 | +| - re | | | |f1 | | 0.6475 | |0 | +| - p1 | | | |f1 | | 0.6482 | | 0 | +| - p2 | | | |f1 | | 0.6469 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt new file mode 100644 
index 0000000000000000000000000000000000000000..05cb7403b2abda67e1498c0a9acb091c5a6657cd --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4506 | |0 | +| - p1 | | | |f1 | | 0.5976 | | 0 | +| - p2 | | | |f1 | | 0.1568 | | 0 | +| - p3 | | | |f1 | | 0.5976 | | 0 | +| - re | | | |f1 | | 0.4104 | |0 | +| - p1 | | | |f1 | | 0.4393 | | 0 | +| - p2 | | | |f1 | | 0.4083 | | 0 | +| - p3 | | | |f1 | | 0.3834 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..39118aac5734199451ebf0092c3f66c2a027e293 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6175 | |0 | +| - p1 | | | |f1 | | 0.6196 | | 0 | +| - p2 | | | |f1 | | 0.6131 | | 0 | +| - p3 | | | |f1 | | 0.6196 | | 0 | +| - re | | | |f1 | | 0.5905 | |0 | +| - p1 | | | |f1 | | 0.5913 | | 0 | +| - p2 | | | |f1 | | 0.5896 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..140b11ef154cb225fce693e5d9a1e1b236ff2aae --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| 
+|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2734 | |0 | +| - p1 | | | |f1 | | 0.3758 | | 0 | +| - p2 | | | |f1 | | 0.1647 | | 0 | +| - p3 | | | |f1 | | 0.2796 | | 0 | +| - re | | | |f1 | | 0.4370 | |0 | +| - p1 | | | |f1 | | 0.4505 | | 0 | +| - p2 | | | |f1 | | 0.4159 | | 0 | +| - p3 | | | |f1 | | 0.4447 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8abf4953ca9e869eaadd34cd7b1b71f0564fe64d --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.7005 | |0 | +| - p1 | | | |f1 | | 0.6934 | | 0 | +| - p2 | | | |f1 | | 0.7152 | | 0 | +| - p3 | | | |f1 | | 0.6930 | | 0 | +| - re | | | |f1 | | 0.5698 | |0 | +| - p1 | | | |f1 | | 0.5801 | | 0 | +| - p2 | | | |f1 | | 0.5595 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..74e43fcb74eaf8965a95c4edda457f3dfd9e7bb9 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2428 | |0 | +| - p1 | | | |f1 | | 0.2486 | | 0 | +| - p2 | | | |f1 | | 0.2311 | | 0 | +| - p3 | | | |f1 | | 0.2486 | | 0 | +| - re | | | |f1 | | 0.4074 | |0 | +| - p1 | | | |f1 | | 0.3865 | | 0 | +| - p2 | | | |f1 | | 0.4569 | | 0 | +| - p3 | | | |f1 | | 0.3788 | | 0 | diff --git 
a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4c15da5a66a4a537cb948eac8895ab2afa19b229 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6006 | |0 | +| - p1 | | | |f1 | | 0.6008 | | 0 | +| - p2 | | | |f1 | | 0.6004 | | 0 | +| - p3 | | | |f1 | | 0.6008 | | 0 | +| - re | | | |f1 | | 0.5863 | |0 | +| - p1 | | | |f1 | | 0.5858 | | 0 | +| - p2 | | | |f1 | | 0.5868 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4b349425adec625295b2bc60e755750d9ab7a12 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3375 | |0 | +| - p1 | | | |f1 | | 0.3578 | | 0 | +| - p2 | | | |f1 | | 0.2968 | | 0 | +| - p3 | | | |f1 | | 0.3578 | | 0 | +| - re | | | |f1 | | 0.4031 | |0 | +| - p1 | | | |f1 | | 0.3971 | | 0 | +| - p2 | | | |f1 | | 0.4152 | | 0 | +| - p3 | | | |f1 | | 0.3971 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..dcbfcb3c84c61260eda9f92c142b11c06833a8e0 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt @@ -0,0 +1,10 @@ +hf 
(pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6720 | |0 | +| - p1 | | | |f1 | | 0.6743 | | 0 | +| - p2 | | | |f1 | | 0.6673 | | 0 | +| - p3 | | | |f1 | | 0.6743 | | 0 | +| - re | | | |f1 | | 0.5659 | |0 | +| - p1 | | | |f1 | | 0.5733 | | 0 | +| - p2 | | | |f1 | | 0.5586 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..727e1f367a9ba18d2d48d8535269ed56ab992195 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3183 | |0 | +| - p1 | | | |f1 | | 0.3344 | | 0 | +| - p2 | | | |f1 | | 0.2863 | | 0 | +| - p3 | | | |f1 | | 0.3344 | | 0 | +| - re | | | |f1 | | 0.4048 | |0 | +| - p1 | | | |f1 | | 0.3979 | | 0 | +| - p2 | | | |f1 | | 0.4186 | | 0 | +| - p3 | | | |f1 | | 0.3979 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..6a189a78b7994ae2175efe2d4f3f45fd3750a7d1 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6373 | |0 | +| - p1 | | | |f1 | | 0.6253 | | 0 | +| - p2 | | | |f1 | | 0.6615 | | 0 | +| - p3 | | | |f1 | | 0.6253 | | 0 | +| - re | | | |f1 | | 0.5921 
| |0 | +| - p1 | | | |f1 | | 0.5992 | | 0 | +| - p2 | | | |f1 | | 0.5849 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7a31b115206073f648d171a641a1e9adb9f0395 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - re | | | |f1 | | 0.4141 | |0 | +| - p1 | | | |f1 | | 0.4394 | | 0 | +| - p2 | | | |f1 | | 0.4031 | | 0 | +| - p3 | | | |f1 | | 0.3997 | | 0 | +| - ner | | | |f1 | | 0.4445 | |0 | +| - p2 | | | |f1 | | 0.4162 | | 0 | +| - p3 | | | |f1 | | 0.4729 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..22d1b67e335ec0c6e0d18331be46a57669999465 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5907 | |0 | +| - p1 | | | |f1 | | 0.5986 | | 0 | +| - p2 | | | |f1 | | 0.5593 | | 0 | +| - p3 | | | |f1 | | 0.6143 | | 0 | +| - re | | | |f1 | | 0.5206 | |0 | +| - p1 | | | |f1 | | 0.5150 | | 0 | +| - p2 | | | |f1 | | 0.5261 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c0489ce88c04df68da7d52a58b5b2cea1ca8fcb --- 
/dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4368 | |0 | +| - p1 | | | |f1 | | 0.4291 | | 0 | +| - p2 | | | |f1 | | 0.4521 | | 0 | +| - p3 | | | |f1 | | 0.4291 | | 0 | +| - re | | | |f1 | | 0.3776 | |0 | +| - p1 | | | |f1 | | 0.3733 | | 0 | +| - p2 | | | |f1 | | 0.3799 | | 0 | +| - p3 | | | |f1 | | 0.3798 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..3508c1e76ad1b9919674ab452e90707ae71cf34c --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5999 | |0 | +| - p1 | | | |f1 | | 0.6164 | | 0 | +| - p2 | | | |f1 | | 0.5669 | | 0 | +| - p3 | | | |f1 | | 0.6164 | | 0 | +| - re | | | |f1 | | 0.5112 | |0 | +| - p1 | | | |f1 | | 0.5015 | | 0 | +| - p2 | | | |f1 | | 0.5209 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4971eb61a49dd67508ed082c97db8f1c2abf7d7 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | 
| | |f1 | | 0.3572 | |0 | +| - p1 | | | |f1 | | 0.0885 | | 0 | +| - p2 | | | |f1 | | 0.5316 | | 0 | +| - p3 | | | |f1 | | 0.4514 | | 0 | +| - re | | | |f1 | | 0.3959 | |0 | +| - p1 | | | |f1 | | 0.3784 | | 0 | +| - p2 | | | |f1 | | 0.4123 | | 0 | +| - p3 | | | |f1 | | 0.3972 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..92a7b91f95d2c5d73e4fa25813a905aadb2e20b9 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6673 | |0 | +| - p1 | | | |f1 | | 0.6793 | | 0 | +| - p2 | | | |f1 | | 0.6447 | | 0 | +| - p3 | | | |f1 | | 0.6778 | | 0 | +| - re | | | |f1 | | 0.5940 | |0 | +| - p1 | | | |f1 | | 0.6041 | | 0 | +| - p2 | | | |f1 | | 0.5838 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb0b6e78f15d6744550214a39f1ec2b912ae6e4e --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4235 | |0 | +| - p1 | | | |f1 | | 0.4332 | | 0 | +| - p2 | | | |f1 | | 0.4043 | | 0 | +| - p3 | | | |f1 | | 0.4332 | | 0 | +| - re | | | |f1 | | 0.4186 | |0 | +| - p1 | | | |f1 | | 0.4152 | | 0 | +| - p2 | | | |f1 | | 0.4220 | | 0 | +| - p3 | | | |f1 | | 0.4187 | | 0 | diff --git 
a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2eab77cf504508cdd676550c15f1ef50adcfc17e --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6118 | |0 | +| - p1 | | | |f1 | | 0.6276 | | 0 | +| - p2 | | | |f1 | | 0.5803 | | 0 | +| - p3 | | | |f1 | | 0.6276 | | 0 | +| - re | | | |f1 | | 0.5151 | |0 | +| - p1 | | | |f1 | | 0.5103 | | 0 | +| - p2 | | | |f1 | | 0.5200 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f1aa20ba1b8dbdfbbdd9c3765af245f7dc02003 --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3287 | |0 | +| - p1 | | | |f1 | | 0.3231 | | 0 | +| - p2 | | | |f1 | | 0.3398 | | 0 | +| - p3 | | | |f1 | | 0.3231 | | 0 | +| - re | | | |f1 | | 0.3943 | |0 | +| - p1 | | | |f1 | | 0.3980 | | 0 | +| - p2 | | | |f1 | | 0.3867 | | 0 | +| - p3 | | | |f1 | | 0.3980 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1b4fdd31716963a589ea32146ec079a51ae8228d --- /dev/null +++ 
b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6030 | |0 | +| - p1 | | | |f1 | | 0.6085 | | 0 | +| - p2 | | | |f1 | | 0.5919 | | 0 | +| - p3 | | | |f1 | | 0.6085 | | 0 | +| - re | | | |f1 | | 0.4972 | |0 | +| - p1 | | | |f1 | | 0.4920 | | 0 | +| - p2 | | | |f1 | | 0.5025 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2991b4e7e5703738318d4d20a5d41b03aa8fb72a --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4501 | |0 | +| - p1 | | | |f1 | | 0.4486 | | 0 | +| - p2 | | | |f1 | | 0.4531 | | 0 | +| - p3 | | | |f1 | | 0.4486 | | 0 | +| - re | | | |f1 | | 0.4118 | |0 | +| - p1 | | | |f1 | | 0.4115 | | 0 | +| - p2 | | | |f1 | | 0.4126 | | 0 | +| - p3 | | | |f1 | | 0.4115 | | 0 | diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e8eb40f0ec1a84778272aaa7d38475c55c5c9b6d --- /dev/null +++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 
0.6391 | |0 | +| - p1 | | | |f1 | | 0.6615 | | 0 | +| - p2 | | | |f1 | | 0.5944 | | 0 | +| - p3 | | | |f1 | | 0.6615 | | 0 | +| - re | | | |f1 | | 0.5319 | |0 | +| - p1 | | | |f1 | | 0.5062 | | 0 | +| - p2 | | | |f1 | | 0.5576 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4ef01af5dd78b8e2fb940fa106903fbc0a6bb424 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2877 | |0 | +| - p1 | | | |f1 | | 0.1963 | | 0 | +| - p2 | | | |f1 | | 0.3459 | | 0 | +| - p3 | | | |f1 | | 0.3208 | | 0 | +| - re | | | |f1 | | 0.4430 | |0 | +| - p1 | | | |f1 | | 0.4487 | | 0 | +| - p2 | | | |f1 | | 0.4492 | | 0 | +| - p3 | | | |f1 | | 0.4311 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..23149b9c36e767ce074bb8aaf7a68c7dc2c372d1 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5963 | |0 | +| - p1 | | | |f1 | | 0.6024 | | 0 | +| - p2 | | | |f1 | | 0.5929 | | 0 | +| - p3 | | | |f1 | | 0.5935 | | 0 | +| - re | | | |f1 | | 0.5195 | |0 | +| - p1 | | | |f1 | | 0.5191 | | 0 | +| - p2 | | | |f1 | | 0.5199 | | 0 | diff --git 
a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..bfab0d152cb46ac0c16e55f18dbed709ae1bf3b1 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3421 | |0 | +| - p1 | | | |f1 | | 0.3455 | | 0 | +| - p2 | | | |f1 | | 0.3354 | | 0 | +| - p3 | | | |f1 | | 0.3455 | | 0 | +| - re | | | |f1 | | 0.3485 | |0 | +| - p1 | | | |f1 | | 0.2406 | | 0 | +| - p2 | | | |f1 | | 0.3947 | | 0 | +| - p3 | | | |f1 | | 0.4102 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..6c7abe0157fa639540010105a73dbf83d88a949e --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5884 | |0 | +| - p1 | | | |f1 | | 0.5928 | | 0 | +| - p2 | | | |f1 | | 0.5796 | | 0 | +| - p3 | | | |f1 | | 0.5928 | | 0 | +| - re | | | |f1 | | 0.4338 | |0 | +| - p1 | | | |f1 | | 0.4467 | | 0 | +| - p2 | | | |f1 | | 0.4210 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..f1126637dc0654e0c02eb034afe79b2134085404 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3220 | |0 | +| - p1 | | | |f1 | | 0.2678 | | 0 | +| - p2 | | | |f1 | | 0.3568 | | 0 | +| - p3 | | | |f1 | | 0.3414 | | 0 | +| - re | | | |f1 | | 0.4452 | |0 | +| - p1 | | | |f1 | | 0.4519 | | 0 | +| - p2 | | | |f1 | | 0.4611 | | 0 | +| - p3 | | | |f1 | | 0.4227 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d08ae36f5f6ac1af02fb2539d068cd315347ce2 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6864 | |0 | +| - p1 | | | |f1 | | 0.6982 | | 0 | +| - p2 | | | |f1 | | 0.6679 | | 0 | +| - p3 | | | |f1 | | 0.6930 | | 0 | +| - re | | | |f1 | | 0.5536 | |0 | +| - p1 | | | |f1 | | 0.5546 | | 0 | +| - p2 | | | |f1 | | 0.5526 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..c364ad8dc8a3694cab13535c8d872a7d8267216f --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt @@ -0,0 +1,11 @@ +hf 
(pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3379 | |0 | +| - p1 | | | |f1 | | 0.3204 | | 0 | +| - p2 | | | |f1 | | 0.3728 | | 0 | +| - p3 | | | |f1 | | 0.3204 | | 0 | +| - re | | | |f1 | | 0.4131 | |0 | +| - p1 | | | |f1 | | 0.3983 | | 0 | +| - p2 | | | |f1 | | 0.4327 | | 0 | +| - p3 | | | |f1 | | 0.4083 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2005f21143be43b07c295266c38acb45f2fa6f0f --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6189 | |0 | +| - p1 | | | |f1 | | 0.6214 | | 0 | +| - p2 | | | |f1 | | 0.6140 | | 0 | +| - p3 | | | |f1 | | 0.6214 | | 0 | +| - re | | | |f1 | | 0.4996 | |0 | +| - p1 | | | |f1 | | 0.4863 | | 0 | +| - p2 | | | |f1 | | 0.5129 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2152a7395c9729a83259ff1083694393d220f9df --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2521 | |0 | +| - 
p1 | | | |f1 | | 0.2829 | | 0 | +| - p2 | | | |f1 | | 0.1905 | | 0 | +| - p3 | | | |f1 | | 0.2829 | | 0 | +| - re | | | |f1 | | 0.3959 | |0 | +| - p1 | | | |f1 | | 0.3893 | | 0 | +| - p2 | | | |f1 | | 0.4091 | | 0 | +| - p3 | | | |f1 | | 0.3893 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..53dc3135f30992f59dba56e73962bb6811c03381 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6302 | |0 | +| - p1 | | | |f1 | | 0.6347 | | 0 | +| - p2 | | | |f1 | | 0.6211 | | 0 | +| - p3 | | | |f1 | | 0.6347 | | 0 | +| - re | | | |f1 | | 0.4625 | |0 | +| - p1 | | | |f1 | | 0.4799 | | 0 | +| - p2 | | | |f1 | | 0.4451 | | 0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2748426dd1af84f0045d8336e22925b40cc24078 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2604 | |0 | +| - p1 | | | |f1 | | 0.2810 | | 0 | +| - p2 | | | |f1 | | 0.2192 | | 0 | +| - p3 | | | |f1 | | 0.2810 | | 0 | +| - re | | | |f1 | | 0.4116 | |0 | +| - p1 | | | |f1 | | 0.4116 | | 0 | +| - p2 | | | |f1 | | 0.4115 | | 0 | +| - p3 | | | |f1 | | 0.4116 | | 
0 | diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..b23c800a9f3a3e4608a30f4ce34c2de4daac3546 --- /dev/null +++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6026 | |0 | +| - p1 | | | |f1 | | 0.6015 | | 0 | +| - p2 | | | |f1 | | 0.6049 | | 0 | +| - p3 | | | |f1 | | 0.6015 | | 0 | +| - re | | | |f1 | | 0.4905 | |0 | +| - p1 | | | |f1 | | 0.5137 | | 0 | +| - p2 | | | |f1 | | 0.4674 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__en__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..78db8a8821fd840feb3329f0360cc1ee8ee09efb --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0612 | |0 | +| - p1 | | | |f1 | | 0.0578 | | 0 | +| - p2 | | | |f1 | | 0.0410 | | 0 | +| - p3 | | | |f1 | | 0.0848 | | 0 | +| - re | | | |f1 | | 0.0313 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0442 | | 0 | +| - p3 | | | |f1 | | 0.0497 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a24c1cdf686f8f22339741eea15c847a529ff4b --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt @@ -0,0 
+1,10 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1245 | |0 | +| - p1 | | | |f1 | | 0.0803 | | 0 | +| - p2 | | | |f1 | | 0.1479 | | 0 | +| - p3 | | | |f1 | | 0.1454 | | 0 | +| - re | | | |f1 | | 0.0707 | |0 | +| - p1 | | | |f1 | | 0.0722 | | 0 | +| - p2 | | | |f1 | | 0.0692 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__gr__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..90f380aafaa9694b769f94a1f4c39bda959769a0 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2426 | |0 | +| - p1 | | | |f1 | | 0.2417 | | 0 | +| - p2 | | | |f1 | | 0.2443 | | 0 | +| - p3 | | | |f1 | | 0.2417 | | 0 | +| - re | | | |f1 | | 0.0592 | |0 | +| - p1 | | | |f1 | | 0.1556 | | 0 | +| - p2 | | | |f1 | | 0.0161 | | 0 | +| - p3 | | | |f1 | | 0.0058 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fcaa1628b92a900182158ef40a0f79e7f2da7e7 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | +| - re | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 
0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__it__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..477407b0788a1fd7b0424b9bfbe8e8ec2b5027d1 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0639 | |0 | +| - p1 | | | |f1 | | 0.0773 | | 0 | +| - p2 | | | |f1 | | 0.0612 | | 0 | +| - p3 | | | |f1 | | 0.0531 | | 0 | +| - re | | | |f1 | | 0.1072 | |0 | +| - p1 | | | |f1 | | 0.0020 | | 0 | +| - p2 | | | |f1 | | 0.1929 | | 0 | +| - p3 | | | |f1 | | 0.1268 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..70f1ca5e4a9cd49b89f1cffb9a2514616006cbfc --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3288 | |0 | +| - p1 | | | |f1 | | 0.2991 | | 0 | +| - p2 | | | |f1 | | 0.3563 | | 0 | +| - p3 | | | |f1 | | 0.3311 | | 0 | +| - re | | | |f1 | | 0.0859 | |0 | +| - p1 | | | |f1 | | 0.0832 | | 0 | +| - p2 | | | |f1 | | 0.0887 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__pl__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..fc17086240d17d361a344298c66abb3a31227ff9 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__pl__0shot.txt @@ -0,0 +1,11 @@ +hf 
(pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1161 | |0 | +| - p1 | | | |f1 | | 0.1140 | | 0 | +| - p2 | | | |f1 | | 0.1203 | | 0 | +| - p3 | | | |f1 | | 0.1140 | | 0 | +| - re | | | |f1 | | 0.0025 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0076 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..b49704f897b67e8bbfbfda2d599a9cea322c0e6d --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3222 | |0 | +| - p1 | | | |f1 | | 0.3184 | | 0 | +| - p2 | | | |f1 | | 0.3297 | | 0 | +| - p3 | | | |f1 | | 0.3184 | | 0 | +| - re | | | |f1 | | 0.0497 | |0 | +| - p1 | | | |f1 | | 0.0533 | | 0 | +| - p2 | | | |f1 | | 0.0461 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sk__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2992e79edb39887634ef426287d1a8de4142fb50 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0778 | |0 | +| - p1 | | | |f1 | | 0.0874 | | 0 | +| - p2 | | | |f1 | | 0.0586 | | 0 | +| - p3 | | | |f1 | | 0.0874 | | 0 | +| - re | | | |f1 | | 0.0034 | |0 | +| - p1 | | | |f1 | | 0.0036 | | 0 | 
+| - p2 | | | |f1 | | 0.0031 | | 0 | +| - p3 | | | |f1 | | 0.0036 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..28126a48bf5fc2bcc223674fae2771730fdbfb54 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2993 | |0 | +| - p1 | | | |f1 | | 0.3004 | | 0 | +| - p2 | | | |f1 | | 0.2970 | | 0 | +| - p3 | | | |f1 | | 0.3004 | | 0 | +| - re | | | |f1 | | 0.0419 | |0 | +| - p1 | | | |f1 | | 0.0445 | | 0 | +| - p2 | | | |f1 | | 0.0393 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sl__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e6829ac6b5697fbe26df6ba9884b9378fb79fca --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0951 | |0 | +| - p1 | | | |f1 | | 0.1197 | | 0 | +| - p2 | | | |f1 | | 0.0460 | | 0 | +| - p3 | | | |f1 | | 0.1197 | | 0 | +| - re | | | |f1 | | 0.0445 | |0 | +| - p1 | | | |f1 | | 0.0598 | | 0 | +| - p2 | | | |f1 | | 0.0137 | | 0 | +| - p3 | | | |f1 | | 0.0598 | | 0 | diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..48a1c3857e71b30d72bff58a732d669cae455200 --- /dev/null +++ b/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt @@ -0,0 +1,10 @@ +hf 
(pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3052 | |0 | +| - p1 | | | |f1 | | 0.3119 | | 0 | +| - p2 | | | |f1 | | 0.2916 | | 0 | +| - p3 | | | |f1 | | 0.3119 | | 0 | +| - re | | | |f1 | | 0.0489 | |0 | +| - p1 | | | |f1 | | 0.0477 | | 0 | +| - p2 | | | |f1 | | 0.0501 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__en__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e19636f5cb99384931c747797c8af46f51f3d6db --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4603 | |0 | +| - p1 | | | |f1 | | 0.3267 | | 0 | +| - p2 | | | |f1 | | 0.5174 | | 0 | +| - p3 | | | |f1 | | 0.5370 | | 0 | +| - re | | | |f1 | | 0.4211 | |0 | +| - p1 | | | |f1 | | 0.4360 | | 0 | +| - p2 | | | |f1 | | 0.4205 | | 0 | +| - p3 | | | |f1 | | 0.4067 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ec0a5068d2d9de19d471da673153ba727f16b87 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5919 | |0 | +| - p1 | | | |f1 | | 0.6200 | | 0 | +| - p2 | | | |f1 | | 0.5639 | | 0 | +| - p3 | | | |f1 | | 0.5918 | | 0 | +| - re | | | |f1 | | 0.5250 | |0 | +| - p1 | | | |f1 | | 0.5163 | | 0 | 
+| - p2 | | | |f1 | | 0.5337 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__gr__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..048dfca18dd05e8044de8048244a8584f4827f55 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5292 | |0 | +| - p1 | | | |f1 | | 0.5549 | | 0 | +| - p2 | | | |f1 | | 0.4777 | | 0 | +| - p3 | | | |f1 | | 0.5549 | | 0 | +| - re | | | |f1 | | 0.4008 | |0 | +| - p1 | | | |f1 | | 0.4124 | | 0 | +| - p2 | | | |f1 | | 0.3957 | | 0 | +| - p3 | | | |f1 | | 0.3943 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..acda7e64b8db71bd62f4747ef0e6e75ceba2fda9 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5943 | |0 | +| - p1 | | | |f1 | | 0.6083 | | 0 | +| - p2 | | | |f1 | | 0.5663 | | 0 | +| - p3 | | | |f1 | | 0.6083 | | 0 | +| - re | | | |f1 | | 0.5020 | |0 | +| - p1 | | | |f1 | | 0.5070 | | 0 | +| - p2 | | | |f1 | | 0.4971 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__it__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..257bab96b1cdba6c3e1750389c0becb81478fd17 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-2-9b-it ), 
num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6158 | |0 | +| - p1 | | | |f1 | | 0.5739 | | 0 | +| - p2 | | | |f1 | | 0.6524 | | 0 | +| - p3 | | | |f1 | | 0.6210 | | 0 | +| - re | | | |f1 | | 0.4298 | |0 | +| - p1 | | | |f1 | | 0.4585 | | 0 | +| - p2 | | | |f1 | | 0.4113 | | 0 | +| - p3 | | | |f1 | | 0.4196 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0632325531b1375a8db66e753ac57fe9f67aa6c3 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6707 | |0 | +| - p1 | | | |f1 | | 0.6910 | | 0 | +| - p2 | | | |f1 | | 0.6643 | | 0 | +| - p3 | | | |f1 | | 0.6569 | | 0 | +| - re | | | |f1 | | 0.5162 | |0 | +| - p1 | | | |f1 | | 0.4958 | | 0 | +| - p2 | | | |f1 | | 0.5365 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__pl__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0dd544b253e5cfec98214be12bd48f0ad373669 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4092 | |0 | +| - p1 | | | |f1 | | 0.4060 | | 0 | +| - p2 | | | |f1 | | 0.4155 | | 0 | +| - p3 | | | |f1 | | 0.4060 | | 0 | +| - re | | | |f1 | | 0.3891 | |0 | +| - p1 | | | |f1 | | 0.3674 | | 0 | +| - p2 | | | |f1 | | 0.4271 | | 0 | 
+| - p3 | | | |f1 | | 0.3729 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..dce8e217433d79bf5a23a3bbbfc88de644f5db4d --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5893 | |0 | +| - p1 | | | |f1 | | 0.5908 | | 0 | +| - p2 | | | |f1 | | 0.5862 | | 0 | +| - p3 | | | |f1 | | 0.5908 | | 0 | +| - re | | | |f1 | | 0.4988 | |0 | +| - p1 | | | |f1 | | 0.5168 | | 0 | +| - p2 | | | |f1 | | 0.4808 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__sk__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..fd0dcda59f3d4db2ccb3c171b9673cc8777a7dbd --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4775 | |0 | +| - p1 | | | |f1 | | 0.4875 | | 0 | +| - p2 | | | |f1 | | 0.4575 | | 0 | +| - p3 | | | |f1 | | 0.4875 | | 0 | +| - re | | | |f1 | | 0.4106 | |0 | +| - p1 | | | |f1 | | 0.3989 | | 0 | +| - p2 | | | |f1 | | 0.4340 | | 0 | +| - p3 | | | |f1 | | 0.3989 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..d861112dbb48892d26a8c978c99e01b89e5eeef3 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-2-9b-it ), 
num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6135 | |0 | +| - p1 | | | |f1 | | 0.6141 | | 0 | +| - p2 | | | |f1 | | 0.6122 | | 0 | +| - p3 | | | |f1 | | 0.6141 | | 0 | +| - re | | | |f1 | | 0.4953 | |0 | +| - p1 | | | |f1 | | 0.5153 | | 0 | +| - p2 | | | |f1 | | 0.4754 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__sl__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7dbedbb5cba1e8cbcff506c960d746ce4fd56b4d --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4487 | |0 | +| - p1 | | | |f1 | | 0.4707 | | 0 | +| - p2 | | | |f1 | | 0.4046 | | 0 | +| - p3 | | | |f1 | | 0.4707 | | 0 | +| - re | | | |f1 | | 0.4058 | |0 | +| - p1 | | | |f1 | | 0.4079 | | 0 | +| - p2 | | | |f1 | | 0.4016 | | 0 | +| - p3 | | | |f1 | | 0.4079 | | 0 | diff --git a/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4971239c4c88c5c27f915bb8144431389bc77bd6 --- /dev/null +++ b/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6156 | |0 | +| - p1 | | | |f1 | | 0.6365 | | 0 | +| - p2 | | | |f1 | | 0.5737 | | 0 | +| - p3 | | | |f1 | | 0.6365 | | 0 | +| - re | | | |f1 | | 0.4839 | |0 | +| - p1 | | | |f1 | | 0.4801 | | 0 | +| - p2 | | | |f1 | | 0.4878 | | 0 | 
diff --git a/csv_files/outputs/google__gemma-3-27b-it__en__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..31b15abdd5c41472d9458080c7abcad9d60923a0 --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5490 | |0 | +| - p1 | | | |f1 | | 0.5446 | | 0 | +| - p2 | | | |f1 | | 0.5830 | | 0 | +| - p3 | | | |f1 | | 0.5194 | | 0 | +| - re | | | |f1 | | 0.4623 | |0 | +| - p1 | | | |f1 | | 0.4543 | | 0 | +| - p2 | | | |f1 | | 0.4582 | | 0 | +| - p3 | | | |f1 | | 0.4743 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4c211585d3bad9b76f69797b829c4ce01b1d4eef --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6187 | |0 | +| - p1 | | | |f1 | | 0.6160 | | 0 | +| - p2 | | | |f1 | | 0.6308 | | 0 | +| - p3 | | | |f1 | | 0.6094 | | 0 | +| - re | | | |f1 | | 0.5395 | |0 | +| - p1 | | | |f1 | | 0.5191 | | 0 | +| - p2 | | | |f1 | | 0.5600 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__gr__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..85b855c2ce31543c10a48d92f353901284140c68 --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1 
+|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5151 | |0 | +| - p1 | | | |f1 | | 0.4866 | | 0 | +| - p2 | | | |f1 | | 0.5721 | | 0 | +| - p3 | | | |f1 | | 0.4866 | | 0 | +| - re | | | |f1 | | 0.4473 | |0 | +| - p1 | | | |f1 | | 0.3955 | | 0 | +| - p2 | | | |f1 | | 0.4695 | | 0 | +| - p3 | | | |f1 | | 0.4769 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f9352d7ebc748c3d31505626a53cbd79984d0345 --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6570 | |0 | +| - p1 | | | |f1 | | 0.6551 | | 0 | +| - p2 | | | |f1 | | 0.6608 | | 0 | +| - p3 | | | |f1 | | 0.6551 | | 0 | +| - re | | | |f1 | | 0.5316 | |0 | +| - p1 | | | |f1 | | 0.5083 | | 0 | +| - p2 | | | |f1 | | 0.5550 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__it__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d27d823cf04238dd6448c8d887746a77c94522c --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6065 | |0 | +| - p1 | | | |f1 | | 0.5543 | | 0 | +| - p2 | | | |f1 | | 0.6697 | | 0 | +| - p3 | | | |f1 | | 0.5954 | | 0 | +| - re | | | |f1 | | 0.4737 | |0 | +| - p1 | | | |f1 | | 0.4390 | | 0 | +| - p2 | | | |f1 | | 0.4895 | | 0 | +| - p3 | | | |f1 | | 
0.4927 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..324ead0872b04bf57f22a2b8eb4d48f1a28a2d29 --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.7115 | |0 | +| - p1 | | | |f1 | | 0.7142 | | 0 | +| - p2 | | | |f1 | | 0.6992 | | 0 | +| - p3 | | | |f1 | | 0.7212 | | 0 | +| - re | | | |f1 | | 0.5530 | |0 | +| - p1 | | | |f1 | | 0.5223 | | 0 | +| - p2 | | | |f1 | | 0.5837 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__pl__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b3aad8ed444ad79e33ecc050259e21001bc221a --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4508 | |0 | +| - p1 | | | |f1 | | 0.4506 | | 0 | +| - p2 | | | |f1 | | 0.4511 | | 0 | +| - p3 | | | |f1 | | 0.4506 | | 0 | +| - re | | | |f1 | | 0.4307 | |0 | +| - p1 | | | |f1 | | 0.4384 | | 0 | +| - p2 | | | |f1 | | 0.4267 | | 0 | +| - p3 | | | |f1 | | 0.4271 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..ba2ab175a07a602745838e903e5784c52ee2778d --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-3-27b-it ), 
num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6618 | |0 | +| - p1 | | | |f1 | | 0.6591 | | 0 | +| - p2 | | | |f1 | | 0.6672 | | 0 | +| - p3 | | | |f1 | | 0.6591 | | 0 | +| - re | | | |f1 | | 0.5698 | |0 | +| - p1 | | | |f1 | | 0.5795 | | 0 | +| - p2 | | | |f1 | | 0.5601 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__sk__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..6f23d39956ebbb09bdb54019cc382d647d59358d --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2841 | |0 | +| - p1 | | | |f1 | | 0.3183 | | 0 | +| - p2 | | | |f1 | | 0.2157 | | 0 | +| - p3 | | | |f1 | | 0.3183 | | 0 | +| - re | | | |f1 | | 0.4369 | |0 | +| - p1 | | | |f1 | | 0.4373 | | 0 | +| - p2 | | | |f1 | | 0.4360 | | 0 | +| - p3 | | | |f1 | | 0.4373 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..fbb70a69ac24c59928a6403337fe39bb680e3e3c --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6786 | |0 | +| - p1 | | | |f1 | | 0.6737 | | 0 | +| - p2 | | | |f1 | | 0.6885 | | 0 | +| - p3 | | | |f1 | | 0.6737 | | 0 | +| - re | | | |f1 | | 0.5091 | |0 | +| - p1 | | | |f1 | | 0.5121 | | 0 | +| - p2 | | | |f1 | | 0.5061 
| | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__sl__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5373e7bf56c7d6beb16eb1374952f5e2c4b6499 --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4508 | |0 | +| - p1 | | | |f1 | | 0.4370 | | 0 | +| - p2 | | | |f1 | | 0.4783 | | 0 | +| - p3 | | | |f1 | | 0.4370 | | 0 | +| - re | | | |f1 | | 0.4301 | |0 | +| - p1 | | | |f1 | | 0.4255 | | 0 | +| - p2 | | | |f1 | | 0.4391 | | 0 | +| - p3 | | | |f1 | | 0.4255 | | 0 | diff --git a/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..115771abd5af83727d0b2fe790099678b144b94e --- /dev/null +++ b/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6806 | |0 | +| - p1 | | | |f1 | | 0.6750 | | 0 | +| - p2 | | | |f1 | | 0.6918 | | 0 | +| - p3 | | | |f1 | | 0.6750 | | 0 | +| - re | | | |f1 | | 0.4926 | |0 | +| - p1 | | | |f1 | | 0.5149 | | 0 | +| - p2 | | | |f1 | | 0.4703 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__en__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1b2577292e61c9a10baa8c2540fd0a459d8c6329 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-27b-text-it 
), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5011 | |0 | +| - p1 | | | |f1 | | 0.3842 | | 0 | +| - p2 | | | |f1 | | 0.6035 | | 0 | +| - p3 | | | |f1 | | 0.5156 | | 0 | +| - re | | | |f1 | | 0.4681 | |0 | +| - p1 | | | |f1 | | 0.4836 | | 0 | +| - p2 | | | |f1 | | 0.4763 | | 0 | +| - p3 | | | |f1 | | 0.4443 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7623ca72ca820866eb81231f86b3d548626a8d19 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6324 | |0 | +| - p1 | | | |f1 | | 0.6355 | | 0 | +| - p2 | | | |f1 | | 0.6161 | | 0 | +| - p3 | | | |f1 | | 0.6455 | | 0 | +| - re | | | |f1 | | 0.5528 | |0 | +| - p1 | | | |f1 | | 0.5562 | | 0 | +| - p2 | | | |f1 | | 0.5494 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__gr__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..72927e1065723bce9e442c8bb5d582ff904d0b5d --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5585 | |0 | +| - p1 | | | |f1 | | 0.5314 | | 0 | +| - p2 | | | |f1 | | 0.6126 | | 0 | +| - p3 | | | |f1 | | 0.5314 | | 0 | +| - re | | | |f1 | | 0.4199 | |0 | +| - p1 | | | 
|f1 | | 0.4069 | | 0 | +| - p2 | | | |f1 | | 0.4332 | | 0 | +| - p3 | | | |f1 | | 0.4197 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..6692e6a059fce094e12834da11ea94e21e453ead --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6839 | |0 | +| - p1 | | | |f1 | | 0.6836 | | 0 | +| - p2 | | | |f1 | | 0.6846 | | 0 | +| - p3 | | | |f1 | | 0.6836 | | 0 | +| - re | | | |f1 | | 0.5629 | |0 | +| - p1 | | | |f1 | | 0.5392 | | 0 | +| - p2 | | | |f1 | | 0.5867 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__it__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..5717be3b30aac245a3a2a71d19b763a9420bd321 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5351 | |0 | +| - p1 | | | |f1 | | 0.4261 | | 0 | +| - p2 | | | |f1 | | 0.6212 | | 0 | +| - p3 | | | |f1 | | 0.5582 | | 0 | +| - re | | | |f1 | | 0.4521 | |0 | +| - p1 | | | |f1 | | 0.4042 | | 0 | +| - p2 | | | |f1 | | 0.4916 | | 0 | +| - p3 | | | |f1 | | 0.4604 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f97518ce47c0c838a28aaedac0ae202b9f93d543 --- 
/dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt @@ -0,0 +1,9 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.7133 | |0 | +| - p1 | | | |f1 | | 0.7262 | | 0 | +| - p2 | | | |f1 | | 0.7005 | | 0 | +| - re | | | |f1 | | 0.6077 | |0 | +| - p1 | | | |f1 | | 0.5919 | | 0 | +| - p2 | | | |f1 | | 0.6235 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__pl__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..96a3e0f389382f42c12871e3aaec5214e7b81ca6 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4245 | |0 | +| - p1 | | | |f1 | | 0.4216 | | 0 | +| - p2 | | | |f1 | | 0.4303 | | 0 | +| - p3 | | | |f1 | | 0.4216 | | 0 | +| - re | | | |f1 | | 0.4332 | |0 | +| - p1 | | | |f1 | | 0.4325 | | 0 | +| - p2 | | | |f1 | | 0.4424 | | 0 | +| - p3 | | | |f1 | | 0.4246 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..841307e4d812da81a7fbca9681489aa1dd6b9ffd --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6791 | |0 | +| - p1 | | | |f1 | | 0.6829 | | 0 | +| - p2 | | | |f1 | | 
0.6715 | | 0 | +| - p3 | | | |f1 | | 0.6829 | | 0 | +| - re | | | |f1 | | 0.6036 | |0 | +| - p1 | | | |f1 | | 0.5940 | | 0 | +| - p2 | | | |f1 | | 0.6133 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sk__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a68cf5152499faafd28f303a0956f103869d574 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2336 | |0 | +| - p1 | | | |f1 | | 0.2971 | | 0 | +| - p2 | | | |f1 | | 0.1066 | | 0 | +| - p3 | | | |f1 | | 0.2971 | | 0 | +| - re | | | |f1 | | 0.4440 | |0 | +| - p1 | | | |f1 | | 0.4395 | | 0 | +| - p2 | | | |f1 | | 0.4531 | | 0 | +| - p3 | | | |f1 | | 0.4395 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..db98f396eb2efd3f8a738328dccd5f86e85a761d --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.7137 | |0 | +| - p1 | | | |f1 | | 0.7143 | | 0 | +| - p2 | | | |f1 | | 0.7127 | | 0 | +| - p3 | | | |f1 | | 0.7143 | | 0 | +| - re | | | |f1 | | 0.5149 | |0 | +| - p1 | | | |f1 | | 0.5111 | | 0 | +| - p2 | | | |f1 | | 0.5188 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sl__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sl__0shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..763eee8744af6f1d60a97ef9a00cbb098a837cf5 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4863 | |0 | +| - p1 | | | |f1 | | 0.4675 | | 0 | +| - p2 | | | |f1 | | 0.5238 | | 0 | +| - p3 | | | |f1 | | 0.4675 | | 0 | +| - re | | | |f1 | | 0.4201 | |0 | +| - p1 | | | |f1 | | 0.4182 | | 0 | +| - p2 | | | |f1 | | 0.4239 | | 0 | +| - p3 | | | |f1 | | 0.4182 | | 0 | diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d78f14f5a8ee72f5800cb860ae7867ef2121ce5 --- /dev/null +++ b/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6887 | |0 | +| - p1 | | | |f1 | | 0.6947 | | 0 | +| - p2 | | | |f1 | | 0.6765 | | 0 | +| - p3 | | | |f1 | | 0.6947 | | 0 | +| - re | | | |f1 | | 0.5457 | |0 | +| - p1 | | | |f1 | | 0.5323 | | 0 | +| - p2 | | | |f1 | | 0.5590 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__en__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c6bf6c8db8f6569c5b34f55ef22ac9906fb386f --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| 
+|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2625 | |0 | +| - p1 | | | |f1 | | 0.2635 | | 0 | +| - p2 | | | |f1 | | 0.2503 | | 0 | +| - p3 | | | |f1 | | 0.2737 | | 0 | +| - re | | | |f1 | | 0.2851 | |0 | +| - p1 | | | |f1 | | 0.2095 | | 0 | +| - p2 | | | |f1 | | 0.3257 | | 0 | +| - p3 | | | |f1 | | 0.3203 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7eb28cd4a6646d5d77b90a4ccdc39516251d0583 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4930 | |0 | +| - p1 | | | |f1 | | 0.4833 | | 0 | +| - p2 | | | |f1 | | 0.5005 | | 0 | +| - p3 | | | |f1 | | 0.4951 | | 0 | +| - re | | | |f1 | | 0.1101 | |0 | +| - p1 | | | |f1 | | 0.0964 | | 0 | +| - p2 | | | |f1 | | 0.1237 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__gr__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..88b8e821fdc2e1a20bcb011ab95ccf2024f44b33 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2688 | |0 | +| - p1 | | | |f1 | | 0.2705 | | 0 | +| - p2 | | | |f1 | | 0.2654 | | 0 | +| - p3 | | | |f1 | | 0.2705 | | 0 | +| - re | | | |f1 | | 0.2053 | |0 | +| - p1 | | | |f1 | | 0.2381 | | 0 | +| - p2 | | | |f1 | | 0.3024 | | 0 | +| - p3 | | | |f1 | | 0.0754 | | 0 | diff --git 
a/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..754c292e1e0a45a63de235cd27170c12a2da7975 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4953 | |0 | +| - p1 | | | |f1 | | 0.4910 | | 0 | +| - p2 | | | |f1 | | 0.5039 | | 0 | +| - p3 | | | |f1 | | 0.4910 | | 0 | +| - re | | | |f1 | | 0.1404 | |0 | +| - p1 | | | |f1 | | 0.1204 | | 0 | +| - p2 | | | |f1 | | 0.1605 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__it__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..1146e596eccebec747d71d3d9f1b5e42d75cb535 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2929 | |0 | +| - p1 | | | |f1 | | 0.3157 | | 0 | +| - p2 | | | |f1 | | 0.2627 | | 0 | +| - p3 | | | |f1 | | 0.3004 | | 0 | +| - re | | | |f1 | | 0.1767 | |0 | +| - p1 | | | |f1 | | 0.2154 | | 0 | +| - p2 | | | |f1 | | 0.2461 | | 0 | +| - p3 | | | |f1 | | 0.0688 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..d07b5dd61e8606d9f7f3eda226ef534b3aee0982 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 
+|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5454 | |0 | +| - p1 | | | |f1 | | 0.5633 | | 0 | +| - p2 | | | |f1 | | 0.5377 | | 0 | +| - p3 | | | |f1 | | 0.5352 | | 0 | +| - re | | | |f1 | | 0.1754 | |0 | +| - p1 | | | |f1 | | 0.1592 | | 0 | +| - p2 | | | |f1 | | 0.1917 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__pl__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a27658a2356685999114b0b67b59a745c9f24872 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2231 | |0 | +| - p1 | | | |f1 | | 0.2255 | | 0 | +| - p2 | | | |f1 | | 0.2183 | | 0 | +| - p3 | | | |f1 | | 0.2255 | | 0 | +| - re | | | |f1 | | 0.1173 | |0 | +| - p1 | | | |f1 | | 0.1150 | | 0 | +| - p2 | | | |f1 | | 0.1314 | | 0 | +| - p3 | | | |f1 | | 0.1054 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..00950db0ea757d5870850f97f2086d3650e46db0 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5193 | |0 | +| - p1 | | | |f1 | | 0.5186 | | 0 | +| - p2 | | | |f1 | | 0.5206 | | 0 | +| - p3 | | | |f1 | | 0.5186 | | 0 | +| - re | | | |f1 | | 0.1084 | |0 | +| - p1 | | | |f1 | | 0.1171 | | 0 | +| - p2 | | | |f1 | | 0.0997 | | 0 | diff --git 
a/csv_files/outputs/google__medgemma-4b-it__sk__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..239962b493c557ae41d417f924fa550434337b38 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2427 | |0 | +| - p1 | | | |f1 | | 0.2447 | | 0 | +| - p2 | | | |f1 | | 0.2387 | | 0 | +| - p3 | | | |f1 | | 0.2447 | | 0 | +| - re | | | |f1 | | 0.1212 | |0 | +| - p1 | | | |f1 | | 0.1119 | | 0 | +| - p2 | | | |f1 | | 0.1399 | | 0 | +| - p3 | | | |f1 | | 0.1119 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3d195a3ff62fe98fb857bcfefa6a0131f83fcad --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4654 | |0 | +| - p1 | | | |f1 | | 0.4756 | | 0 | +| - p2 | | | |f1 | | 0.4449 | | 0 | +| - p3 | | | |f1 | | 0.4756 | | 0 | +| - re | | | |f1 | | 0.1052 | |0 | +| - p1 | | | |f1 | | 0.1095 | | 0 | +| - p2 | | | |f1 | | 0.1009 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__sl__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a697f8a782fb9367aaef7232d34dddd2d425cf3 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1 +|Tasks 
|Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2569 | |0 | +| - p1 | | | |f1 | | 0.2574 | | 0 | +| - p2 | | | |f1 | | 0.2558 | | 0 | +| - p3 | | | |f1 | | 0.2574 | | 0 | +| - re | | | |f1 | | 0.1012 | |0 | +| - p1 | | | |f1 | | 0.0973 | | 0 | +| - p2 | | | |f1 | | 0.1089 | | 0 | +| - p3 | | | |f1 | | 0.0973 | | 0 | diff --git a/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1f5609bbc5bd493634499b48a23116288685483 --- /dev/null +++ b/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5063 | |0 | +| - p1 | | | |f1 | | 0.5117 | | 0 | +| - p2 | | | |f1 | | 0.4955 | | 0 | +| - p3 | | | |f1 | | 0.5117 | | 0 | +| - re | | | |f1 | | 0.1139 | |0 | +| - p1 | | | |f1 | | 0.1178 | | 0 | +| - p2 | | | |f1 | | 0.1101 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__en__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..013cdc87b441c877106718774718193dafef8e47 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2786 | |0 | +| - p1 | | | |f1 | | 0.2502 | | 0 | +| - p2 | | | |f1 | | 0.3089 | | 0 | +| - p3 | | | |f1 | | 0.2768 | | 0 | +| - re | | | |f1 | | 0.3248 | |0 | +| - p1 | | | |f1 | | 0.2274 | | 0 | +| - p2 | | | |f1 | | 0.3929 | | 0 | +| - p3 | 
| | |f1 | | 0.3542 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..08a27fe55fd3552bcd5cab21470309efc0c83420 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5008 | |0 | +| - p1 | | | |f1 | | 0.5009 | | 0 | +| - p2 | | | |f1 | | 0.4966 | | 0 | +| - p3 | | | |f1 | | 0.5049 | | 0 | +| - re | | | |f1 | | 0.1135 | |0 | +| - p1 | | | |f1 | | 0.1175 | | 0 | +| - p2 | | | |f1 | | 0.1095 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__gr__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..31ecf07da62ee131ebf0e5e5ebff458a367e510e --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1717 | |0 | +| - p1 | | | |f1 | | 0.1641 | | 0 | +| - p2 | | | |f1 | | 0.1869 | | 0 | +| - p3 | | | |f1 | | 0.1641 | | 0 | +| - re | | | |f1 | | 0.0977 | |0 | +| - p1 | | | |f1 | | 0.0736 | | 0 | +| - p2 | | | |f1 | | 0.0778 | | 0 | +| - p3 | | | |f1 | | 0.1418 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e67d7603de9b38880b82f8683d0585a4840fa8f --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt @@ 
-0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3384 | |0 | +| - p1 | | | |f1 | | 0.3375 | | 0 | +| - p2 | | | |f1 | | 0.3403 | | 0 | +| - p3 | | | |f1 | | 0.3375 | | 0 | +| - re | | | |f1 | | 0.0554 | |0 | +| - p1 | | | |f1 | | 0.0427 | | 0 | +| - p2 | | | |f1 | | 0.0681 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__it__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7e91df597747586979868f6704d661fa4003ed78 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3307 | |0 | +| - p1 | | | |f1 | | 0.3397 | | 0 | +| - p2 | | | |f1 | | 0.3300 | | 0 | +| - p3 | | | |f1 | | 0.3226 | | 0 | +| - re | | | |f1 | | 0.0792 | |0 | +| - p1 | | | |f1 | | 0.1489 | | 0 | +| - p2 | | | |f1 | | 0.0736 | | 0 | +| - p3 | | | |f1 | | 0.0149 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..121f203a01339f5e925b9737f899607005ea9600 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5257 | |0 | +| - p1 | | | |f1 | | 0.5195 | | 0 | +| - p2 | | | |f1 | | 0.5301 | | 0 | +| - p3 | | | |f1 | | 0.5275 | | 0 | 
+| - re | | | |f1 | | 0.1537 | |0 | +| - p1 | | | |f1 | | 0.2114 | | 0 | +| - p2 | | | |f1 | | 0.0961 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__pl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..422658378f9a5071d891cc05d71f0b63c57792a5 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2831 | |0 | +| - p1 | | | |f1 | | 0.2815 | | 0 | +| - p2 | | | |f1 | | 0.2861 | | 0 | +| - p3 | | | |f1 | | 0.2815 | | 0 | +| - re | | | |f1 | | 0.2693 | |0 | +| - p1 | | | |f1 | | 0.2109 | | 0 | +| - p2 | | | |f1 | | 0.2908 | | 0 | +| - p3 | | | |f1 | | 0.3061 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4d12a5ca302c46250b4364b11751aa53774d14c --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3986 | |0 | +| - p1 | | | |f1 | | 0.3913 | | 0 | +| - p2 | | | |f1 | | 0.4132 | | 0 | +| - p3 | | | |f1 | | 0.3913 | | 0 | +| - re | | | |f1 | | 0.1231 | |0 | +| - p1 | | | |f1 | | 0.1255 | | 0 | +| - p2 | | | |f1 | | 0.1207 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sk__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..5b9076bc8bc73843bc160a1befda68db75117943 --- 
/dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2710 | |0 | +| - p1 | | | |f1 | | 0.2571 | | 0 | +| - p2 | | | |f1 | | 0.2987 | | 0 | +| - p3 | | | |f1 | | 0.2571 | | 0 | +| - re | | | |f1 | | 0.1062 | |0 | +| - p1 | | | |f1 | | 0.1554 | | 0 | +| - p2 | | | |f1 | | 0.0077 | | 0 | +| - p3 | | | |f1 | | 0.1554 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..b57188ffe0c2040715c0e6497ab2516ac8670233 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4025 | |0 | +| - p1 | | | |f1 | | 0.4106 | | 0 | +| - p2 | | | |f1 | | 0.3861 | | 0 | +| - p3 | | | |f1 | | 0.4106 | | 0 | +| - re | | | |f1 | | 0.0557 | |0 | +| - p1 | | | |f1 | | 0.0509 | | 0 | +| - p2 | | | |f1 | | 0.0606 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..d887894954db8e785d54f3102955aca48e0af8d0 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2892 | |0 | +| - p1 | | | |f1 | | 0.2998 | | 
0 | +| - p2 | | | |f1 | | 0.2680 | | 0 | +| - p3 | | | |f1 | | 0.2998 | | 0 | +| - re | | | |f1 | | 0.0304 | |0 | +| - p1 | | | |f1 | | 0.0395 | | 0 | +| - p2 | | | |f1 | | 0.0121 | | 0 | +| - p3 | | | |f1 | | 0.0395 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a2b1dd7d9912197af51948b750974c585ef723b4 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4021 | |0 | +| - p1 | | | |f1 | | 0.4036 | | 0 | +| - p2 | | | |f1 | | 0.3990 | | 0 | +| - p3 | | | |f1 | | 0.4036 | | 0 | +| - re | | | |f1 | | 0.0752 | |0 | +| - p1 | | | |f1 | | 0.0829 | | 0 | +| - p2 | | | |f1 | | 0.0674 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__en__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0a1fea4cfc488be5c9595119965c6b6f0736525 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1598 | |0 | +| - p1 | | | |f1 | | 0.0761 | | 0 | +| - p2 | | | |f1 | | 0.2410 | | 0 | +| - p3 | | | |f1 | | 0.1625 | | 0 | +| - re | | | |f1 | | 0.2982 | |0 | +| - p1 | | | |f1 | | 0.1135 | | 0 | +| - p2 | | | |f1 | | 0.4006 | | 0 | +| - p3 | | | |f1 | | 0.3804 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt 
b/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..75d3fa27c7ce99c685520899f83d5c67397a8c6c --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5216 | |0 | +| - p1 | | | |f1 | | 0.5357 | | 0 | +| - p2 | | | |f1 | | 0.5227 | | 0 | +| - p3 | | | |f1 | | 0.5063 | | 0 | +| - re | | | |f1 | | 0.1660 | |0 | +| - p1 | | | |f1 | | 0.1432 | | 0 | +| - p2 | | | |f1 | | 0.1888 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__gr__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..23f202e0c23d9690d048b28b3974736f3a0b7bde --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1159 | |0 | +| - p1 | | | |f1 | | 0.1294 | | 0 | +| - p2 | | | |f1 | | 0.0890 | | 0 | +| - p3 | | | |f1 | | 0.1294 | | 0 | +| - re | | | |f1 | | 0.1184 | |0 | +| - p1 | | | |f1 | | 0.0962 | | 0 | +| - p2 | | | |f1 | | 0.0673 | | 0 | +| - p3 | | | |f1 | | 0.1916 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e2ead33340d5e2a561d0e7037780afe79f6b98e --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks 
|Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2881 | |0 | +| - p1 | | | |f1 | | 0.2822 | | 0 | +| - p2 | | | |f1 | | 0.2999 | | 0 | +| - p3 | | | |f1 | | 0.2822 | | 0 | +| - re | | | |f1 | | 0.0625 | |0 | +| - p1 | | | |f1 | | 0.0576 | | 0 | +| - p2 | | | |f1 | | 0.0674 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__it__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca19f82340898a6e6cee1bcb3f62d2ab4307d0a8 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2023 | |0 | +| - p1 | | | |f1 | | 0.0867 | | 0 | +| - p2 | | | |f1 | | 0.2484 | | 0 | +| - p3 | | | |f1 | | 0.2717 | | 0 | +| - re | | | |f1 | | 0.2623 | |0 | +| - p1 | | | |f1 | | 0.1712 | | 0 | +| - p2 | | | |f1 | | 0.2896 | | 0 | +| - p3 | | | |f1 | | 0.3261 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..87aadc06deaedb8c71f2e5ddde03e98261c851ca --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5715 | |0 | +| - p1 | | | |f1 | | 0.5729 | | 0 | +| - p2 | | | |f1 | | 0.5627 | | 0 | +| - p3 | | | |f1 | | 0.5790 | | 0 | +| - re | | | |f1 | | 0.2590 | |0 | +| - p1 | | | |f1 | | 0.2873 | | 0 | +| - p2 | | | |f1 | | 
0.2307 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__pl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..aebf1a17c31f3208d7ad2f0dda353780d5fd624f --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1567 | |0 | +| - p1 | | | |f1 | | 0.1510 | | 0 | +| - p2 | | | |f1 | | 0.1680 | | 0 | +| - p3 | | | |f1 | | 0.1510 | | 0 | +| - re | | | |f1 | | 0.2881 | |0 | +| - p1 | | | |f1 | | 0.2683 | | 0 | +| - p2 | | | |f1 | | 0.3126 | | 0 | +| - p3 | | | |f1 | | 0.2832 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee60b957d0e7fb3d63ffcd076242aeb0c2e87cf9 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4447 | |0 | +| - p1 | | | |f1 | | 0.4417 | | 0 | +| - p2 | | | |f1 | | 0.4506 | | 0 | +| - p3 | | | |f1 | | 0.4417 | | 0 | +| - re | | | |f1 | | 0.2105 | |0 | +| - p1 | | | |f1 | | 0.1525 | | 0 | +| - p2 | | | |f1 | | 0.2686 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sk__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..5974f7286d4850f71ec33abd4a49a15c85c1d6a8 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__0shot.txt @@ -0,0 +1,11 @@ 
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1788 | |0 | +| - p1 | | | |f1 | | 0.1641 | | 0 | +| - p2 | | | |f1 | | 0.2081 | | 0 | +| - p3 | | | |f1 | | 0.1641 | | 0 | +| - re | | | |f1 | | 0.1221 | |0 | +| - p1 | | | |f1 | | 0.1776 | | 0 | +| - p2 | | | |f1 | | 0.0112 | | 0 | +| - p3 | | | |f1 | | 0.1776 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..67861309c2309406060c621903e95b4d4d36b9c4 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4226 | |0 | +| - p1 | | | |f1 | | 0.4327 | | 0 | +| - p2 | | | |f1 | | 0.4023 | | 0 | +| - p3 | | | |f1 | | 0.4327 | | 0 | +| - re | | | |f1 | | 0.1233 | |0 | +| - p1 | | | |f1 | | 0.1070 | | 0 | +| - p2 | | | |f1 | | 0.1395 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..30eae1f944b43a56e40bfd388434c8750039220f --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1792 | |0 | +| - p1 | | | |f1 | | 0.1758 | | 0 | +| - p2 | | | |f1 | | 0.1860 | | 0 | +| - p3 | | | |f1 | | 0.1758 | | 0 | +| - re | | | 
|f1 | | 0.1325 | |0 | +| - p1 | | | |f1 | | 0.1446 | | 0 | +| - p2 | | | |f1 | | 0.1084 | | 0 | +| - p3 | | | |f1 | | 0.1446 | | 0 | diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..450874e9bed331799c2f958afd58e7222f42b0d9 --- /dev/null +++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3837 | |0 | +| - p1 | | | |f1 | | 0.3973 | | 0 | +| - p2 | | | |f1 | | 0.3564 | | 0 | +| - p3 | | | |f1 | | 0.3973 | | 0 | +| - re | | | |f1 | | 0.1312 | |0 | +| - p1 | | | |f1 | | 0.1155 | | 0 | +| - p2 | | | |f1 | | 0.1468 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..8e502b0469c74a107e33036e293f44cc2fa4b63e --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2278 | |0 | +| - p1 | | | |f1 | | 0.2529 | | 0 | +| - p2 | | | |f1 | | 0.2144 | | 0 | +| - p3 | | | |f1 | | 0.2162 | | 0 | +| - re | | | |f1 | | 0.3007 | |0 | +| - p1 | | | |f1 | | 0.3688 | | 0 | +| - p2 | | | |f1 | | 0.3642 | | 0 | +| - p3 | | | |f1 | | 0.1693 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..b3a5a39e1e6dba5602444c93a8a19333b259469c --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4753 | |0 | +| - p1 | | | |f1 | | 0.4725 | | 0 | +| - p2 | | | |f1 | | 0.4730 | | 0 | +| - p3 | | | |f1 | | 0.4805 | | 0 | +| - re | | | |f1 | | 0.3314 | |0 | +| - p1 | | | |f1 | | 0.2593 | | 0 | +| - p2 | | | |f1 | | 0.4034 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..16b5a9165aad174a6a36147b6996cc7fa1e6604d --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1705 | |0 | +| - p1 | | | |f1 | | 0.1603 | | 0 | +| - p2 | | | |f1 | | 0.1909 | | 0 | +| - p3 | | | |f1 | | 0.1603 | | 0 | +| - re | | | |f1 | | 0.0592 | |0 | +| - p1 | | | |f1 | | 0.0432 | | 0 | +| - p2 | | | |f1 | | 0.0348 | | 0 | +| - p3 | | | |f1 | | 0.0994 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a6d3a6d74f4e46818d3bc6b0cd9e8a03530db07 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1 +|Tasks 
|Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3548 | |0 | +| - p1 | | | |f1 | | 0.3498 | | 0 | +| - p2 | | | |f1 | | 0.3648 | | 0 | +| - p3 | | | |f1 | | 0.3498 | | 0 | +| - re | | | |f1 | | 0.1699 | |0 | +| - p1 | | | |f1 | | 0.1055 | | 0 | +| - p2 | | | |f1 | | 0.2343 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e0cf20ff46d2445183d7c850dfe015d3e0f0f3ba --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2433 | |0 | +| - p1 | | | |f1 | | 0.2788 | | 0 | +| - p2 | | | |f1 | | 0.2030 | | 0 | +| - p3 | | | |f1 | | 0.2481 | | 0 | +| - re | | | |f1 | | 0.0561 | |0 | +| - p1 | | | |f1 | | 0.1382 | | 0 | +| - p2 | | | |f1 | | 0.0163 | | 0 | +| - p3 | | | |f1 | | 0.0140 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c4d23b35f6e63a6ec9591f9f4a72438fe617193 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5176 | |0 | +| - p1 | | | |f1 | | 0.5147 | | 0 | +| - p2 | | | |f1 | | 0.5232 | | 0 | +| - p3 | | | |f1 | | 0.5149 | | 0 | +| - re | | | |f1 | | 0.3811 | |0 | 
+| - p1 | | | |f1 | | 0.3092 | | 0 | +| - p2 | | | |f1 | | 0.4530 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..e3a6ccb53c1fbdee54552c727abc293c311e9246 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2953 | |0 | +| - p1 | | | |f1 | | 0.3024 | | 0 | +| - p2 | | | |f1 | | 0.2811 | | 0 | +| - p3 | | | |f1 | | 0.3024 | | 0 | +| - re | | | |f1 | | 0.1006 | |0 | +| - p1 | | | |f1 | | 0.0863 | | 0 | +| - p2 | | | |f1 | | 0.1292 | | 0 | +| - p3 | | | |f1 | | 0.0863 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..781a606a64f7cc4449961c855e8ee8cf0ceacd37 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4956 | |0 | +| - p1 | | | |f1 | | 0.4911 | | 0 | +| - p2 | | | |f1 | | 0.5046 | | 0 | +| - p3 | | | |f1 | | 0.4911 | | 0 | +| - re | | | |f1 | | 0.3603 | |0 | +| - p1 | | | |f1 | | 0.3895 | | 0 | +| - p2 | | | |f1 | | 0.3311 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..bc1036659d5c18203c2be94655e756dba8a8c106 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2144 | |0 | +| - p1 | | | |f1 | | 0.2143 | | 0 | +| - p2 | | | |f1 | | 0.2146 | | 0 | +| - p3 | | | |f1 | | 0.2143 | | 0 | +| - re | | | |f1 | | 0.0782 | |0 | +| - p1 | | | |f1 | | 0.0756 | | 0 | +| - p2 | | | |f1 | | 0.0835 | | 0 | +| - p3 | | | |f1 | | 0.0756 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..81cfa0c7032d7533575224dc8c121a7e54038b71 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3951 | |0 | +| - p1 | | | |f1 | | 0.4029 | | 0 | +| - p2 | | | |f1 | | 0.3794 | | 0 | +| - p3 | | | |f1 | | 0.4029 | | 0 | +| - re | | | |f1 | | 0.2051 | |0 | +| - p1 | | | |f1 | | 0.2155 | | 0 | +| - p2 | | | |f1 | | 0.1948 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..ee93bfc18a44967177d18c70bb865dbc6b2ab75e --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1 +|Tasks 
|Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1826 | |0 | +| - p1 | | | |f1 | | 0.1766 | | 0 | +| - p2 | | | |f1 | | 0.1947 | | 0 | +| - p3 | | | |f1 | | 0.1766 | | 0 | +| - re | | | |f1 | | 0.1076 | |0 | +| - p1 | | | |f1 | | 0.0766 | | 0 | +| - p2 | | | |f1 | | 0.1695 | | 0 | +| - p3 | | | |f1 | | 0.0766 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..bd8a356a7c3cc706edb631e160103a2a498fa6ad --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4194 | |0 | +| - p1 | | | |f1 | | 0.4204 | | 0 | +| - p2 | | | |f1 | | 0.4174 | | 0 | +| - p3 | | | |f1 | | 0.4204 | | 0 | +| - re | | | |f1 | | 0.1970 | |0 | +| - p1 | | | |f1 | | 0.1990 | | 0 | +| - p2 | | | |f1 | | 0.1950 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4f9ec631fcea66bfe04d508e14ea80d723c51acb --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2271 | |0 | +| - p1 | | | |f1 | | 0.2767 | | 0 | +| - p2 | | | |f1 | | 0.2299 | | 0 | +| - p3 | | | |f1 | | 0.1748 | | 0 | +| - re | | | |f1 | | 0.3472 
| |0 | +| - p1 | | | |f1 | | 0.3694 | | 0 | +| - p2 | | | |f1 | | 0.3482 | | 0 | +| - p3 | | | |f1 | | 0.3241 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..b479ea31786ab215ef11aae9395ed0e12f7d21fe --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5762 | |0 | +| - p1 | | | |f1 | | 0.5777 | | 0 | +| - p2 | | | |f1 | | 0.5841 | | 0 | +| - p3 | | | |f1 | | 0.5668 | | 0 | +| - re | | | |f1 | | 0.4245 | |0 | +| - p1 | | | |f1 | | 0.3482 | | 0 | +| - p2 | | | |f1 | | 0.5008 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..29bdd0e9132787a1125ae29d9f42a6961388e7f2 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0717 | |0 | +| - p1 | | | |f1 | | 0.0732 | | 0 | +| - p2 | | | |f1 | | 0.0687 | | 0 | +| - p3 | | | |f1 | | 0.0732 | | 0 | +| - re | | | |f1 | | 0.2326 | |0 | +| - p1 | | | |f1 | | 0.1575 | | 0 | +| - p2 | | | |f1 | | 0.2117 | | 0 | +| - p3 | | | |f1 | | 0.3287 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt new 
file mode 100644 index 0000000000000000000000000000000000000000..9822eb38961b00cead5fcd9e98a85a6cad536133 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5050 | |0 | +| - p1 | | | |f1 | | 0.5081 | | 0 | +| - p2 | | | |f1 | | 0.4988 | | 0 | +| - p3 | | | |f1 | | 0.5081 | | 0 | +| - re | | | |f1 | | 0.2162 | |0 | +| - p1 | | | |f1 | | 0.2029 | | 0 | +| - p2 | | | |f1 | | 0.2296 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..67704668b629b8df7abedf42ca4d673b6a5bbc7c --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1960 | |0 | +| - p1 | | | |f1 | | 0.2792 | | 0 | +| - p2 | | | |f1 | | 0.1772 | | 0 | +| - p3 | | | |f1 | | 0.1316 | | 0 | +| - re | | | |f1 | | 0.2365 | |0 | +| - p1 | | | |f1 | | 0.2849 | | 0 | +| - p2 | | | |f1 | | 0.2384 | | 0 | +| - p3 | | | |f1 | | 0.1861 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b4bb613279f3b93b6e1f2cc9eb5aa447350fa8e --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), 
num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6441 | |0 | +| - p1 | | | |f1 | | 0.6430 | | 0 | +| - p2 | | | |f1 | | 0.6437 | | 0 | +| - p3 | | | |f1 | | 0.6457 | | 0 | +| - re | | | |f1 | | 0.3404 | |0 | +| - p1 | | | |f1 | | 0.2708 | | 0 | +| - p2 | | | |f1 | | 0.4099 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..99b46d553f57aaed0a8017a7a5911af7bb40f6d7 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0468 | |0 | +| - p1 | | | |f1 | | 0.0483 | | 0 | +| - p2 | | | |f1 | | 0.0439 | | 0 | +| - p3 | | | |f1 | | 0.0483 | | 0 | +| - re | | | |f1 | | 0.1823 | |0 | +| - p1 | | | |f1 | | 0.2123 | | 0 | +| - p2 | | | |f1 | | 0.1686 | | 0 | +| - p3 | | | |f1 | | 0.1661 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4909ab6216254be068a90c6f672fa19e16ba1202 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5375 | |0 | +| - p1 | | | |f1 | | 0.5352 | | 0 | +| - p2 | | | |f1 | | 0.5421 | | 0 | +| - p3 | | | |f1 
| | 0.5352 | | 0 | +| - re | | | |f1 | | 0.1859 | |0 | +| - p1 | | | |f1 | | 0.1863 | | 0 | +| - p2 | | | |f1 | | 0.1855 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb8f25d0decd15173485364726a4ed5b5da885d0 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0738 | |0 | +| - p1 | | | |f1 | | 0.0685 | | 0 | +| - p2 | | | |f1 | | 0.0844 | | 0 | +| - p3 | | | |f1 | | 0.0685 | | 0 | +| - re | | | |f1 | | 0.1596 | |0 | +| - p1 | | | |f1 | | 0.1696 | | 0 | +| - p2 | | | |f1 | | 0.1396 | | 0 | +| - p3 | | | |f1 | | 0.1696 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..026f8af475c0bd2d37a6b8ea25b4cf67c2ca0c88 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5030 | |0 | +| - p1 | | | |f1 | | 0.5025 | | 0 | +| - p2 | | | |f1 | | 0.5040 | | 0 | +| - p3 | | | |f1 | | 0.5025 | | 0 | +| - re | | | |f1 | | 0.1702 | |0 | +| - p1 | | | |f1 | | 0.1237 | | 0 | +| - p2 | | | |f1 | | 0.2166 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt 
b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..62298f12aa29d9fea6f35a71f1cb1c7d20d1d4e5 --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0842 | |0 | +| - p1 | | | |f1 | | 0.0861 | | 0 | +| - p2 | | | |f1 | | 0.0805 | | 0 | +| - p3 | | | |f1 | | 0.0861 | | 0 | +| - re | | | |f1 | | 0.1905 | |0 | +| - p1 | | | |f1 | | 0.2309 | | 0 | +| - p2 | | | |f1 | | 0.1096 | | 0 | +| - p3 | | | |f1 | | 0.2309 | | 0 | diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..389511b1661481c55f6cca90c523c9814540a17d --- /dev/null +++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5327 | |0 | +| - p1 | | | |f1 | | 0.5323 | | 0 | +| - p2 | | | |f1 | | 0.5335 | | 0 | +| - p3 | | | |f1 | | 0.5323 | | 0 | +| - re | | | |f1 | | 0.1723 | |0 | +| - p1 | | | |f1 | | 0.1390 | | 0 | +| - p2 | | | |f1 | | 0.2057 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0a6321951108e682662973e28de8c885a2ec6ac --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__0shot.txt @@ -0,0 +1,11 @@ +hf 
(pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2658 | |0 | +| - p1 | | | |f1 | | 0.2270 | | 0 | +| - p2 | | | |f1 | | 0.2709 | | 0 | +| - p3 | | | |f1 | | 0.2996 | | 0 | +| - re | | | |f1 | | 0.3280 | |0 | +| - p1 | | | |f1 | | 0.2157 | | 0 | +| - p2 | | | |f1 | | 0.3835 | | 0 | +| - p3 | | | |f1 | | 0.3848 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..2d7afc8c15ec13c18d8b6ddc2e8179614789e85b --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5730 | |0 | +| - p1 | | | |f1 | | 0.5840 | | 0 | +| - p2 | | | |f1 | | 0.5421 | | 0 | +| - p3 | | | |f1 | | 0.5928 | | 0 | +| - re | | | |f1 | | 0.4960 | |0 | +| - p1 | | | |f1 | | 0.4335 | | 0 | +| - p2 | | | |f1 | | 0.5586 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..f82fdd2d52536ceb420fb68f7fdc509d1e0571ce --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1585 | |0 | +| - p1 | | | |f1 | | 0.2130 | | 0 | +| - p2 | | | |f1 | | 0.0495 | | 0 | +| - p3 | | | |f1 | | 0.2130 | | 0 | +| - re | | 
| |f1 | | 0.0506 | |0 | +| - p1 | | | |f1 | | 0.0401 | | 0 | +| - p2 | | | |f1 | | 0.0250 | | 0 | +| - p3 | | | |f1 | | 0.0867 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..043f6fdaf361b4449451b4723c2d212109272003 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.3448 | |0 | +| - p1 | | | |f1 | | 0.3345 | | 0 | +| - p2 | | | |f1 | | 0.3655 | | 0 | +| - p3 | | | |f1 | | 0.3345 | | 0 | +| - re | | | |f1 | | 0.3752 | |0 | +| - p1 | | | |f1 | | 0.3749 | | 0 | +| - p2 | | | |f1 | | 0.3755 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..35603a5cf7326d49057dfc282d07cb3bcee22f05 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2011 | |0 | +| - p1 | | | |f1 | | 0.1261 | | 0 | +| - p2 | | | |f1 | | 0.2327 | | 0 | +| - p3 | | | |f1 | | 0.2444 | | 0 | +| - re | | | |f1 | | 0.1865 | |0 | +| - p1 | | | |f1 | | 0.2404 | | 0 | +| - p2 | | | |f1 | | 0.1699 | | 0 | +| - p3 | | | |f1 | | 0.1492 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..b5c661f33ce3164c556af215c55b5594d6e6a667 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5625 | |0 | +| - p1 | | | |f1 | | 0.5821 | | 0 | +| - p2 | | | |f1 | | 0.5432 | | 0 | +| - p3 | | | |f1 | | 0.5622 | | 0 | +| - re | | | |f1 | | 0.5040 | |0 | +| - p1 | | | |f1 | | 0.4622 | | 0 | +| - p2 | | | |f1 | | 0.5458 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..cfe1ab055abc55ac21f308106812a31c03dd75d8 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2414 | |0 | +| - p1 | | | |f1 | | 0.2452 | | 0 | +| - p2 | | | |f1 | | 0.2338 | | 0 | +| - p3 | | | |f1 | | 0.2452 | | 0 | +| - re | | | |f1 | | 0.0963 | |0 | +| - p1 | | | |f1 | | 0.1501 | | 0 | +| - p2 | | | |f1 | | 0.0123 | | 0 | +| - p3 | | | |f1 | | 0.1264 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd0f486fb1b90e0b4b851363edfb01a16cebda16 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| 
+|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4244 | |0 | +| - p1 | | | |f1 | | 0.4304 | | 0 | +| - p2 | | | |f1 | | 0.4123 | | 0 | +| - p3 | | | |f1 | | 0.4304 | | 0 | +| - re | | | |f1 | | 0.5350 | |0 | +| - p1 | | | |f1 | | 0.5129 | | 0 | +| - p2 | | | |f1 | | 0.5571 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..70be76745b95ffbda8aac22be805f33ae11b8f0d --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2871 | |0 | +| - p1 | | | |f1 | | 0.2717 | | 0 | +| - p2 | | | |f1 | | 0.3178 | | 0 | +| - p3 | | | |f1 | | 0.2717 | | 0 | +| - re | | | |f1 | | 0.0182 | |0 | +| - p1 | | | |f1 | | 0.0143 | | 0 | +| - p2 | | | |f1 | | 0.0260 | | 0 | +| - p3 | | | |f1 | | 0.0143 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf61cdbb7ef141477d6986bd57ffed0bdad70cbd --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4402 | |0 | +| - p1 | | | |f1 | | 0.4545 | | 0 | +| - p2 | | | |f1 | | 0.4116 | | 0 | +| - p3 | | | |f1 | | 0.4545 | | 0 | +| - re | | | |f1 | | 0.4222 | |0 | +| - p1 | | | |f1 | | 0.3750 | | 0 | +| - p2 | | | |f1 | | 0.4695 | | 0 | diff --git 
a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a9943589714a47225564e83a9d2210e3af8dcff2 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2297 | |0 | +| - p1 | | | |f1 | | 0.2519 | | 0 | +| - p2 | | | |f1 | | 0.1853 | | 0 | +| - p3 | | | |f1 | | 0.2519 | | 0 | +| - re | | | |f1 | | 0.0050 | |0 | +| - p1 | | | |f1 | | 0.0047 | | 0 | +| - p2 | | | |f1 | | 0.0058 | | 0 | +| - p3 | | | |f1 | | 0.0047 | | 0 | diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..a62c9f9481ff3aeeca9ee989a959904168f62616 --- /dev/null +++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.4050 | |0 | +| - p1 | | | |f1 | | 0.4121 | | 0 | +| - p2 | | | |f1 | | 0.3909 | | 0 | +| - p3 | | | |f1 | | 0.4121 | | 0 | +| - re | | | |f1 | | 0.2668 | |0 | +| - p1 | | | |f1 | | 0.2323 | | 0 | +| - p2 | | | |f1 | | 0.3012 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__en__0shot.txt b/csv_files/outputs/unsloth__phi-4__en__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..af80bdf2b861abf5a192d75b7889b3a166394a2c --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__en__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, 
batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0275 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0252 | | 0 | +| - p3 | | | |f1 | | 0.0572 | | 0 | +| - re | | | |f1 | | 0.4090 | |0 | +| - p1 | | | |f1 | | 0.4022 | | 0 | +| - p2 | | | |f1 | | 0.4219 | | 0 | +| - p3 | | | |f1 | | 0.4030 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__en__10shot.txt b/csv_files/outputs/unsloth__phi-4__en__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..67bac9316eb541f0cf2567e21a2c0ba8c3cd4065 --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__en__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5984 | |0 | +| - p1 | | | |f1 | | 0.6098 | | 0 | +| - p2 | | | |f1 | | 0.5711 | | 0 | +| - p3 | | | |f1 | | 0.6141 | | 0 | +| - re | | | |f1 | | 0.5269 | |0 | +| - p1 | | | |f1 | | 0.4912 | | 0 | +| - p2 | | | |f1 | | 0.5626 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__gr__0shot.txt b/csv_files/outputs/unsloth__phi-4__gr__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..7969180e3473a886d42e4f4be8a510d9266856ae --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__gr__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0000 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.0000 | | 0 | +| - p3 | | | |f1 | | 0.0000 | | 0 | +| - re | | | |f1 | | 0.2011 | |0 | +| - p1 | | | |f1 | | 0.2901 | | 0 | +| - p2 | | | |f1 | | 0.2208 | | 0 | +| - p3 | | | |f1 | | 0.0925 | | 0 | diff --git 
a/csv_files/outputs/unsloth__phi-4__gr__10shot.txt b/csv_files/outputs/unsloth__phi-4__gr__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..c69aa2d41494640e91af2c22fcd7397f5e1c3dcf --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__gr__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5682 | |0 | +| - p1 | | | |f1 | | 0.5717 | | 0 | +| - p2 | | | |f1 | | 0.5611 | | 0 | +| - p3 | | | |f1 | | 0.5717 | | 0 | +| - re | | | |f1 | | 0.5098 | |0 | +| - p1 | | | |f1 | | 0.4935 | | 0 | +| - p2 | | | |f1 | | 0.5261 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__it__0shot.txt b/csv_files/outputs/unsloth__phi-4__it__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..4ee83bc7873e14aefaae1669bf86dcc0e8005f95 --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__it__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.1717 | |0 | +| - p1 | | | |f1 | | 0.0000 | | 0 | +| - p2 | | | |f1 | | 0.1724 | | 0 | +| - p3 | | | |f1 | | 0.3428 | | 0 | +| - re | | | |f1 | | 0.3589 | |0 | +| - p1 | | | |f1 | | 0.3354 | | 0 | +| - p2 | | | |f1 | | 0.3737 | | 0 | +| - p3 | | | |f1 | | 0.3677 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__it__10shot.txt b/csv_files/outputs/unsloth__phi-4__it__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..9eb081ce4505d7cc1d729c40b2d9e1ee89d84318 --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__it__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| 
+|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.6759 | |0 | +| - p1 | | | |f1 | | 0.6647 | | 0 | +| - p2 | | | |f1 | | 0.6732 | | 0 | +| - p3 | | | |f1 | | 0.6897 | | 0 | +| - re | | | |f1 | | 0.5714 | |0 | +| - p1 | | | |f1 | | 0.5608 | | 0 | +| - p2 | | | |f1 | | 0.5820 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__pl__0shot.txt b/csv_files/outputs/unsloth__phi-4__pl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0cbde5ebdb58d95bb90c08f4a5252e17f7d1a71 --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__pl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0279 | |0 | +| - p1 | | | |f1 | | 0.0236 | | 0 | +| - p2 | | | |f1 | | 0.0366 | | 0 | +| - p3 | | | |f1 | | 0.0236 | | 0 | +| - re | | | |f1 | | 0.3814 | |0 | +| - p1 | | | |f1 | | 0.3799 | | 0 | +| - p2 | | | |f1 | | 0.3829 | | 0 | +| - p3 | | | |f1 | | 0.3813 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__pl__10shot.txt b/csv_files/outputs/unsloth__phi-4__pl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..415465038e12d91334c2f8044bb91ee130a5fb4b --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__pl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5474 | |0 | +| - p1 | | | |f1 | | 0.5549 | | 0 | +| - p2 | | | |f1 | | 0.5324 | | 0 | +| - p3 | | | |f1 | | 0.5549 | | 0 | +| - re | | | |f1 | | 0.5591 | |0 | +| - p1 | | | |f1 | | 0.5423 | | 0 | +| - p2 | | | |f1 | | 0.5760 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__sk__0shot.txt b/csv_files/outputs/unsloth__phi-4__sk__0shot.txt new file mode 100644 index 
0000000000000000000000000000000000000000..54a528becb31213a509950e7b1c281dc3f2fe23c --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__sk__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.0567 | |0 | +| - p1 | | | |f1 | | 0.0316 | | 0 | +| - p2 | | | |f1 | | 0.1070 | | 0 | +| - p3 | | | |f1 | | 0.0316 | | 0 | +| - re | | | |f1 | | 0.3277 | |0 | +| - p1 | | | |f1 | | 0.3252 | | 0 | +| - p2 | | | |f1 | | 0.3326 | | 0 | +| - p3 | | | |f1 | | 0.3252 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__sk__10shot.txt b/csv_files/outputs/unsloth__phi-4__sk__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..830947c5e11d99fe023efdd797f9651bb1af6ff8 --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__sk__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5524 | |0 | +| - p1 | | | |f1 | | 0.5561 | | 0 | +| - p2 | | | |f1 | | 0.5449 | | 0 | +| - p3 | | | |f1 | | 0.5561 | | 0 | +| - re | | | |f1 | | 0.5050 | |0 | +| - p1 | | | |f1 | | 0.5106 | | 0 | +| - p2 | | | |f1 | | 0.4994 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__sl__0shot.txt b/csv_files/outputs/unsloth__phi-4__sl__0shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ae5cf81614f1de4df39c8a8404617317d393f7a --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__sl__0shot.txt @@ -0,0 +1,11 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.2241 | |0 | +| - p1 | | | |f1 | | 0.2870 | | 0 | +| - p2 | | | |f1 | | 0.0981 | | 0 | +| 
- p3 | | | |f1 | | 0.2870 | | 0 | +| - re | | | |f1 | | 0.2721 | |0 | +| - p1 | | | |f1 | | 0.3209 | | 0 | +| - p2 | | | |f1 | | 0.1744 | | 0 | +| - p3 | | | |f1 | | 0.3209 | | 0 | diff --git a/csv_files/outputs/unsloth__phi-4__sl__10shot.txt b/csv_files/outputs/unsloth__phi-4__sl__10shot.txt new file mode 100644 index 0000000000000000000000000000000000000000..d661ccb90e76eaeaab250e84160ff590ff8c5718 --- /dev/null +++ b/csv_files/outputs/unsloth__phi-4__sl__10shot.txt @@ -0,0 +1,10 @@ +hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1 +|Tasks |Version|Filter|n-shot|Metric| |Value | |Stderr| +|-------|-------|------|------|------|----|------|---|------| +| - ner | | | |f1 | | 0.5577 | |0 | +| - p1 | | | |f1 | | 0.5586 | | 0 | +| - p2 | | | |f1 | | 0.5558 | | 0 | +| - p3 | | | |f1 | | 0.5586 | | 0 | +| - re | | | |f1 | | 0.5175 | |0 | +| - p1 | | | |f1 | | 0.5117 | | 0 | +| - p2 | | | |f1 | | 0.5232 | | 0 | diff --git a/e3c_llm_requests/Henrychur/MMed-Llama-3-8B.json b/e3c_llm_requests/Henrychur/MMed-Llama-3-8B.json new file mode 100644 index 0000000000000000000000000000000000000000..86b0019fb62092325cdc79c7eb8218aed3bad09f --- /dev/null +++ b/e3c_llm_requests/Henrychur/MMed-Llama-3-8B.json @@ -0,0 +1,8 @@ +{ + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" +} \ No newline at end of file diff --git a/e3c_llm_requests/HiTZ/Medical-mT5-large.json b/e3c_llm_requests/HiTZ/Medical-mT5-large.json new file mode 100644 index 0000000000000000000000000000000000000000..42da6bb5dbb7d478648d28988f549c4c3e885a7c --- /dev/null +++ b/e3c_llm_requests/HiTZ/Medical-mT5-large.json @@ -0,0 +1,8 @@ +{ + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 
15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" +} \ No newline at end of file diff --git a/e3c_llm_requests/Qwen/Qwen2.5-14B-Instruct-1M.json b/e3c_llm_requests/Qwen/Qwen2.5-14B-Instruct-1M.json new file mode 100644 index 0000000000000000000000000000000000000000..e4460926779e971e3317af33665cf9278980c10d --- /dev/null +++ b/e3c_llm_requests/Qwen/Qwen2.5-14B-Instruct-1M.json @@ -0,0 +1,8 @@ +{ + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" +} \ No newline at end of file diff --git a/e3c_llm_requests/Qwen/Qwen2.5-32B-Instruct.json b/e3c_llm_requests/Qwen/Qwen2.5-32B-Instruct.json new file mode 100644 index 0000000000000000000000000000000000000000..3bfb3ff4c28797c9aad9070719f798119c2784e3 --- /dev/null +++ b/e3c_llm_requests/Qwen/Qwen2.5-32B-Instruct.json @@ -0,0 +1,8 @@ +{ + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" +} \ No newline at end of file diff --git a/e3c_llm_requests/Qwen/Qwen3-30B-A3B-Instruct-2507.json b/e3c_llm_requests/Qwen/Qwen3-30B-A3B-Instruct-2507.json new file mode 100644 index 0000000000000000000000000000000000000000..8c19206d89e4c8cc448322bcf562cdeea4f686ba --- /dev/null +++ b/e3c_llm_requests/Qwen/Qwen3-30B-A3B-Instruct-2507.json @@ -0,0 +1,8 @@ +{ + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B-checkpoint.json 
b/e3c_llm_requests/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..3a574a6dcb81e49041fa0f592ee86930e1b43847 --- /dev/null +++ b/e3c_llm_requests/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B-checkpoint.json @@ -0,0 +1,8 @@ +{ + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.json b/e3c_llm_requests/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.json new file mode 100644 index 0000000000000000000000000000000000000000..3a574a6dcb81e49041fa0f592ee86930e1b43847 --- /dev/null +++ b/e3c_llm_requests/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.json @@ -0,0 +1,8 @@ +{ + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/epfl-llm/meditron-7b.json b/e3c_llm_requests/epfl-llm/meditron-7b.json new file mode 100644 index 0000000000000000000000000000000000000000..773ee8fbfdfa8623fdc11b71a79ef5122ea682b7 --- /dev/null +++ b/e3c_llm_requests/epfl-llm/meditron-7b.json @@ -0,0 +1,8 @@ +{ + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" +} \ No newline at end of file diff --git a/e3c_llm_requests/google/gemma-2-9b-it.json b/e3c_llm_requests/google/gemma-2-9b-it.json new file mode 100644 index 
0000000000000000000000000000000000000000..bc6b860d3bd567948054e01f7630043dd7220af4 --- /dev/null +++ b/e3c_llm_requests/google/gemma-2-9b-it.json @@ -0,0 +1,8 @@ +{ + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/google/gemma-3-27b-it.json b/e3c_llm_requests/google/gemma-3-27b-it.json new file mode 100644 index 0000000000000000000000000000000000000000..0cbbf8fd214db1aa63941c6685829be3c1ef47a6 --- /dev/null +++ b/e3c_llm_requests/google/gemma-3-27b-it.json @@ -0,0 +1,8 @@ +{ + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/google/medgemma-27b-text-it.json b/e3c_llm_requests/google/medgemma-27b-text-it.json new file mode 100644 index 0000000000000000000000000000000000000000..0eadf57aa2ee6563dba193b1a1fd16ebc0362a65 --- /dev/null +++ b/e3c_llm_requests/google/medgemma-27b-text-it.json @@ -0,0 +1,8 @@ +{ + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/google/medgemma-4b-it.json b/e3c_llm_requests/google/medgemma-4b-it.json new file mode 100644 index 0000000000000000000000000000000000000000..7c6a467cac7dee24972df120b27cdd729a1e75fd --- /dev/null +++ b/e3c_llm_requests/google/medgemma-4b-it.json @@ -0,0 +1,8 @@ +{ + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": 
"efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/microsoft/MediPhi-Clinical.json b/e3c_llm_requests/microsoft/MediPhi-Clinical.json new file mode 100644 index 0000000000000000000000000000000000000000..24031b2427e47fc919f0fda9c2570e2c55afafa6 --- /dev/null +++ b/e3c_llm_requests/microsoft/MediPhi-Clinical.json @@ -0,0 +1,8 @@ +{ + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" +} \ No newline at end of file diff --git a/e3c_llm_requests/microsoft/MediPhi-Instruct.json b/e3c_llm_requests/microsoft/MediPhi-Instruct.json new file mode 100644 index 0000000000000000000000000000000000000000..fd36894ce7cc6bbb938d3639a927ebb3c277254d --- /dev/null +++ b/e3c_llm_requests/microsoft/MediPhi-Instruct.json @@ -0,0 +1,8 @@ +{ + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" +} \ No newline at end of file diff --git a/e3c_llm_requests/mistralai/Mistral-7B-Instruct-v0.2.json b/e3c_llm_requests/mistralai/Mistral-7B-Instruct-v0.2.json new file mode 100644 index 0000000000000000000000000000000000000000..b36579fb429f3b744a46c6a84fed781411b85cc7 --- /dev/null +++ b/e3c_llm_requests/mistralai/Mistral-7B-Instruct-v0.2.json @@ -0,0 +1,8 @@ +{ + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" +} \ No newline at end of file diff --git 
a/e3c_llm_requests/mistralai/Mistral-Nemo-Instruct-2407.json b/e3c_llm_requests/mistralai/Mistral-Nemo-Instruct-2407.json new file mode 100644 index 0000000000000000000000000000000000000000..c46a7da06e59d841593280fb14969e4182c18d95 --- /dev/null +++ b/e3c_llm_requests/mistralai/Mistral-Nemo-Instruct-2407.json @@ -0,0 +1,8 @@ +{ + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" +} \ No newline at end of file diff --git a/e3c_llm_requests/tiiuae/Falcon3-10B-Instruct.json b/e3c_llm_requests/tiiuae/Falcon3-10B-Instruct.json new file mode 100644 index 0000000000000000000000000000000000000000..6fb21c1511b3e96748b3e779985cbcbbfca44186 --- /dev/null +++ b/e3c_llm_requests/tiiuae/Falcon3-10B-Instruct.json @@ -0,0 +1,8 @@ +{ + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" +} \ No newline at end of file diff --git a/e3c_llm_requests/unsloth/phi-4.json b/e3c_llm_requests/unsloth/phi-4.json new file mode 100644 index 0000000000000000000000000000000000000000..1aacd86f19c5a378deb37004884fd52f579b6daf --- /dev/null +++ b/e3c_llm_requests/unsloth/phi-4.json @@ -0,0 +1,8 @@ +{ + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_EN.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..2223b25d2d50eee0dbb18c844258689efb189c10 --- /dev/null +++ 
b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 20.954842, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 6.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.41, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 10.83, + "stderr": 0.0 + } + ], + "average_accuracy": 9.176666666666668, + "best_prompt": 10.83, + "prompt_id": "p3", + "CPS": 10.650944 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.870000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 33.94, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 31.31, + "stderr": 0.0 + } + ], + "average_accuracy": 26.040000000000003, + "best_prompt": 33.94, + "prompt_id": "p2", + "CPS": 31.25874 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_GR.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..ace63ca20b3d70142e4daf17f411d4b272b2534f --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 8.314364166666667, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": 
"en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 6.2, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 5.92, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 6.2, + "stderr": 0.0 + } + ], + "average_accuracy": 6.1066666666666665, + "best_prompt": 6.2, + "prompt_id": "p1", + "CPS": 6.194213333333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 10.17, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 5.06, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 10.65, + "stderr": 0.0 + } + ], + "average_accuracy": 8.626666666666667, + "best_prompt": 10.65, + "prompt_id": "p3", + "CPS": 10.434515000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_IT.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..751d803c11a77df8c3cb25104fc0e40e2ce49039 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 12.534040833333332, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.35, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.29, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 3.84, + "stderr": 0.0 + } + ], + "average_accuracy": 4.16, + "best_prompt": 4.35, + "prompt_id": "p1", + "CPS": 4.341735 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + 
"value": 6.72, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 22.66, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 13.0, + "stderr": 0.0 + } + ], + "average_accuracy": 14.126666666666667, + "best_prompt": 22.66, + "prompt_id": "p2", + "CPS": 20.726346666666664 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_PL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..48a53fe5be2f937e408f8fe6491e4fd9cbd86421 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 8.100043833333334, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 3.7900000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.7800000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 3.7900000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 3.786666666666667, + "best_prompt": 3.7900000000000005, + "prompt_id": "p1", + "CPS": 3.7898736666666673 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 6.02, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 12.93, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.779999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 8.909999999999998, + "best_prompt": 12.93, + "prompt_id": "p2", + "CPS": 12.410214 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SK.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..31975398d337d6036af1e177744f0559e63e6b23 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 3.3197085, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 3.8699999999999997, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.8, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 3.8699999999999997, + "stderr": 0.0 + } + ], + "average_accuracy": 3.8466666666666662, + "best_prompt": 3.8699999999999997, + "prompt_id": "p1", + "CPS": 3.8690969999999996 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 1.21, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 2.8000000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 1.21, + "stderr": 0.0 + } + ], + "average_accuracy": 1.7400000000000002, + "best_prompt": 2.8000000000000003, + "prompt_id": "p2", + "CPS": 2.77032 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..5927ff7b3634f50ff5fb3661bfda533bd7018936 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 11.184996000000002, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + 
"num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.5600000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 4.29, + "stderr": 0.0 + } + ], + "average_accuracy": 4.38, + "best_prompt": 4.5600000000000005, + "prompt_id": "p2", + "CPS": 4.551792000000001 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 9.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.67, + "stderr": 0.0 + } + ], + "average_accuracy": 12.780000000000001, + "best_prompt": 19.0, + "prompt_id": "p2", + "CPS": 17.8182 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..dd0d69af45a60908b4826c488192817e762b6440 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 19.2419855, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.89, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": 
"f1", + "value": 22.43, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 19.939999999999998, + "stderr": 0.0 + } + ], + "average_accuracy": 21.419999999999998, + "best_prompt": 22.43, + "prompt_id": "p2", + "CPS": 22.203457 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.89, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 16.68, + "stderr": 0.0 + } + ], + "average_accuracy": 14.285, + "best_prompt": 16.68, + "prompt_id": "p2", + "CPS": 16.280514 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..3f20942216ac328fa6a0a7a428069b719a43064e --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 13.378338333333332, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 16.669999999999998, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.89, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.669999999999998, + "stderr": 0.0 + } + ], + "average_accuracy": 14.743333333333332, + "best_prompt": 16.669999999999998, + "prompt_id": "p1", + "CPS": 16.348824666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.21, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.530000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 9.370000000000001, + "best_prompt": 
10.530000000000001, + "prompt_id": "p2", + "CPS": 10.407852 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..358d60201a9ae6fa373e07f60101f10d944b9298 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 25.626247, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 32.99, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.23, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.379999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 37.53333333333333, + "best_prompt": 40.23, + "prompt_id": "p2", + "CPS": 39.145131 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 9.77, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 12.26, + "stderr": 0.0 + } + ], + "average_accuracy": 11.015, + "best_prompt": 12.26, + "prompt_id": "p2", + "CPS": 12.107363 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..fd9f2220c73c3d0f107be883ed542c8aa0848b7a --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 25.169400916666667, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "10", + 
"batch_size": 1, + "LANG": "PL", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.92, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.160000000000004, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.92, + "stderr": 0.0 + } + ], + "average_accuracy": 39.666666666666664, + "best_prompt": 39.92, + "prompt_id": "p1", + "CPS": 39.81886933333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 9.98, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.549999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 10.265, + "best_prompt": 10.549999999999999, + "prompt_id": "p2", + "CPS": 10.5199325 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..5064fd7a0db765a5fc1b37ecb99f508152229002 --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 23.07614591666667, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 34.44, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 36.32, + "stderr": 0.0 + }, + { + "prompt": "p3", + 
"metric": "f1", + "value": 34.44, + "stderr": 0.0 + } + ], + "average_accuracy": 35.06666666666666, + "best_prompt": 36.32, + "prompt_id": "p2", + "CPS": 35.864789333333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.340000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.45, + "stderr": 0.0 + } + ], + "average_accuracy": 8.895, + "best_prompt": 10.45, + "prompt_id": "p2", + "CPS": 10.2875025 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..636595445e801787847de9ab439dd6218cd3634c --- /dev/null +++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 23.50218116666667, + "config": { + "model_name": "Henrychur/MMed-Llama-3-8B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "Henrychur/MMed-Llama-3-8B", + "base_model": "LlamaForCausalLM", + "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943", + "submitted_time": "2024-05-22 09:17:24+00:00", + "num_params_billion": null, + "language": "en_zh_ja_fr_ru_es" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 35.58, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.45, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 35.58, + "stderr": 0.0 + } + ], + "average_accuracy": 37.20333333333333, + "best_prompt": 40.45, + "prompt_id": "p2", + "CPS": 39.136723333333336 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.870000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 7.8100000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 7.840000000000001, + "best_prompt": 7.870000000000001, + "prompt_id": "p1", + "CPS": 7.867639000000001 + } + } +} \ No newline 
at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_EN.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..0f6673eff28267f3ed17fc96cdce3cdb19f23983 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 4.530016666666667, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 9.4, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.3099999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 4.64, + "stderr": 0.0 + } + ], + "average_accuracy": 5.783333333333334, + "best_prompt": 9.4, + "prompt_id": "p1", + "CPS": 9.060033333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_GR.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..9a9c25b44bfc28936ac14fc1d0b5fc61ca0e587b --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 4.256631333333333, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": 
"HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.59, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 5.91, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.59, + "stderr": 0.0 + } + ], + "average_accuracy": 7.696666666666666, + "best_prompt": 8.59, + "prompt_id": "p1", + "CPS": 8.513262666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_IT.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..609769757840e31031c216c83609c0fb58e68bfd --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 4.550473333333333, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 9.2, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.470000000000001, + 
"stderr": 0.0 + } + ], + "average_accuracy": 8.123333333333333, + "best_prompt": 9.2, + "prompt_id": "p2", + "CPS": 9.100946666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p2", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_PL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..b6b0fdfc2798f7d2ed8318fc57fd21219dd26dd9 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 2.1520960000000002, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 2.44, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.36, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 2.44, + "stderr": 0.0 + } + ], + "average_accuracy": 3.08, + "best_prompt": 4.36, + "prompt_id": "p2", + "CPS": 4.3041920000000005 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_SK.json 
b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..76abdb62c09417382bf399e210b1a6286f4db319 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 4.3259333333333325, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.799999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.75, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.799999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 7.116666666666666, + "best_prompt": 8.799999999999999, + "prompt_id": "p1", + "CPS": 8.651866666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_SL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..7c704fbf7b543a114e7fbd6588af79615ce5da4b --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 3.859359, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "HiTZ/Medical-mT5-large", + "base_model": 
"MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.7700000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 5.79, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.7700000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 7.11, + "best_prompt": 7.7700000000000005, + "prompt_id": "p1", + "CPS": 7.718718 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..0b869eb5e9b8e61c9c610546869108ebd6e6cb70 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 7.145816833333333, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.15, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 14.149999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 13.22, + "stderr": 0.0 
+ } + ], + "average_accuracy": 13.173333333333332, + "best_prompt": 14.149999999999999, + "prompt_id": "p2", + "CPS": 14.011801666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.27999999999999997, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.16, + "stderr": 0.0 + } + ], + "average_accuracy": 0.21999999999999997, + "best_prompt": 0.27999999999999997, + "prompt_id": "p1", + "CPS": 0.27983199999999997 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..d64a9939172ac6520b0e1898f50ba1e4d82c7952 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 7.3898055, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 14.549999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 14.34, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.549999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 14.479999999999999, + "best_prompt": 14.549999999999999, + "prompt_id": "p1", + "CPS": 14.539814999999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.24, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.06999999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 0.155, + "best_prompt": 0.24, + "prompt_id": "p1", + "CPS": 0.23979599999999998 + } + } +} \ No newline at end of 
file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..95a213df07cc69129318f22ea3b28fdec2d133a0 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 9.117984666666667, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 16.16, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 17.740000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.900000000000002, + "stderr": 0.0 + } + ], + "average_accuracy": 16.933333333333334, + "best_prompt": 17.740000000000002, + "prompt_id": "p2", + "CPS": 17.596897333333335 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.35000000000000003, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.64, + "stderr": 0.0 + } + ], + "average_accuracy": 0.495, + "best_prompt": 0.64, + "prompt_id": "p2", + "CPS": 0.6390720000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..c491e33d913e9fc01bc75d043ae67e6b2b066dda --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 7.915062, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "HiTZ/Medical-mT5-large", + 
"base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 15.479999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 15.0, + "stderr": 0.0 + } + ], + "average_accuracy": 15.159999999999998, + "best_prompt": 15.479999999999999, + "prompt_id": "p2", + "CPS": 15.430463999999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.4, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.22999999999999998, + "stderr": 0.0 + } + ], + "average_accuracy": 0.315, + "best_prompt": 0.4, + "prompt_id": "p1", + "CPS": 0.39966 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..0a7f9335270706d66ddcb363625ae35dd4800d28 --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 7.5839295, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 14.85, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 13.600000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.85, + "stderr": 0.0 + } + ], + "average_accuracy": 
14.433333333333332, + "best_prompt": 14.85, + "prompt_id": "p1", + "CPS": 14.788124999999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.38, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.24, + "stderr": 0.0 + } + ], + "average_accuracy": 0.31, + "best_prompt": 0.38, + "prompt_id": "p1", + "CPS": 0.379734 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..84cb9a99e7cd6d0d20fcd5d2462344fe4145a13b --- /dev/null +++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 7.6844565, + "config": { + "model_name": "HiTZ/Medical-mT5-large", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "HiTZ/Medical-mT5-large", + "base_model": "MT5ForConditionalGeneration", + "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47", + "submitted_time": "2023-10-31 15:15:15+00:00", + "num_params_billion": null, + "language": "en_es_fr_it" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 14.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 13.25, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.7, + "stderr": 0.0 + } + ], + "average_accuracy": 14.216666666666667, + "best_prompt": 14.7, + "prompt_id": "p1", + "CPS": 14.62895 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.73, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.74, + "stderr": 0.0 + } + ], + "average_accuracy": 0.735, + "best_prompt": 0.74, + "prompt_id": "p2", + "CPS": 0.739963 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_EN.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_EN.json new file mode 100644 index 
0000000000000000000000000000000000000000..a706e139d17d9ff81b46001fabd5ceb7f30bdea8 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 36.19732933333333, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 34.25, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 11.81, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.93, + "stderr": 0.0 + } + ], + "average_accuracy": 24.996666666666666, + "best_prompt": 34.25, + "prompt_id": "p1", + "CPS": 31.08073333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.349999999999994, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.17, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.72, + "stderr": 0.0 + } + ], + "average_accuracy": 40.74666666666666, + "best_prompt": 41.72, + "prompt_id": "p3", + "CPS": 41.31392533333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_GR.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..8b68b4988fb52492c123856b885cc73bb4f74b0d --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 27.333585333333332, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": 
"620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 13.389999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 11.91, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 13.389999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 12.896666666666667, + "best_prompt": 13.389999999999999, + "prompt_id": "p1", + "CPS": 13.323942666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.96, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.66, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.1, + "stderr": 0.0 + } + ], + "average_accuracy": 39.57333333333333, + "best_prompt": 42.66, + "prompt_id": "p2", + "CPS": 41.343227999999996 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_IT.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..1438305a5f81b631d8a0ce7a110df4d757e02607 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 32.46564883333333, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 17.09, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 22.34, 
+ "stderr": 0.0 + } + ], + "average_accuracy": 21.366666666666664, + "best_prompt": 24.67, + "prompt_id": "p1", + "CPS": 23.855067666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.730000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.7, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.06, + "stderr": 0.0 + } + ], + "average_accuracy": 40.163333333333334, + "best_prompt": 41.730000000000004, + "prompt_id": "p1", + "CPS": 41.07623 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_PL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..1e8a88dd8ce4f09c1cb9f885c44df045e55df81d --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 24.780516499999997, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 6.97, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.64, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 6.97, + "stderr": 0.0 + } + ], + "average_accuracy": 5.859999999999999, + "best_prompt": 6.97, + "prompt_id": "p1", + "CPS": 6.892633 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 38.03, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 44.64, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.0, + "stderr": 0.0 + } + ], + "average_accuracy": 
40.223333333333336, + "best_prompt": 44.64, + "prompt_id": "p2", + "CPS": 42.6684 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SK.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..a8b2ba628e0d14263023f4c8c3fa847baa11d7b0 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 27.026387333333332, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.2, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.26, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 12.2, + "stderr": 0.0 + } + ], + "average_accuracy": 9.553333333333333, + "best_prompt": 12.2, + "prompt_id": "p1", + "CPS": 11.877106666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.27, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.94, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.27, + "stderr": 0.0 + } + ], + "average_accuracy": 41.160000000000004, + "best_prompt": 42.94, + "prompt_id": "p2", + "CPS": 42.175668 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..4a09d6ee51209714a91c6cd104e2971306495289 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SL.json @@ -0,0 +1,69 @@ +{ + 
"average_CPS": 37.45460366666667, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.1, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.75, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.1, + "stderr": 0.0 + } + ], + "average_accuracy": 33.983333333333334, + "best_prompt": 39.1, + "prompt_id": "p1", + "CPS": 37.099383333333336 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.75, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.830000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 37.75, + "stderr": 0.0 + } + ], + "average_accuracy": 37.77666666666667, + "best_prompt": 37.830000000000005, + "prompt_id": "p2", + "CPS": 37.809824000000006 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..980d4ef11372d23cc9f73b762777115140f8dac7 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 61.6096845, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + 
"prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.91, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.46, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 62.43, + "stderr": 0.0 + } + ], + "average_accuracy": 59.93333333333334, + "best_prompt": 62.43, + "prompt_id": "p3", + "CPS": 60.871331000000005 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 63.32, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 60.25, + "stderr": 0.0 + } + ], + "average_accuracy": 61.785, + "best_prompt": 63.32, + "prompt_id": "p1", + "CPS": 62.348037999999995 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..ff45aaadb12973ee26b2663970ee69b45b925ffa --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 60.344233333333335, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 61.19, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.47, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.19, + "stderr": 0.0 + } + ], + "average_accuracy": 60.28333333333333, + "best_prompt": 61.19, + "prompt_id": "p1", + "CPS": 60.635210666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.62, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 
60.24, + "stderr": 0.0 + } + ], + "average_accuracy": 59.93, + "best_prompt": 60.24, + "prompt_id": "p2", + "CPS": 60.053256000000005 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..582c43f241edc629f90213f1df382d05da2c9513 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.7346905, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 67.19000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 63.27, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 66.61, + "stderr": 0.0 + } + ], + "average_accuracy": 65.69000000000001, + "best_prompt": 67.19000000000001, + "prompt_id": "p1", + "CPS": 66.18215000000001 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 59.98, + "stderr": 0.0 + } + ], + "average_accuracy": 58.825, + "best_prompt": 59.98, + "prompt_id": "p2", + "CPS": 59.287231 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..ebbd4f0ad92f29d5842d4b6cb8b40611d66be29f --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 
59.88153925, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 62.260000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.24, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 62.260000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 60.92000000000001, + "best_prompt": 62.260000000000005, + "prompt_id": "p1", + "CPS": 61.42571600000001 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.91, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 54.66, + "stderr": 0.0 + } + ], + "average_accuracy": 57.285, + "best_prompt": 59.91, + "prompt_id": "p1", + "CPS": 58.3373625 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..f69ceea68cf007d4ead062f7cc21849bbae460cd --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 61.6115985, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 63.85999999999999, + "stderr": 0.0 + }, 
+ { + "prompt": "p2", + "metric": "f1", + "value": 64.86, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 63.85999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 64.19333333333333, + "best_prompt": 64.86, + "prompt_id": "p2", + "CPS": 64.4276 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 58.940000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.45, + "stderr": 0.0 + } + ], + "average_accuracy": 58.69500000000001, + "best_prompt": 58.940000000000005, + "prompt_id": "p1", + "CPS": 58.79559700000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..ffb8f7ad74ac2dda6b10b542e2bb20535c6f55a1 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 61.52013541666667, + "config": { + "model_name": "Qwen/Qwen2.5-14B-Instruct-1M", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "Qwen/Qwen2.5-14B-Instruct-1M", + "base_model": "Qwen2ForCausalLM", + "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438", + "submitted_time": "2025-01-23 13:23:24+00:00", + "num_params_billion": 14.770033664, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 64.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.78, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 64.67, + "stderr": 0.0 + } + ], + "average_accuracy": 63.70666666666667, + "best_prompt": 64.67, + "prompt_id": "p1", + "CPS": 64.04701233333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.489999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 57.82000000000001, + "stderr": 0.0 + } + ], + 
"average_accuracy": 58.655, + "best_prompt": 59.489999999999995, + "prompt_id": "p1", + "CPS": 58.993258499999996 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_EN.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..04564e846875360b719d094bf3d3ee67400dc071 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 41.510924, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 38.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 30.680000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 29.64, + "stderr": 0.0 + } + ], + "average_accuracy": 32.78666666666667, + "best_prompt": 38.04, + "prompt_id": "p1", + "CPS": 36.041632 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 47.339999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 46.489999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 45.910000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 46.580000000000005, + "best_prompt": 47.339999999999996, + "prompt_id": "p1", + "CPS": 46.980216 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_GR.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..166ba224c7cc65815b0e1aa019e58e49ab5d7dc5 --- /dev/null +++ 
b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 46.818379166666666, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.760000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 15.68, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.760000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 45.06666666666667, + "best_prompt": 59.760000000000005, + "prompt_id": "p1", + "CPS": 50.979264 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.93, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.83, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.34, + "stderr": 0.0 + } + ], + "average_accuracy": 41.03333333333333, + "best_prompt": 43.93, + "prompt_id": "p1", + "CPS": 42.65749433333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_IT.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..031c07d7816c6ae3a50cb79289053a4a8f3545d5 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 39.086941, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 
32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.580000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 16.470000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 27.96, + "stderr": 0.0 + } + ], + "average_accuracy": 27.33666666666667, + "best_prompt": 37.580000000000005, + "prompt_id": "p1", + "CPS": 33.730555333333335 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 45.050000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.589999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 44.47, + "stderr": 0.0 + } + ], + "average_accuracy": 43.70333333333334, + "best_prompt": 45.050000000000004, + "prompt_id": "p1", + "CPS": 44.44332666666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_PL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..4fc14f016f561c435a27524256933199f0c28220 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 34.071664166666665, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.86, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.11, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 24.86, + "stderr": 0.0 + } + ], + "average_accuracy": 24.276666666666667, + "best_prompt": 24.86, + 
"prompt_id": "p1", + "CPS": 24.714983333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 38.65, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.69, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 37.88, + "stderr": 0.0 + } + ], + "average_accuracy": 40.74, + "best_prompt": 45.69, + "prompt_id": "p2", + "CPS": 43.428345 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SK.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..20192716e42bbf13a2fd56c63f17a01176555830 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 38.03573266666667, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 35.78, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 29.68, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 35.78, + "stderr": 0.0 + } + ], + "average_accuracy": 33.74666666666667, + "best_prompt": 35.78, + "prompt_id": "p1", + "CPS": 35.05247333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.71, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.52, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.71, + "stderr": 0.0 + } + ], + "average_accuracy": 40.31333333333333, + "best_prompt": 41.52, + "prompt_id": "p2", + "CPS": 41.018992000000004 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..cb4678a9321a1e07d919ed0b840772d5d4e09ce4 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 37.09308866666666, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 33.44, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 28.63, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 33.44, + "stderr": 0.0 + } + ], + "average_accuracy": 31.836666666666662, + "best_prompt": 33.44, + "prompt_id": "p1", + "CPS": 32.90384533333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.79, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.86, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.79, + "stderr": 0.0 + } + ], + "average_accuracy": 40.48, + "best_prompt": 41.86, + "prompt_id": "p2", + "CPS": 41.282332 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..eb6c844ebf0d4143c2dc53553d81f7c3d1fb265a --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.2876165, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": 
"Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.699999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.02, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.129999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 58.949999999999996, + "best_prompt": 61.129999999999995, + "prompt_id": "p3", + "CPS": 59.79736599999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 64.82, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 64.69, + "stderr": 0.0 + } + ], + "average_accuracy": 64.755, + "best_prompt": 64.82, + "prompt_id": "p1", + "CPS": 64.777867 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..a4146a594d84cf4ebdcd77f1818a421df34d4160 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 60.45274641666667, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 61.96, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.309999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.96, + "stderr": 0.0 + 
} + ], + "average_accuracy": 61.74333333333333, + "best_prompt": 61.96, + "prompt_id": "p1", + "CPS": 61.82575333333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.13, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.96, + "stderr": 0.0 + } + ], + "average_accuracy": 59.045, + "best_prompt": 59.13, + "prompt_id": "p1", + "CPS": 59.0797395 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..429676352665ce1f5771e1e1ed66c1a390811909 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 63.941768499999995, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 69.34, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 71.52, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 69.3, + "stderr": 0.0 + } + ], + "average_accuracy": 70.05333333333333, + "best_prompt": 71.52, + "prompt_id": "p2", + "CPS": 70.47103999999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 58.01, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.95, + "stderr": 0.0 + } + ], + "average_accuracy": 56.980000000000004, + "best_prompt": 58.01, + "prompt_id": "p1", + "CPS": 57.412497 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json 
b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..deb5f66c85e0db50021374a4e1873310f5b7c1ab --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 59.36132466666666, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.08, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 60.040000000000006, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.08, + "stderr": 0.0 + } + ], + "average_accuracy": 60.06666666666666, + "best_prompt": 60.08, + "prompt_id": "p1", + "CPS": 60.071989333333335 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 58.58, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.68, + "stderr": 0.0 + } + ], + "average_accuracy": 58.629999999999995, + "best_prompt": 58.68, + "prompt_id": "p2", + "CPS": 58.650659999999995 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..cc26f154c3f7e80b6c26dc12eb8b0424a50ff72d --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.09064391666667, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": 
"5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 67.43, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.73, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 67.43, + "stderr": 0.0 + } + ], + "average_accuracy": 67.19666666666667, + "best_prompt": 67.43, + "prompt_id": "p1", + "CPS": 67.27266333333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.330000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.86, + "stderr": 0.0 + } + ], + "average_accuracy": 56.595, + "best_prompt": 57.330000000000005, + "prompt_id": "p1", + "CPS": 56.9086245 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..741b9be7ed23884e80038bb99df58d42bd70f5c1 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.022576, + "config": { + "model_name": "Qwen/Qwen2.5-32B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "Qwen/Qwen2.5-32B-Instruct", + "base_model": "Qwen2ForCausalLM", + "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", + "submitted_time": "2024-09-17 04:17:55+00:00", + "num_params_billion": 32.763876352, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 62.529999999999994, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.14999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 62.529999999999994, + "stderr": 0.0 + } + ], + "average_accuracy": 63.73666666666666, + "best_prompt": 
66.14999999999999, + "prompt_id": "p2", + "CPS": 64.55358 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.919999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.489999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 59.205, + "best_prompt": 59.919999999999995, + "prompt_id": "p1", + "CPS": 59.491572 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_EN.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..b88657c50b39d6a4d0a9a6c730e9894895550724 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 44.38809091666667, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.94, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.31, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.97, + "stderr": 0.0 + } + ], + "average_accuracy": 41.406666666666666, + "best_prompt": 43.94, + "prompt_id": "p1", + "CPS": 42.82685333333333 + }, + "ner": { + "prompts": [ + { + "prompt": "p2", + "metric": "f1", + "value": 41.620000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 47.29, + "stderr": 0.0 + } + ], + "average_accuracy": 44.455, + "best_prompt": 47.29, + "prompt_id": "p3", + "CPS": 45.9493285 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_GR.json 
b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..0d5cc50b06a1c87e13cc0d2720d09f0701a34980 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 41.21096783333334, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 42.91, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.21, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.91, + "stderr": 0.0 + } + ], + "average_accuracy": 43.67666666666667, + "best_prompt": 45.21, + "prompt_id": "p2", + "CPS": 44.516780000000004 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.330000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.99, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 37.980000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 37.76666666666667, + "best_prompt": 37.99, + "prompt_id": "p2", + "CPS": 37.90515566666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_IT.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..55582b6ebaff4465652b9f3d6d02a406bdbc7329 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 42.221850333333336, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "0", + 
"batch_size": 1, + "LANG": "IT", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.85, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.16, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 45.14, + "stderr": 0.0 + } + ], + "average_accuracy": 35.71666666666666, + "best_prompt": 53.16, + "prompt_id": "p2", + "CPS": 43.887124 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.84, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.23, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.72, + "stderr": 0.0 + } + ], + "average_accuracy": 39.596666666666664, + "best_prompt": 41.23, + "prompt_id": "p2", + "CPS": 40.556576666666665 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_PL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..82a28507a049c402386737409bf573b89b5c0bd4 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 42.480305333333334, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.32, + "stderr": 0.0 + }, + { + "prompt": "p2", + 
"metric": "f1", + "value": 40.43, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.32, + "stderr": 0.0 + } + ], + "average_accuracy": 42.35666666666666, + "best_prompt": 43.32, + "prompt_id": "p1", + "CPS": 42.902684 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.52, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.199999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.870000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 41.86333333333334, + "best_prompt": 42.199999999999996, + "prompt_id": "p2", + "CPS": 42.05792666666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SK.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..afc45673f792c9780bd0ab49b8c3ce1a66121e9e --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 36.625888, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 32.31, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 33.98, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.31, + "stderr": 0.0 + } + ], + "average_accuracy": 32.86666666666667, + "best_prompt": 33.98, + "prompt_id": "p2", + "CPS": 33.60168933333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.800000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": 
"f1", + "value": 38.67, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.800000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 39.42333333333334, + "best_prompt": 39.800000000000004, + "prompt_id": "p1", + "CPS": 39.65008666666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..825a3e075a631aff84efe4658f0c10f160085f7c --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 43.20190633333334, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 44.86, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.31, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 44.86, + "stderr": 0.0 + } + ], + "average_accuracy": 45.01, + "best_prompt": 45.31, + "prompt_id": "p2", + "CPS": 45.17407 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.15, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.260000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.15, + "stderr": 0.0 + } + ], + "average_accuracy": 41.18666666666667, + "best_prompt": 41.260000000000005, + "prompt_id": "p2", + "CPS": 41.229742666666674 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json 
b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..d945752b0b2f3fafdd4ccbc5f40e2b675c2d7346 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 56.15015708333333, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.86, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.93, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.42999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 59.07333333333333, + "best_prompt": 61.42999999999999, + "prompt_id": "p3", + "CPS": 59.98229966666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.5, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.61, + "stderr": 0.0 + } + ], + "average_accuracy": 52.055, + "best_prompt": 52.61, + "prompt_id": "p2", + "CPS": 52.318014500000004 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..5f9858db1245a5517bf487198d10072acca3ed03 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 56.10383349999999, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + 
"base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 61.63999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.69, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.63999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 59.98999999999999, + "best_prompt": 61.63999999999999, + "prompt_id": "p1", + "CPS": 60.62293999999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.14999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.09, + "stderr": 0.0 + } + ], + "average_accuracy": 51.12, + "best_prompt": 52.09, + "prompt_id": "p2", + "CPS": 51.584727 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..734bf0a8b86320baa08570dac56bf8fecc43169c --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 63.45470708333333, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 67.93, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 64.47, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 67.78, + "stderr": 0.0 + 
} + ], + "average_accuracy": 66.72666666666667, + "best_prompt": 67.93, + "prompt_id": "p1", + "CPS": 67.11257566666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.41, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.379999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 59.394999999999996, + "best_prompt": 60.41, + "prompt_id": "p1", + "CPS": 59.7968385 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..dcbfd8c20f0c7af2980f30dcb6f2ea439943ff34 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 56.759142, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 62.760000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.03, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 62.760000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 61.18333333333334, + "best_prompt": 62.760000000000005, + "prompt_id": "p1", + "CPS": 61.770484 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.03, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.0, + "stderr": 0.0 + } + ], + "average_accuracy": 51.515, + "best_prompt": 52.0, + "prompt_id": "p2", + "CPS": 51.7478 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..704b15ee32eea9298093444a0f88131607359c59 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.24974208333333, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.85, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 59.19, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.85, + "stderr": 0.0 + } + ], + "average_accuracy": 60.29666666666666, + "best_prompt": 60.85, + "prompt_id": "p1", + "CPS": 60.51329666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 49.2, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.24999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 49.724999999999994, + "best_prompt": 50.24999999999999, + "prompt_id": "p2", + "CPS": 49.98618749999999 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..c6c624617bd67c5a31629add1d33abb9288b2051 --- /dev/null +++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 59.49870649999999, + "config": { + "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "num_fewshot": "10", + "batch_size": 1, + 
"LANG": "SL", + "model": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "base_model": "Qwen3MoeForCausalLM", + "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad", + "submitted_time": "2025-07-28 07:31:27+00:00", + "num_params_billion": 30.532122624, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 66.14999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 59.440000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 66.14999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 63.91333333333333, + "best_prompt": 66.14999999999999, + "prompt_id": "p1", + "CPS": 64.67044499999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.62, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.76, + "stderr": 0.0 + } + ], + "average_accuracy": 53.19, + "best_prompt": 55.76, + "prompt_id": "p2", + "CPS": 54.326967999999994 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_PL-checkpoint.json b/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_PL-checkpoint.json new file mode 100644 index 0000000000000000000000000000000000000000..68dccc70ad202c63fec1d9c87405a8c9b2abba98 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_PL-checkpoint.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 39.199796666666664, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": 
"f1", + "value": 32.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.28, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.04, + "stderr": 0.0 + } + ], + "average_accuracy": 33.78666666666667, + "best_prompt": 37.28, + "prompt_id": "p2", + "CPS": 35.97768533333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.83, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.269999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.83, + "stderr": 0.0 + } + ], + "average_accuracy": 41.309999999999995, + "best_prompt": 43.269999999999996, + "prompt_id": "p2", + "CPS": 42.421907999999995 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_EN.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..1115fa70ea48e2159da9da0029bf77500d1eeacf --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 38.6086025, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 19.63, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 34.589999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.08, + "stderr": 0.0 + } + ], + "average_accuracy": 28.766666666666666, + "best_prompt": 34.589999999999996, + "prompt_id": "p2", + "CPS": 32.575708999999996 + }, + "re": { + 
"prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 44.87, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 44.92, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.11, + "stderr": 0.0 + } + ], + "average_accuracy": 44.300000000000004, + "best_prompt": 44.92, + "prompt_id": "p2", + "CPS": 44.641496000000004 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_GR.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..26984211d447010e940703c23b7c13b204043789 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 36.46137383333333, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 34.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 33.54, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 34.55, + "stderr": 0.0 + } + ], + "average_accuracy": 34.21333333333333, + "best_prompt": 34.55, + "prompt_id": "p1", + "CPS": 34.433681666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.060000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.47, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.02, + "stderr": 0.0 + } + ], + "average_accuracy": 34.85, + "best_prompt": 41.02, + "prompt_id": "p3", + "CPS": 38.489066 + } + } +} \ No newline at 
end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_IT.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..817874aed3d653ea123194a098b4c69676baadf9 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 39.908362, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 26.779999999999998, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 35.68, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 34.14, + "stderr": 0.0 + } + ], + "average_accuracy": 32.199999999999996, + "best_prompt": 35.68, + "prompt_id": "p2", + "CPS": 34.438336 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 45.190000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 46.11, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.27, + "stderr": 0.0 + } + ], + "average_accuracy": 44.52333333333333, + "best_prompt": 46.11, + "prompt_id": "p2", + "CPS": 45.378388 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_PL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..68dccc70ad202c63fec1d9c87405a8c9b2abba98 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_PL.json @@ -0,0 +1,69 @@ +{ + 
"average_CPS": 39.199796666666664, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 32.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.28, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.04, + "stderr": 0.0 + } + ], + "average_accuracy": 33.78666666666667, + "best_prompt": 37.28, + "prompt_id": "p2", + "CPS": 35.97768533333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.83, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.269999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.83, + "stderr": 0.0 + } + ], + "average_accuracy": 41.309999999999995, + "best_prompt": 43.269999999999996, + "prompt_id": "p2", + "CPS": 42.421907999999995 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SK.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..f12f032decd06a533a7c615b24ca92ee9c913d1e --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 33.894328, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + 
"num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 28.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.05, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.29, + "stderr": 0.0 + } + ], + "average_accuracy": 25.209999999999997, + "best_prompt": 28.29, + "prompt_id": "p1", + "CPS": 27.418667999999997 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 38.93, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.910000000000004, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.93, + "stderr": 0.0 + } + ], + "average_accuracy": 39.59, + "best_prompt": 40.910000000000004, + "prompt_id": "p2", + "CPS": 40.369988000000006 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..de06a880e84bd661d7fa15e62b449db205eb358c --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 34.339884000000005, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 28.1, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.92, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.1, + "stderr": 0.0 + } + ], + "average_accuracy": 
26.040000000000003, + "best_prompt": 28.1, + "prompt_id": "p1", + "CPS": 27.521140000000003 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.160000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.15, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.160000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 41.156666666666666, + "best_prompt": 41.160000000000004, + "prompt_id": "p1", + "CPS": 41.15862800000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..c94eeacb050d30d24874bb8e98b60d2e9f07be97 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.919866, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.24, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 59.29, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.35, + "stderr": 0.0 + } + ], + "average_accuracy": 59.626666666666665, + "best_prompt": 60.24, + "prompt_id": "p1", + "CPS": 59.870528 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.910000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 51.99, + "stderr": 0.0 + } + ], + "average_accuracy": 51.95, + "best_prompt": 51.99, + 
"prompt_id": "p2", + "CPS": 51.969204000000005 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..5c4d8c668c6d4eee0f9417d199ea0721ee501bf8 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 51.55757925, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.28, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 57.96, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.28, + "stderr": 0.0 + } + ], + "average_accuracy": 58.84, + "best_prompt": 59.28, + "prompt_id": "p1", + "CPS": 59.019168 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 44.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.1, + "stderr": 0.0 + } + ], + "average_accuracy": 43.385000000000005, + "best_prompt": 44.67, + "prompt_id": "p1", + "CPS": 44.095990500000006 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..c99f3f127ff28ed47526ce1e73e17864ab53470a --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.19916833333333, + 
"config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 69.82000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.79, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 69.3, + "stderr": 0.0 + } + ], + "average_accuracy": 68.63666666666667, + "best_prompt": 69.82000000000001, + "prompt_id": "p1", + "CPS": 68.99379666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.46, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.26, + "stderr": 0.0 + } + ], + "average_accuracy": 55.36, + "best_prompt": 55.46, + "prompt_id": "p1", + "CPS": 55.404540000000004 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..34dd20506cfa325fb264db8313cfb4ffa2771791 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 56.29728216666666, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + 
"metric": "f1", + "value": 62.13999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.4, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 62.13999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 61.893333333333324, + "best_prompt": 62.13999999999999, + "prompt_id": "p1", + "CPS": 61.98672133333332 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 48.63, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 51.29, + "stderr": 0.0 + } + ], + "average_accuracy": 49.96, + "best_prompt": 51.29, + "prompt_id": "p2", + "CPS": 50.607843 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..012b6c30601a091071e9134aa3b0e484f453ba4b --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.168621666666674, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 63.470000000000006, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 62.11, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 63.470000000000006, + "stderr": 0.0 + } + ], + "average_accuracy": 63.01666666666667, + "best_prompt": 63.470000000000006, + "prompt_id": "p1", + "CPS": 63.182269333333345 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 
47.99, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 44.51, + "stderr": 0.0 + } + ], + "average_accuracy": 46.25, + "best_prompt": 47.99, + "prompt_id": "p1", + "CPS": 47.154974 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..ff670060bd8319e46bffe886a80b2b27a4a5ad37 --- /dev/null +++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.26683691666667, + "config": { + "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "base_model": "Qwen2ForCausalLM", + "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746", + "submitted_time": "2025-01-20 09:19:00+00:00", + "num_params_billion": 32.763876352, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.150000000000006, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 60.49, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.150000000000006, + "stderr": 0.0 + } + ], + "average_accuracy": 60.26333333333334, + "best_prompt": 60.49, + "prompt_id": "p2", + "CPS": 60.35288933333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.370000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 46.739999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 49.055, + "best_prompt": 51.370000000000005, + "prompt_id": "p1", + "CPS": 50.1807845 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_EN.json b/e3c_llm_results/epfl-llm/meditron-7b_0_EN.json new file mode 100644 index 
0000000000000000000000000000000000000000..331643983228b9ce4b7e3fe2e5fc467134f796e7 --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 6.579212, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 5.779999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.1000000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.48, + "stderr": 0.0 + } + ], + "average_accuracy": 6.12, + "best_prompt": 8.48, + "prompt_id": "p3", + "CPS": 8.279872000000001 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.42, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 4.97, + "stderr": 0.0 + } + ], + "average_accuracy": 3.1300000000000003, + "best_prompt": 4.97, + "prompt_id": "p3", + "CPS": 4.878552 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_GR.json b/e3c_llm_results/epfl-llm/meditron-7b_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..d80de856369cab5f2fd8a820e7b70264003b667f --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 19.223575999999998, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + 
"num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.169999999999998, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 24.43, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 24.169999999999998, + "stderr": 0.0 + } + ], + "average_accuracy": 24.256666666666664, + "best_prompt": 24.43, + "prompt_id": "p2", + "CPS": 24.387654666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.559999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 1.6099999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.58, + "stderr": 0.0 + } + ], + "average_accuracy": 5.916666666666667, + "best_prompt": 15.559999999999999, + "prompt_id": "p1", + "CPS": 14.059497333333331 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_IT.json b/e3c_llm_results/epfl-llm/meditron-7b_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..db8552c2e5205148ada5a7aeabbcda9e5403c472 --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 12.631825166666665, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.7299999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.12, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 5.3100000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 6.386666666666667, + "best_prompt": 
7.7299999999999995, + "prompt_id": "p1", + "CPS": 7.626160333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.2, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.29, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 12.68, + "stderr": 0.0 + } + ], + "average_accuracy": 10.723333333333334, + "best_prompt": 19.29, + "prompt_id": "p2", + "CPS": 17.63749 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_PL.json b/e3c_llm_results/epfl-llm/meditron-7b_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..d621f76c801ec1cf9de0d0d7de2d8213b1768a2d --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 6.367811666666667, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.4, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 12.030000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 11.4, + "stderr": 0.0 + } + ], + "average_accuracy": 11.61, + "best_prompt": 12.030000000000001, + "prompt_id": "p2", + "CPS": 11.979474000000002 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.76, + "stderr": 0.0 + } + ], + "average_accuracy": 0.25333333333333335, + "best_prompt": 0.76, + "prompt_id": "p3", + "CPS": 0.7561493333333333 + } + } +} \ No newline at end of file 
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_SK.json b/e3c_llm_results/epfl-llm/meditron-7b_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..86b454df6aa8be6d9fdf83599932ab839a4b3da4 --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 4.508018, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.74, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 5.86, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.74, + "stderr": 0.0 + } + ], + "average_accuracy": 7.78, + "best_prompt": 8.74, + "prompt_id": "p1", + "CPS": 8.656096 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.36, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.31, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.36, + "stderr": 0.0 + } + ], + "average_accuracy": 0.3433333333333333, + "best_prompt": 0.36, + "prompt_id": "p1", + "CPS": 0.35994 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_SL.json b/e3c_llm_results/epfl-llm/meditron-7b_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..00247b291ae9201b1e8121580372b79ab0658fa0 --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 8.782022166666668, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": 
"d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.97, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.6, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 11.97, + "stderr": 0.0 + } + ], + "average_accuracy": 9.513333333333334, + "best_prompt": 11.97, + "prompt_id": "p1", + "CPS": 11.675937000000001 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 5.9799999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 1.37, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 5.9799999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 4.4433333333333325, + "best_prompt": 5.9799999999999995, + "prompt_id": "p1", + "CPS": 5.888107333333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json b/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..96677630ce61b99e58ea81b8e93cd5a76128dc4a --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 10.826788500000001, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.03, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 14.790000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.540000000000001, + "stderr": 0.0 
+ } + ], + "average_accuracy": 12.453333333333333, + "best_prompt": 14.790000000000001, + "prompt_id": "p2", + "CPS": 14.444407000000002 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.22, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.92, + "stderr": 0.0 + } + ], + "average_accuracy": 7.07, + "best_prompt": 7.22, + "prompt_id": "p1", + "CPS": 7.20917 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json b/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..e4d86751d45f813987e29672710c1f53186a0dcc --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 0.0, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json b/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json new file mode 100644 index 
0000000000000000000000000000000000000000..059144e00a5496a1ccf7640177e1e0063bd39b2c --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 21.748485083333332, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 29.909999999999997, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 35.63, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 33.11, + "stderr": 0.0 + } + ], + "average_accuracy": 32.88333333333333, + "best_prompt": 35.63, + "prompt_id": "p2", + "CPS": 34.651362666666664 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.32, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 8.870000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 8.595, + "best_prompt": 8.870000000000001, + "prompt_id": "p2", + "CPS": 8.8456075 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json b/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..0f954175b73d882e48dd8a76b6bb7d771971cdcd --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 19.016219, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, 
+ "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 31.840000000000003, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 32.97, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 31.840000000000003, + "stderr": 0.0 + } + ], + "average_accuracy": 32.21666666666667, + "best_prompt": 32.97, + "prompt_id": "p2", + "CPS": 32.721626 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 5.33, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.61, + "stderr": 0.0 + } + ], + "average_accuracy": 4.970000000000001, + "best_prompt": 5.33, + "prompt_id": "p1", + "CPS": 5.310811999999999 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json b/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..ee8e0981aa2d878b6166738df9c83b7b2e7a07df --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 17.222192333333332, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 30.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 29.7, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 30.04, + "stderr": 0.0 + } + ], + "average_accuracy": 29.926666666666666, + "best_prompt": 30.04, + "prompt_id": "p1", + "CPS": 30.005954666666668 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.45, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + 
"value": 3.93, + "stderr": 0.0 + } + ], + "average_accuracy": 4.19, + "best_prompt": 4.45, + "prompt_id": "p1", + "CPS": 4.43843 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json b/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..b1d0c6247efcb7da38104f3b91a5aad8a2b8a629 --- /dev/null +++ b/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 17.99146783333333, + "config": { + "model_name": "epfl-llm/meditron-7b", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "epfl-llm/meditron-7b", + "base_model": "LlamaForCausalLM", + "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76", + "submitted_time": "2023-11-08 16:03:23+00:00", + "num_params_billion": 6.73855488, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 31.19, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 29.160000000000004, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 31.19, + "stderr": 0.0 + } + ], + "average_accuracy": 30.513333333333335, + "best_prompt": 31.19, + "prompt_id": "p1", + "CPS": 30.978947666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.77, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 5.01, + "stderr": 0.0 + } + ], + "average_accuracy": 4.89, + "best_prompt": 5.01, + "prompt_id": "p2", + "CPS": 5.003988 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_EN.json b/e3c_llm_results/google/gemma-2-9b-it_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..bf45d6b410264a81d53d1e0689390e8a1cbd6d8b --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 46.266848333333336, + "config": { + "model_name": "google/gemma-2-9b-it", + 
"num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 32.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 51.739999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.7, + "stderr": 0.0 + } + ], + "average_accuracy": 46.03666666666667, + "best_prompt": 53.7, + "prompt_id": "p3", + "CPS": 49.584790000000005 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.6, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.05, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.67, + "stderr": 0.0 + } + ], + "average_accuracy": 42.10666666666666, + "best_prompt": 43.6, + "prompt_id": "p1", + "CPS": 42.948906666666666 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_GR.json b/e3c_llm_results/google/gemma-2-9b-it_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..eb3cad84202f239566048211c3ab20d87c812cb0 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 47.411836666666666, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.489999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 
47.77, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.489999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 52.916666666666664, + "best_prompt": 55.489999999999995, + "prompt_id": "p1", + "CPS": 54.06205733333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.24, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.57, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.43, + "stderr": 0.0 + } + ], + "average_accuracy": 40.080000000000005, + "best_prompt": 41.24, + "prompt_id": "p1", + "CPS": 40.761616000000004 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_IT.json b/e3c_llm_results/google/gemma-2-9b-it_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..338e0244dedd57856946c40010229327fc27e344 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 53.69207316666666, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.38999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 65.24, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 62.1, + "stderr": 0.0 + } + ], + "average_accuracy": 61.57666666666666, + "best_prompt": 65.24, + "prompt_id": "p2", + "CPS": 62.85004133333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 45.85, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.13, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", 
+ "value": 41.959999999999994, + "stderr": 0.0 + } + ], + "average_accuracy": 42.98, + "best_prompt": 45.85, + "prompt_id": "p1", + "CPS": 44.534105 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_PL.json b/e3c_llm_results/google/gemma-2-9b-it_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..b0688f7b2df1b4ad1b2e8098f9feaa75a2c2dd81 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 41.18764683333333, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.6, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.55, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.6, + "stderr": 0.0 + } + ], + "average_accuracy": 40.916666666666664, + "best_prompt": 41.55, + "prompt_id": "p2", + "CPS": 41.28685 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 36.74, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.71, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 37.29, + "stderr": 0.0 + } + ], + "average_accuracy": 38.913333333333334, + "best_prompt": 42.71, + "prompt_id": "p2", + "CPS": 41.08844366666666 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_SK.json b/e3c_llm_results/google/gemma-2-9b-it_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..aa861a173d7daaf73ab92a101e58293814c6026d --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_0_SK.json @@ -0,0 +1,69 @@ +{ + 
"average_CPS": 45.32347, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 48.75, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.75, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 48.75, + "stderr": 0.0 + } + ], + "average_accuracy": 47.75, + "best_prompt": 48.75, + "prompt_id": "p1", + "CPS": 48.2625 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.89, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.4, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.89, + "stderr": 0.0 + } + ], + "average_accuracy": 41.06, + "best_prompt": 43.4, + "prompt_id": "p2", + "CPS": 42.38444 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_SL.json b/e3c_llm_results/google/gemma-2-9b-it_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..a62fbfa62d7292d0fad7e189a09e02b6239819c1 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 43.368616, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 47.07, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 
40.46, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 47.07, + "stderr": 0.0 + } + ], + "average_accuracy": 44.86666666666667, + "best_prompt": 47.07, + "prompt_id": "p1", + "CPS": 46.032891 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.79, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.160000000000004, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.79, + "stderr": 0.0 + } + ], + "average_accuracy": 40.580000000000005, + "best_prompt": 40.79, + "prompt_id": "p1", + "CPS": 40.704341 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_EN.json b/e3c_llm_results/google/gemma-2-9b-it_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..2a1c47e896bc69b1abe3c369899c40a0199913b2 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 56.581740499999995, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 62.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.38999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.18, + "stderr": 0.0 + } + ], + "average_accuracy": 59.19, + "best_prompt": 62.0, + "prompt_id": "p1", + "CPS": 60.257799999999996 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.629999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.37, + "stderr": 0.0 + } + ], + "average_accuracy": 52.5, + "best_prompt": 53.37, + 
"prompt_id": "p2", + "CPS": 52.905681 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_GR.json b/e3c_llm_results/google/gemma-2-9b-it_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..d72281ce42496d5e93ba1fa1bfdea23bae73581c --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.2137075, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.83, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.63, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.83, + "stderr": 0.0 + } + ], + "average_accuracy": 59.43, + "best_prompt": 60.83, + "prompt_id": "p1", + "CPS": 59.978379999999994 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 49.71, + "stderr": 0.0 + } + ], + "average_accuracy": 50.205, + "best_prompt": 50.7, + "prompt_id": "p1", + "CPS": 50.449035 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_IT.json b/e3c_llm_results/google/gemma-2-9b-it_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..61a38ce67f438143898a12d83df180d28a3912b9 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 60.12889791666666, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", 
+ "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 69.1, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.43, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 65.69, + "stderr": 0.0 + } + ], + "average_accuracy": 67.07333333333334, + "best_prompt": 69.1, + "prompt_id": "p1", + "CPS": 67.69957333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 49.58, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.65, + "stderr": 0.0 + } + ], + "average_accuracy": 51.614999999999995, + "best_prompt": 53.65, + "prompt_id": "p2", + "CPS": 52.55822249999999 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_PL.json b/e3c_llm_results/google/gemma-2-9b-it_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..e9f069f5bcc8f192dc889af8b7c2c2f45a4fbca7 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 54.86958533333334, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.08, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.620000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.08, + "stderr": 0.0 + } + ], + "average_accuracy": 58.92666666666667, + "best_prompt": 59.08, + "prompt_id": "p1", + "CPS": 
58.98941066666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.68000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 48.08, + "stderr": 0.0 + } + ], + "average_accuracy": 49.88, + "best_prompt": 51.68000000000001, + "prompt_id": "p1", + "CPS": 50.74976000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_SK.json b/e3c_llm_results/google/gemma-2-9b-it_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..2fbe3145867d01e281bb2b66559db9eff2af319e --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.936541749999996, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 61.41, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.22, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.41, + "stderr": 0.0 + } + ], + "average_accuracy": 61.346666666666664, + "best_prompt": 61.41, + "prompt_id": "p1", + "CPS": 61.371106999999995 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.53, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 47.54, + "stderr": 0.0 + } + ], + "average_accuracy": 49.535, + "best_prompt": 51.53, + "prompt_id": "p1", + "CPS": 50.5019765 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_SL.json b/e3c_llm_results/google/gemma-2-9b-it_10_SL.json new file mode 100644 index 
0000000000000000000000000000000000000000..57eb906afa46c50d46f49f42ab3b6fe949d1d954 --- /dev/null +++ b/e3c_llm_results/google/gemma-2-9b-it_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.45489516666666, + "config": { + "model_name": "google/gemma-2-9b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "google/gemma-2-9b-it", + "base_model": "Gemma2ForCausalLM", + "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819", + "submitted_time": "2024-06-24 08:05:41+00:00", + "num_params_billion": 9.241705984, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 63.65, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 57.37, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 63.65, + "stderr": 0.0 + } + ], + "average_accuracy": 61.556666666666665, + "best_prompt": 63.65, + "prompt_id": "p1", + "CPS": 62.31759333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 48.010000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 48.78, + "stderr": 0.0 + } + ], + "average_accuracy": 48.395, + "best_prompt": 48.78, + "prompt_id": "p2", + "CPS": 48.592197 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_EN.json b/e3c_llm_results/google/gemma-3-27b-it_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..d64ce0e5f4b1a3a1c7a110fc652dbb3067648ba4 --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 51.5885295, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + 
"tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 54.459999999999994, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.3, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.94, + "stderr": 0.0 + } + ], + "average_accuracy": 54.9, + "best_prompt": 58.3, + "prompt_id": "p2", + "CPS": 56.3178 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 45.43, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.82, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 47.43, + "stderr": 0.0 + } + ], + "average_accuracy": 46.22666666666667, + "best_prompt": 47.43, + "prompt_id": "p3", + "CPS": 46.859259 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_GR.json b/e3c_llm_results/google/gemma-3-27b-it_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..c7608c89b95a9b573aa92dfcb6b71a711794db0a --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 50.113703, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 48.66, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 57.21000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 48.66, + "stderr": 0.0 + } + ], + "average_accuracy": 51.51, + "best_prompt": 57.21000000000001, + "prompt_id": "p2", + "CPS": 53.94903000000001 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 
39.550000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 46.949999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 47.69, + "stderr": 0.0 + } + ], + "average_accuracy": 44.73, + "best_prompt": 47.69, + "prompt_id": "p3", + "CPS": 46.278376 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_IT.json b/e3c_llm_results/google/gemma-3-27b-it_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..a1c986af3a03e9510c0d853057a23bbc21b3ad5f --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 55.535388, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.43, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.97, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.540000000000006, + "stderr": 0.0 + } + ], + "average_accuracy": 60.64666666666667, + "best_prompt": 66.97, + "prompt_id": "p2", + "CPS": 62.73526366666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.9, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 48.949999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 49.27, + "stderr": 0.0 + } + ], + "average_accuracy": 47.373333333333335, + "best_prompt": 49.27, + "prompt_id": "p3", + "CPS": 48.335512333333334 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_PL.json 
b/e3c_llm_results/google/gemma-3-27b-it_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..d40e27aa90d3d6c757f72ebdb7bcccec5282dbdb --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 44.29942833333334, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 45.06, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.11, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 45.06, + "stderr": 0.0 + } + ], + "average_accuracy": 45.076666666666675, + "best_prompt": 45.11, + "prompt_id": "p2", + "CPS": 45.09496333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.84, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.67, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.71, + "stderr": 0.0 + } + ], + "average_accuracy": 43.07333333333333, + "best_prompt": 43.84, + "prompt_id": "p1", + "CPS": 43.50389333333334 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_SK.json b/e3c_llm_results/google/gemma-3-27b-it_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..d47ed61406c00ff429886925305611ba87a9eeeb --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 37.22623216666667, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "google/gemma-3-27b-it", + "base_model": 
"Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 31.830000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.57, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 31.830000000000002, + "stderr": 0.0 + } + ], + "average_accuracy": 28.41, + "best_prompt": 31.830000000000002, + "prompt_id": "p1", + "CPS": 30.741414000000002 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.730000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.6, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.730000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 43.68666666666667, + "best_prompt": 43.730000000000004, + "prompt_id": "p1", + "CPS": 43.71105033333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_SL.json b/e3c_llm_results/google/gemma-3-27b-it_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..6bd284fccd9162348b3657f15ec506ff69fda58e --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 45.01248166666667, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 47.83, + "stderr": 0.0 + }, + { + 
"prompt": "p3", + "metric": "f1", + "value": 43.7, + "stderr": 0.0 + } + ], + "average_accuracy": 45.076666666666675, + "best_prompt": 47.83, + "prompt_id": "p2", + "CPS": 46.513080666666674 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 42.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.91, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.55, + "stderr": 0.0 + } + ], + "average_accuracy": 43.00333333333333, + "best_prompt": 43.91, + "prompt_id": "p2", + "CPS": 43.511882666666665 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_EN.json b/e3c_llm_results/google/gemma-3-27b-it_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..8a68c7d453d7a914032504b668104de36291df97 --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 58.58681733333334, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 61.6, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 63.080000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 60.940000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 61.873333333333335, + "best_prompt": 63.080000000000005, + "prompt_id": "p2", + "CPS": 62.318834666666675 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.910000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.00000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 
53.955000000000005, + "best_prompt": 56.00000000000001, + "prompt_id": "p2", + "CPS": 54.85480000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_GR.json b/e3c_llm_results/google/gemma-3-27b-it_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..fb65103d364a4feae35a872c0677bd4d92da6f4b --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 60.0164855, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 65.51, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.08000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 65.51, + "stderr": 0.0 + } + ], + "average_accuracy": 65.7, + "best_prompt": 66.08000000000001, + "prompt_id": "p2", + "CPS": 65.828896 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.83, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.50000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 53.165000000000006, + "best_prompt": 55.50000000000001, + "prompt_id": "p2", + "CPS": 54.20407500000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_IT.json b/e3c_llm_results/google/gemma-3-27b-it_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..dc6f89824afe7665fc4d1adf165cd4a2c8de28f7 --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 64.0004405, + "config": { + "model_name": 
"google/gemma-3-27b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 71.41999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 69.92, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 72.11999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 71.15333333333332, + "best_prompt": 72.11999999999999, + "prompt_id": "p3", + "CPS": 71.42284 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 52.23, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.37, + "stderr": 0.0 + } + ], + "average_accuracy": 55.3, + "best_prompt": 58.37, + "prompt_id": "p2", + "CPS": 56.578041 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_PL.json b/e3c_llm_results/google/gemma-3-27b-it_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..bead1cad147ec714c27d9740da60d6e809aa882b --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 61.87379849999999, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 65.91, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 66.72, + "stderr": 0.0 + }, + { + 
"prompt": "p3", + "metric": "f1", + "value": 65.91, + "stderr": 0.0 + } + ], + "average_accuracy": 66.17999999999999, + "best_prompt": 66.72, + "prompt_id": "p2", + "CPS": 66.35971199999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.95, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.010000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 56.980000000000004, + "best_prompt": 57.95, + "prompt_id": "p1", + "CPS": 57.387885 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_SK.json b/e3c_llm_results/google/gemma-3-27b-it_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..1c04043b51947a6fd1f3e318d625de621a85f219 --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 59.613524999999996, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 67.36999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 68.85, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 67.36999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 67.86333333333333, + "best_prompt": 68.85, + "prompt_id": "p2", + "CPS": 68.17067999999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.21, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.61, + "stderr": 0.0 + } + ], + "average_accuracy": 50.91, + "best_prompt": 51.21, + "prompt_id": "p1", + "CPS": 51.05637 + } + } +} \ No newline at end of file diff 
--git a/e3c_llm_results/google/gemma-3-27b-it_10_SL.json b/e3c_llm_results/google/gemma-3-27b-it_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..6d2ace6f7eb48518649c7907ae57aec4a4469b8d --- /dev/null +++ b/e3c_llm_results/google/gemma-3-27b-it_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 59.373478500000004, + "config": { + "model_name": "google/gemma-3-27b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "google/gemma-3-27b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "005ad3404e59d6023443cb575daa05336842228a", + "submitted_time": "2025-03-01 19:10:19+00:00", + "num_params_billion": 27.43240664, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 67.5, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 69.17999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 67.5, + "stderr": 0.0 + } + ], + "average_accuracy": 68.06, + "best_prompt": 69.17999999999999, + "prompt_id": "p2", + "CPS": 68.405184 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.49, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 47.03, + "stderr": 0.0 + } + ], + "average_accuracy": 49.260000000000005, + "best_prompt": 51.49, + "prompt_id": "p1", + "CPS": 50.341773 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_EN.json b/e3c_llm_results/google/medgemma-27b-text-it_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..3f4cd952f4fda1e9ce50f89d395a8d60bf6957ac --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 50.889483999999996, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "google/medgemma-27b-text-it", + "base_model": 
"Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 38.42, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 60.35, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.559999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 50.10999999999999, + "best_prompt": 60.35, + "prompt_id": "p2", + "CPS": 54.170159999999996 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 48.36, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 47.63, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 44.43, + "stderr": 0.0 + } + ], + "average_accuracy": 46.80666666666667, + "best_prompt": 48.36, + "prompt_id": "p1", + "CPS": 47.608808 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_GR.json b/e3c_llm_results/google/medgemma-27b-text-it_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..f13143d9ac5e2e7eb7aa339f7117855e133da2cf --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 50.34454, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 53.14, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.260000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.14, + 
"stderr": 0.0 + } + ], + "average_accuracy": 55.84666666666667, + "best_prompt": 61.260000000000005, + "prompt_id": "p2", + "CPS": 57.943792 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.69, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.32, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.97, + "stderr": 0.0 + } + ], + "average_accuracy": 41.99333333333333, + "best_prompt": 43.32, + "prompt_id": "p2", + "CPS": 42.745288 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_IT.json b/e3c_llm_results/google/medgemma-27b-text-it_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..85d4c2b955d9109dac24600175bc519b95b330a9 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 51.99607533333334, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 42.61, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 62.12, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.82, + "stderr": 0.0 + } + ], + "average_accuracy": 53.51666666666667, + "best_prompt": 62.12, + "prompt_id": "p2", + "CPS": 56.77560933333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.42, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 49.16, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 46.04, + "stderr": 0.0 + } + ], + "average_accuracy": 45.20666666666667, + 
"best_prompt": 49.16, + "prompt_id": "p2", + "CPS": 47.21654133333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_PL.json b/e3c_llm_results/google/medgemma-27b-text-it_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..1594c071a37c857b3035ebd0c555f749048d7d30 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 43.305971666666665, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 42.16, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 43.03, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.16, + "stderr": 0.0 + } + ], + "average_accuracy": 42.449999999999996, + "best_prompt": 43.03, + "prompt_id": "p2", + "CPS": 42.780426 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.25, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 44.24, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.46, + "stderr": 0.0 + } + ], + "average_accuracy": 43.31666666666666, + "best_prompt": 44.24, + "prompt_id": "p2", + "CPS": 43.83151733333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_SK.json b/e3c_llm_results/google/medgemma-27b-text-it_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..33d47a87d2b164424187aced2cd531d8aadf1aaf --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 
36.36130216666666, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 29.709999999999997, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.66, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 29.709999999999997, + "stderr": 0.0 + } + ], + "average_accuracy": 23.36, + "best_prompt": 29.709999999999997, + "prompt_id": "p1", + "CPS": 27.823414999999997 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.95, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.31, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.95, + "stderr": 0.0 + } + ], + "average_accuracy": 44.403333333333336, + "best_prompt": 45.31, + "prompt_id": "p2", + "CPS": 44.89918933333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_SL.json b/e3c_llm_results/google/medgemma-27b-text-it_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..0f7f99959d4458ce3cb4757738c2f10f8ff54739 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 46.321461, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": 
"p1", + "metric": "f1", + "value": 46.75, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.38, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 46.75, + "stderr": 0.0 + } + ], + "average_accuracy": 48.626666666666665, + "best_prompt": 52.38, + "prompt_id": "p2", + "CPS": 50.414004 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.82, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.39, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.82, + "stderr": 0.0 + } + ], + "average_accuracy": 42.01, + "best_prompt": 42.39, + "prompt_id": "p2", + "CPS": 42.228918 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json b/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..f2aa5f156979a7fe28ac6861596dce98dfdaa1c3 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 59.56656766666666, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 63.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.61, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 64.55, + "stderr": 0.0 + } + ], + "average_accuracy": 63.23666666666666, + "best_prompt": 64.55, + "prompt_id": "p3", + "CPS": 63.70224333333332 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.620000000000005, + "stderr": 0.0 + }, + { + "prompt": 
"p2", + "metric": "f1", + "value": 54.94, + "stderr": 0.0 + } + ], + "average_accuracy": 55.28, + "best_prompt": 55.620000000000005, + "prompt_id": "p1", + "CPS": 55.430892 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json b/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..d38f87e279e73c26d67480863631bda50390dc06 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.84547375, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 68.36, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 68.46, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 68.36, + "stderr": 0.0 + } + ], + "average_accuracy": 68.39333333333333, + "best_prompt": 68.46, + "prompt_id": "p2", + "CPS": 68.41436 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 53.92, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.67, + "stderr": 0.0 + } + ], + "average_accuracy": 56.295, + "best_prompt": 58.67, + "prompt_id": "p2", + "CPS": 57.2765875 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json b/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..76310e0c5141b518b3e17ed4f9473cc823df72a9 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json @@ -0,0 +1,57 @@ +{ + "average_CPS": 
66.5258515, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 72.61999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 70.05, + "stderr": 0.0 + } + ], + "average_accuracy": 71.335, + "best_prompt": 72.61999999999999, + "prompt_id": "p1", + "CPS": 71.686833 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.19, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 62.35000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 60.77, + "best_prompt": 62.35000000000001, + "prompt_id": "p2", + "CPS": 61.36487 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json b/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..531f67b03eb325cc8de1b028a7f10c2956a3d5c4 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 64.38433175, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 68.28999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 67.15, + "stderr": 0.0 + }, + { + "prompt": "p3", + 
"metric": "f1", + "value": 68.28999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 67.91, + "best_prompt": 68.28999999999999, + "prompt_id": "p1", + "CPS": 68.030498 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 59.4, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 61.33, + "stderr": 0.0 + } + ], + "average_accuracy": 60.364999999999995, + "best_prompt": 61.33, + "prompt_id": "p2", + "CPS": 60.738165499999994 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json b/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..1cb35f7ef94b0af9408747636209ed338ac3ebcc --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 61.536083, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 71.43, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 71.27, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 71.43, + "stderr": 0.0 + } + ], + "average_accuracy": 71.37666666666667, + "best_prompt": 71.43, + "prompt_id": "p1", + "CPS": 71.391904 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.11, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 51.88, + "stderr": 0.0 + } + ], + "average_accuracy": 51.495000000000005, + "best_prompt": 51.88, + "prompt_id": "p2", + "CPS": 51.680262 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json b/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..03feb9fefb01b93cb04084e4f3b5355eeb571be5 --- /dev/null +++ b/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.10114183333333, + "config": { + "model_name": "google/medgemma-27b-text-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "google/medgemma-27b-text-it", + "base_model": "Gemma3ForCausalLM", + "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d", + "submitted_time": "2025-05-19 20:53:04+00:00", + "num_params_billion": 27.00900224, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 69.47, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 67.65, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 69.47, + "stderr": 0.0 + } + ], + "average_accuracy": 68.86333333333333, + "best_prompt": 69.47, + "prompt_id": "p1", + "CPS": 69.04854866666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 53.23, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.900000000000006, + "stderr": 0.0 + } + ], + "average_accuracy": 54.565, + "best_prompt": 55.900000000000006, + "prompt_id": "p2", + "CPS": 55.153735 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_0_EN.json b/e3c_llm_results/google/medgemma-4b-it_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..18fd4074023cd571b288201cd24ff20e9302a517 --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 29.15664266666667, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "google/medgemma-4b-it", + "base_model": 
"Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 26.35, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 25.03, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 27.37, + "stderr": 0.0 + } + ], + "average_accuracy": 26.25, + "best_prompt": 27.37, + "prompt_id": "p3", + "CPS": 27.063456000000002 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 20.95, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 32.57, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.029999999999994, + "stderr": 0.0 + } + ], + "average_accuracy": 28.516666666666666, + "best_prompt": 32.57, + "prompt_id": "p2", + "CPS": 31.249829333333334 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_0_GR.json b/e3c_llm_results/google/medgemma-4b-it_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..3835892ab50cf74c6c4a63372320d1bb4f8f4b9e --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 27.1538555, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 27.05, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 26.540000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 27.05, + 
"stderr": 0.0 + } + ], + "average_accuracy": 26.88, + "best_prompt": 27.05, + "prompt_id": "p1", + "CPS": 27.004015 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 23.810000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 30.240000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.539999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 20.53, + "best_prompt": 30.240000000000002, + "prompt_id": "p2", + "CPS": 27.303696000000002 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_0_IT.json b/e3c_llm_results/google/medgemma-4b-it_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..cb98f8f02ae025b72d36db009ebab6d0520533f4 --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 26.877481500000002, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 31.569999999999997, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 26.27, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 30.04, + "stderr": 0.0 + } + ], + "average_accuracy": 29.293333333333333, + "best_prompt": 31.569999999999997, + "prompt_id": "p1", + "CPS": 30.851256333333332 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.54, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 24.610000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 6.88, + "stderr": 0.0 + } + ], + 
"average_accuracy": 17.676666666666666, + "best_prompt": 24.610000000000003, + "prompt_id": "p2", + "CPS": 22.903706666666668 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_0_PL.json b/e3c_llm_results/google/medgemma-4b-it_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..14c1f5f02728860f4c00666b348cc06d819c4ed8 --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 17.725084000000003, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 22.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.83, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 22.55, + "stderr": 0.0 + } + ], + "average_accuracy": 22.310000000000002, + "best_prompt": 22.55, + "prompt_id": "p1", + "CPS": 22.495880000000003 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.5, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 13.139999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 10.54, + "stderr": 0.0 + } + ], + "average_accuracy": 11.726666666666667, + "best_prompt": 13.139999999999999, + "prompt_id": "p2", + "CPS": 12.954287999999998 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_0_SK.json b/e3c_llm_results/google/medgemma-4b-it_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..39e39c7e167f3fef988796f8ae4a6742f23cf037 --- /dev/null +++ 
b/e3c_llm_results/google/medgemma-4b-it_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 19.074956666666665, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.47, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.87, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 24.47, + "stderr": 0.0 + } + ], + "average_accuracy": 24.27, + "best_prompt": 24.47, + "prompt_id": "p1", + "CPS": 24.421059999999997 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.19, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 13.99, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 11.19, + "stderr": 0.0 + } + ], + "average_accuracy": 12.123333333333333, + "best_prompt": 13.99, + "prompt_id": "p2", + "CPS": 13.728853333333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_0_SL.json b/e3c_llm_results/google/medgemma-4b-it_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..b25cdd8ddf43458f8d01a054d249972089a89a2d --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 18.266028000000002, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": 
{ + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 25.740000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 25.580000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 25.740000000000002, + "stderr": 0.0 + } + ], + "average_accuracy": 25.686666666666667, + "best_prompt": 25.740000000000002, + "prompt_id": "p1", + "CPS": 25.726272 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 9.73, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.89, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.73, + "stderr": 0.0 + } + ], + "average_accuracy": 10.116666666666667, + "best_prompt": 10.89, + "prompt_id": "p2", + "CPS": 10.805784000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_EN.json b/e3c_llm_results/google/medgemma-4b-it_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..20e519982838ed46b8a34855977c7aa03b943a3c --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 30.937053083333332, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 48.33, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.05, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 49.51, + "stderr": 0.0 + } + ], + "average_accuracy": 49.29666666666666, + "best_prompt": 50.05, + "prompt_id": "p2", + "CPS": 49.672956666666664 + }, + "re": { + "prompts": [ + { + 
"prompt": "p1", + "metric": "f1", + "value": 9.64, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 12.370000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 11.005, + "best_prompt": 12.370000000000001, + "prompt_id": "p2", + "CPS": 12.2011495 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_GR.json b/e3c_llm_results/google/medgemma-4b-it_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..88d7b983e916403ee1eb80b67fd097622abb2126 --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 32.84242175, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 49.1, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.39, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 49.1, + "stderr": 0.0 + } + ], + "average_accuracy": 49.53, + "best_prompt": 50.39, + "prompt_id": "p2", + "CPS": 49.956646000000006 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 16.05, + "stderr": 0.0 + } + ], + "average_accuracy": 14.045, + "best_prompt": 16.05, + "prompt_id": "p2", + "CPS": 15.7281975 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_IT.json b/e3c_llm_results/google/medgemma-4b-it_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..412d9f1fca6b3e81531909a695eea1ce45efcb6e --- /dev/null +++ 
b/e3c_llm_results/google/medgemma-4b-it_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 37.09009025, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 56.330000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.769999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.52, + "stderr": 0.0 + } + ], + "average_accuracy": 54.54, + "best_prompt": 56.330000000000005, + "prompt_id": "p1", + "CPS": 55.321693 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.920000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.17, + "stderr": 0.0 + } + ], + "average_accuracy": 17.545, + "best_prompt": 19.17, + "prompt_id": "p2", + "CPS": 18.858487500000003 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_PL.json b/e3c_llm_results/google/medgemma-4b-it_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..7fed991c7499ee2c17181d350e82c36106bd427b --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 31.79935483333333, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", 
+ "metric": "f1", + "value": 51.85999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.059999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.85999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 51.926666666666655, + "best_prompt": 52.059999999999995, + "prompt_id": "p2", + "CPS": 51.99058666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.709999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 9.969999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 10.84, + "best_prompt": 11.709999999999999, + "prompt_id": "p1", + "CPS": 11.608122999999999 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_SK.json b/e3c_llm_results/google/medgemma-4b-it_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..2d69fa0e9c70a53e2ea4427554f8ac2718000b67 --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 28.988108833333335, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 47.56, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 44.49, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 47.56, + "stderr": 0.0 + } + ], + "average_accuracy": 46.53666666666667, + "best_prompt": 47.56, + "prompt_id": "p1", + "CPS": 47.07330266666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 10.95, + "stderr": 0.0 + }, + { + "prompt": "p2", + 
"metric": "f1", + "value": 10.09, + "stderr": 0.0 + } + ], + "average_accuracy": 10.52, + "best_prompt": 10.95, + "prompt_id": "p1", + "CPS": 10.902915 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/google/medgemma-4b-it_10_SL.json b/e3c_llm_results/google/medgemma-4b-it_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..5ce12945def960eefdd10aac3a6b5ea9c1864a4e --- /dev/null +++ b/e3c_llm_results/google/medgemma-4b-it_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 31.314164499999997, + "config": { + "model_name": "google/medgemma-4b-it", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "google/medgemma-4b-it", + "base_model": "Gemma3ForConditionalGeneration", + "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d", + "submitted_time": "2025-05-19 20:52:44+00:00", + "num_params_billion": 4.300079472, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.17, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 49.55, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.17, + "stderr": 0.0 + } + ], + "average_accuracy": 50.629999999999995, + "best_prompt": 51.17, + "prompt_id": "p1", + "CPS": 50.893682 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.78, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 11.01, + "stderr": 0.0 + } + ], + "average_accuracy": 11.395, + "best_prompt": 11.78, + "prompt_id": "p1", + "CPS": 11.734646999999999 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_EN.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..c0ae4e930c79065b67d63cfcc6fc4af41c70fba6 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 33.28536166666667, + "config": { 
+ "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 25.019999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 30.89, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 27.68, + "stderr": 0.0 + } + ], + "average_accuracy": 27.863333333333333, + "best_prompt": 30.89, + "prompt_id": "p2", + "CPS": 29.955062666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 22.74, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.290000000000006, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 35.42, + "stderr": 0.0 + } + ], + "average_accuracy": 32.483333333333334, + "best_prompt": 39.290000000000006, + "prompt_id": "p2", + "CPS": 36.61566066666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_GR.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..93d5f8222bb73eac2bb5b7aff34d71b25f65ef97 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 15.980523333333334, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": 
"f1", + "value": 16.41, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 18.69, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.41, + "stderr": 0.0 + } + ], + "average_accuracy": 17.17, + "best_prompt": 18.69, + "prompt_id": "p2", + "CPS": 18.405912 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.359999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 7.779999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.180000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 9.773333333333333, + "best_prompt": 14.180000000000001, + "prompt_id": "p3", + "CPS": 13.555134666666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_IT.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..2c837b3b6b372cf0b28ffd264f9ff25d7c83b0f6 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 23.758854499999998, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 33.97, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 33.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.26, + "stderr": 0.0 + } + ], + "average_accuracy": 33.07666666666666, + "best_prompt": 33.97, + "prompt_id": "p1", + "CPS": 33.666534666666664 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 14.89, + "stderr": 0.0 + }, + 
{ + "prompt": "p2", + "metric": "f1", + "value": 7.359999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 1.49, + "stderr": 0.0 + } + ], + "average_accuracy": 7.913333333333333, + "best_prompt": 14.89, + "prompt_id": "p1", + "CPS": 13.851174333333335 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_PL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..2d11b207b3cc06efca3e6c395a7f3e01897a8f76 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 29.002397166666665, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 28.15, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 28.610000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.15, + "stderr": 0.0 + } + ], + "average_accuracy": 28.30333333333333, + "best_prompt": 28.610000000000003, + "prompt_id": "p2", + "CPS": 28.522262666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.09, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 29.080000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 30.61, + "stderr": 0.0 + } + ], + "average_accuracy": 26.926666666666666, + "best_prompt": 30.61, + "prompt_id": "p3", + "CPS": 29.482531666666663 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_SK.json 
b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..899fc55a71ca57694da1af13bc8df3e64e248beb --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 21.908259666666666, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 25.71, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 29.87, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 25.71, + "stderr": 0.0 + } + ], + "average_accuracy": 27.096666666666668, + "best_prompt": 29.87, + "prompt_id": "p2", + "CPS": 29.041605333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.540000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.77, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 15.540000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 10.616666666666667, + "best_prompt": 15.540000000000001, + "prompt_id": "p1", + "CPS": 14.774914 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_SL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..c3a032699b93f22c4d898f1796521a8f166678a9 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 16.78806766666667, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": 
"microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 29.98, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 26.8, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 29.98, + "stderr": 0.0 + } + ], + "average_accuracy": 28.92, + "best_prompt": 29.98, + "prompt_id": "p1", + "CPS": 29.662212000000004 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 3.95, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 1.21, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 3.95, + "stderr": 0.0 + } + ], + "average_accuracy": 3.0366666666666666, + "best_prompt": 3.95, + "prompt_id": "p1", + "CPS": 3.9139233333333334 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..0df51052f3cd02a20cdbf74ca118988824ed9f34 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 30.9929955, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.09, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 49.66, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + 
"value": 50.49, + "stderr": 0.0 + } + ], + "average_accuracy": 50.080000000000005, + "best_prompt": 50.49, + "prompt_id": "p3", + "CPS": 50.282991 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.75, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.95, + "stderr": 0.0 + } + ], + "average_accuracy": 11.35, + "best_prompt": 11.75, + "prompt_id": "p1", + "CPS": 11.703 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..e9f1b1dcb36bd8f0494116bc079fcbbd61eddbdb --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 20.344995166666667, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 33.75, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 34.03, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 33.75, + "stderr": 0.0 + } + ], + "average_accuracy": 33.843333333333334, + "best_prompt": 34.03, + "prompt_id": "p2", + "CPS": 33.96647733333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.2700000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.81, + "stderr": 0.0 + } + ], + "average_accuracy": 5.54, + "best_prompt": 6.81, + "prompt_id": "p2", + "CPS": 6.723513 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json 
b/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..9630d4c74c728514fd0ad286528e3704713b91cb --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 36.3490175, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.949999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.010000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 52.75, + "stderr": 0.0 + } + ], + "average_accuracy": 52.57, + "best_prompt": 53.010000000000005, + "prompt_id": "p2", + "CPS": 52.776756 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.14, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 9.610000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 15.375, + "best_prompt": 21.14, + "prompt_id": "p1", + "CPS": 19.921279000000002 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..dc4149a99351c4d8531026cfbd0d6cf649ed13c8 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 26.618304, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": 
"0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.129999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.32, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.129999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 39.85999999999999, + "best_prompt": 41.32, + "prompt_id": "p2", + "CPS": 40.716727999999996 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 12.07, + "stderr": 0.0 + } + ], + "average_accuracy": 12.31, + "best_prompt": 12.55, + "prompt_id": "p1", + "CPS": 12.51988 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..c240b904d5cb159530bb49ce9fe4804042ec5d78 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 23.377642833333333, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.06, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 38.61, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.06, + "stderr": 0.0 + } + ], + "average_accuracy": 40.24333333333333, + "best_prompt": 41.06, + "prompt_id": "p1", + "CPS": 
40.72467666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 5.09, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.0600000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 5.575, + "best_prompt": 6.0600000000000005, + "prompt_id": "p2", + "CPS": 6.030609 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..9c9595d905cc296e4b02233827f82fbded3a00d7 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 24.261933583333334, + "config": { + "model_name": "microsoft/MediPhi-Clinical", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "microsoft/MediPhi-Clinical", + "base_model": "Phi3ForCausalLM", + "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0", + "submitted_time": "2025-05-29 20:40:05+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.36, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.900000000000006, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.36, + "stderr": 0.0 + } + ], + "average_accuracy": 40.20666666666667, + "best_prompt": 40.36, + "prompt_id": "p1", + "CPS": 40.29811466666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.290000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.74, + "stderr": 0.0 + } + ], + "average_accuracy": 7.515000000000001, + "best_prompt": 8.290000000000001, + "prompt_id": "p1", + "CPS": 8.2257525 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_EN.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_EN.json new file mode 100644 
index 0000000000000000000000000000000000000000..6c0779796fa82060b849295643c06df44b0114ef --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 29.050603666666667, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.61, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 24.099999999999998, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.25, + "stderr": 0.0 + } + ], + "average_accuracy": 15.986666666666666, + "best_prompt": 24.099999999999998, + "prompt_id": "p2", + "CPS": 22.144686666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.35, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.06, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.04, + "stderr": 0.0 + } + ], + "average_accuracy": 29.816666666666666, + "best_prompt": 40.06, + "prompt_id": "p2", + "CPS": 35.95652066666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_GR.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..71d26338cfb4ed5c7793df881f3f0d103a47c271 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 15.261295333333333, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": 
"a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.94, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 8.9, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 12.94, + "stderr": 0.0 + } + ], + "average_accuracy": 11.593333333333334, + "best_prompt": 12.94, + "prompt_id": "p1", + "CPS": 12.765741333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 9.62, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.7299999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 19.16, + "stderr": 0.0 + } + ], + "average_accuracy": 11.836666666666666, + "best_prompt": 19.16, + "prompt_id": "p3", + "CPS": 17.75684933333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_IT.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..292d372190765f390ac4a0824e6ca24e69f82c92 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 27.906489166666667, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 24.84, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 27.169999999999998, + "stderr": 0.0 + } + ], + 
"average_accuracy": 20.226666666666667, + "best_prompt": 27.169999999999998, + "prompt_id": "p3", + "CPS": 25.283496333333332 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 17.119999999999997, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 28.96, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.61, + "stderr": 0.0 + } + ], + "average_accuracy": 26.23, + "best_prompt": 32.61, + "prompt_id": "p3", + "CPS": 30.529482 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_PL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..1202eb41e6f940041bb4dd7c92806e504221502e --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 23.550823, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.1, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 16.8, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 15.1, + "stderr": 0.0 + } + ], + "average_accuracy": 15.666666666666666, + "best_prompt": 16.8, + "prompt_id": "p2", + "CPS": 16.6096 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 26.83, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 31.259999999999998, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.32, + "stderr": 0.0 + } + ], + "average_accuracy": 28.80333333333333, + "best_prompt": 31.259999999999998, + 
"prompt_id": "p2", + "CPS": 30.492046 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_SK.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..7283b7adc881ac280deb3682387213262b6dca55 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 18.487242666666667, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 16.41, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 20.810000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.41, + "stderr": 0.0 + } + ], + "average_accuracy": 17.87666666666667, + "best_prompt": 20.810000000000002, + "prompt_id": "p2", + "CPS": 20.199573333333337 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 17.76, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 1.1199999999999999, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.76, + "stderr": 0.0 + } + ], + "average_accuracy": 12.213333333333333, + "best_prompt": 17.76, + "prompt_id": "p1", + "CPS": 16.774912 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_SL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..3ebdade3147e2a00171743d8bc718167b054e66b --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 16.379518, 
+ "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 17.580000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 18.6, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.580000000000002, + "stderr": 0.0 + } + ], + "average_accuracy": 17.92, + "best_prompt": 18.6, + "prompt_id": "p2", + "CPS": 18.47352 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 14.46, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.84, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.46, + "stderr": 0.0 + } + ], + "average_accuracy": 13.253333333333336, + "best_prompt": 14.46, + "prompt_id": "p1", + "CPS": 14.285516000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..a7650bd6ea580b99e5880ff3c71f8844ae1f0c2e --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 35.63120666666667, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 
53.56999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.27, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 50.629999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 52.156666666666666, + "best_prompt": 53.56999999999999, + "prompt_id": "p1", + "CPS": 52.81287733333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 14.32, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 18.88, + "stderr": 0.0 + } + ], + "average_accuracy": 16.6, + "best_prompt": 18.88, + "prompt_id": "p2", + "CPS": 18.449536000000002 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..09121be062b47a9d9fa8f95b95dae8679fef4653 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 18.171546, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 28.22, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 29.99, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.22, + "stderr": 0.0 + } + ], + "average_accuracy": 28.81, + "best_prompt": 29.99, + "prompt_id": "p2", + "CPS": 29.636117999999996 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 5.76, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.74, + "stderr": 0.0 + } + ], + "average_accuracy": 6.25, + 
"best_prompt": 6.74, + "prompt_id": "p2", + "CPS": 6.706974 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..5725a716e2cdc34e293f9fcaf8fe800989401764 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 42.6923105, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.269999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.9, + "stderr": 0.0 + } + ], + "average_accuracy": 57.15333333333333, + "best_prompt": 57.9, + "prompt_id": "p3", + "CPS": 57.467679999999994 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 28.73, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.07, + "stderr": 0.0 + } + ], + "average_accuracy": 25.9, + "best_prompt": 28.73, + "prompt_id": "p1", + "CPS": 27.916941 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..163aa7524ca797372574ed3c5c67fa525c589fa2 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 35.0467105, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "10", + "batch_size": 
1, + "LANG": "PL", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 44.17, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 45.06, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 44.17, + "stderr": 0.0 + } + ], + "average_accuracy": 44.46666666666667, + "best_prompt": 45.06, + "prompt_id": "p2", + "CPS": 44.792644 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.25, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 26.86, + "stderr": 0.0 + } + ], + "average_accuracy": 21.055, + "best_prompt": 26.86, + "prompt_id": "p2", + "CPS": 25.300776999999997 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..bd12cc62797182a384cfd307e705454eb5fac735 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 28.277421583333332, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.269999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.23, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.269999999999996, + 
"stderr": 0.0 + } + ], + "average_accuracy": 42.25666666666666, + "best_prompt": 43.269999999999996, + "prompt_id": "p1", + "CPS": 42.831530666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 10.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 13.950000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 12.325, + "best_prompt": 13.950000000000001, + "prompt_id": "p2", + "CPS": 13.7233125 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..a935ec1eddd79dd152a2cc0e8ace87b73512c462 --- /dev/null +++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 26.81930283333333, + "config": { + "model_name": "microsoft/MediPhi-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "microsoft/MediPhi-Instruct", + "base_model": "Phi3ForCausalLM", + "revision": "a94ac478e7c246103d55665a0804684042f3b973", + "submitted_time": "2025-07-11 19:28:15+00:00", + "num_params_billion": 3.821079552, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 39.73, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 35.64, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 39.73, + "stderr": 0.0 + } + ], + "average_accuracy": 38.36666666666667, + "best_prompt": 39.73, + "prompt_id": "p1", + "CPS": 39.188347666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 11.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 14.680000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 13.115000000000002, + "best_prompt": 14.680000000000001, + "prompt_id": "p2", + "CPS": 14.450258000000002 + } + } +} \ No newline at end of file diff 
--git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_EN.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..74e5096fed16055fcb6688f2815227b2426bf9d0 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 29.513497333333333, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 25.290000000000003, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.44, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 21.62, + "stderr": 0.0 + } + ], + "average_accuracy": 22.783333333333335, + "best_prompt": 25.290000000000003, + "prompt_id": "p1", + "CPS": 24.656064 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 36.88, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 36.42, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.93, + "stderr": 0.0 + } + ], + "average_accuracy": 30.076666666666668, + "best_prompt": 36.88, + "prompt_id": "p1", + "CPS": 34.370930666666666 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_GR.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..6719657cdb412c1a590eecc02936bf8490c96d00 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 14.120156666666666, + "config": { + 
"model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 16.03, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.09, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.03, + "stderr": 0.0 + } + ], + "average_accuracy": 17.05, + "best_prompt": 19.09, + "prompt_id": "p2", + "CPS": 18.700564 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.32, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.4799999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.94, + "stderr": 0.0 + } + ], + "average_accuracy": 5.913333333333333, + "best_prompt": 9.94, + "prompt_id": "p3", + "CPS": 9.539749333333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_IT.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..6e72b69246cb0cb14bdd742a15d4e5bd0a749f2f --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 19.788279666666668, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + 
"metric": "f1", + "value": 27.88, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 20.3, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 24.81, + "stderr": 0.0 + } + ], + "average_accuracy": 24.33, + "best_prompt": 27.88, + "prompt_id": "p1", + "CPS": 26.89026 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 13.819999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 1.63, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 1.4000000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 5.616666666666666, + "best_prompt": 13.819999999999999, + "prompt_id": "p1", + "CPS": 12.686299333333332 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_PL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..ed2764b1f6aa0d0040e55ea28a9166546136c9e4 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 21.287892, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 30.240000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 28.110000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 30.240000000000002, + "stderr": 0.0 + } + ], + "average_accuracy": 29.53, + "best_prompt": 30.240000000000002, + "prompt_id": "p1", + "CPS": 30.025296 + }, + "re": { + "prompts": [ + { + "prompt": 
"p1", + "metric": "f1", + "value": 8.63, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 12.920000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.63, + "stderr": 0.0 + } + ], + "average_accuracy": 10.06, + "best_prompt": 12.920000000000002, + "prompt_id": "p2", + "CPS": 12.550488000000001 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SK.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..9c30a1c774cfb1ee338ab94043d096ea4d55ba70 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 14.880865666666669, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.43, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.46, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 21.43, + "stderr": 0.0 + } + ], + "average_accuracy": 21.439999999999998, + "best_prompt": 21.46, + "prompt_id": "p2", + "CPS": 21.455708 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.5600000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 8.35, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.5600000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 7.823333333333333, + "best_prompt": 8.35, + "prompt_id": "p2", + "CPS": 8.306023333333334 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..fb51bf4f973a10498c0092ba08780e0682e5fe39 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 17.567646000000003, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 17.66, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.470000000000002, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.66, + "stderr": 0.0 + } + ], + "average_accuracy": 18.263333333333335, + "best_prompt": 19.470000000000002, + "prompt_id": "p2", + "CPS": 19.235062000000003 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.66, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 16.950000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.66, + "stderr": 0.0 + } + ], + "average_accuracy": 10.756666666666668, + "best_prompt": 16.950000000000003, + "prompt_id": "p2", + "CPS": 15.900230000000002 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..331516e5509ecb6d70a8c113a6d4a67d17970c95 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 
42.61762233333333, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 47.25, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 47.3, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 48.05, + "stderr": 0.0 + } + ], + "average_accuracy": 47.53333333333333, + "best_prompt": 48.05, + "prompt_id": "p3", + "CPS": 47.801741666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 25.929999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.339999999999996, + "stderr": 0.0 + } + ], + "average_accuracy": 33.135, + "best_prompt": 40.339999999999996, + "prompt_id": "p2", + "CPS": 37.433503 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..0e61eac9dba0617e85484c36930f38adcaf4ff21 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 29.018154000000003, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": 
"f1", + "value": 34.98, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 36.480000000000004, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 34.98, + "stderr": 0.0 + } + ], + "average_accuracy": 35.48, + "best_prompt": 36.480000000000004, + "prompt_id": "p2", + "CPS": 36.1152 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 10.549999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.43, + "stderr": 0.0 + } + ], + "average_accuracy": 16.99, + "best_prompt": 23.43, + "prompt_id": "p2", + "CPS": 21.921108 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..90e813c542eeb50e89264e9a7bbc1d77d5eb3b0e --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 47.034969, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.470000000000006, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.32, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 51.49, + "stderr": 0.0 + } + ], + "average_accuracy": 51.76, + "best_prompt": 52.32, + "prompt_id": "p2", + "CPS": 52.027007999999995 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 30.919999999999998, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 
45.300000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 38.11, + "best_prompt": 45.300000000000004, + "prompt_id": "p2", + "CPS": 42.04293 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..a57782b79c6e9f4213222458c344219974e4c69d --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 43.90926, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 49.11, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.46000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 49.11, + "stderr": 0.0 + } + ], + "average_accuracy": 49.56, + "best_prompt": 50.46000000000001, + "prompt_id": "p2", + "CPS": 50.005860000000006 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 38.95, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 33.11, + "stderr": 0.0 + } + ], + "average_accuracy": 36.03, + "best_prompt": 38.95, + "prompt_id": "p1", + "CPS": 37.81266 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..50c353afb467295e60134901013669f36f0ddebd --- /dev/null +++ 
b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 30.650676250000004, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.940000000000005, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.29, + "stderr": 0.0 + } + ], + "average_accuracy": 39.50666666666667, + "best_prompt": 40.29, + "prompt_id": "p1", + "CPS": 39.974395 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.55, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.48, + "stderr": 0.0 + } + ], + "average_accuracy": 20.515, + "best_prompt": 21.55, + "prompt_id": "p1", + "CPS": 21.326957500000002 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..169954f3903532996acb053dcdc1c21c41c9e80d --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 30.92908, + "config": { + "model_name": "mistralai/Mistral-7B-Instruct-v0.2", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "mistralai/Mistral-7B-Instruct-v0.2", + "base_model": "MistralForCausalLM", + "revision": "63a8b081895390a26e140280378bc85ec8bce07a", + "submitted_time": "2023-12-11 13:18:44+00:00", + "num_params_billion": 7.241732096, + "language": "" + }, + "tasks": 
{ + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 42.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.74, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 42.04, + "stderr": 0.0 + } + ], + "average_accuracy": 41.94, + "best_prompt": 42.04, + "prompt_id": "p1", + "CPS": 41.99796 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 19.900000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 19.5, + "stderr": 0.0 + } + ], + "average_accuracy": 19.700000000000003, + "best_prompt": 19.900000000000002, + "prompt_id": "p1", + "CPS": 19.860200000000003 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_EN.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..52ede58d4b7e91ba34218b9be8dfa1edc4c1c45f --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 31.20982683333333, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 27.67, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 22.99, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 17.48, + "stderr": 0.0 + } + ], + "average_accuracy": 22.713333333333335, + "best_prompt": 27.67, + "prompt_id": "p1", + "CPS": 26.298490333333337 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + 
"value": 36.94, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 34.82, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.41, + "stderr": 0.0 + } + ], + "average_accuracy": 34.72333333333333, + "best_prompt": 36.94, + "prompt_id": "p1", + "CPS": 36.12116333333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_GR.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..450b326d419591181d6f4d3a5569b18fa3bf9e20 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 18.510654333333335, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 7.32, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 6.87, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 7.32, + "stderr": 0.0 + } + ], + "average_accuracy": 7.170000000000001, + "best_prompt": 7.32, + "prompt_id": "p1", + "CPS": 7.30902 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.75, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.17, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.87, + "stderr": 0.0 + } + ], + "average_accuracy": 23.263333333333332, + "best_prompt": 32.87, + "prompt_id": "p3", + "CPS": 29.712288666666666 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_IT.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..77fa35820f9d5c29b775b0a552876c44e6bf6e31 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 26.353595166666665, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 27.92, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 17.72, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 13.16, + "stderr": 0.0 + } + ], + "average_accuracy": 19.599999999999998, + "best_prompt": 27.92, + "prompt_id": "p1", + "CPS": 25.597056 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 28.49, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.84, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 18.61, + "stderr": 0.0 + } + ], + "average_accuracy": 23.646666666666665, + "best_prompt": 28.49, + "prompt_id": "p1", + "CPS": 27.11013433333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_PL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..8ace4330841e419b7656d4dc037041e081372b5b --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 12.708361833333333, + "config": 
{ + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.83, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.390000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 4.83, + "stderr": 0.0 + } + ], + "average_accuracy": 4.683333333333334, + "best_prompt": 4.83, + "prompt_id": "p1", + "CPS": 4.822916 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.23, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 16.86, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.61, + "stderr": 0.0 + } + ], + "average_accuracy": 18.233333333333334, + "best_prompt": 21.23, + "prompt_id": "p1", + "CPS": 20.593807666666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SK.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..66a263277c38d6ee3968141d54319ccd52f50d0d --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 12.570468000000002, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": 
"en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 6.8500000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 8.44, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 6.8500000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 7.38, + "best_prompt": 8.44, + "prompt_id": "p2", + "CPS": 8.350536 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 16.96, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 13.96, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 16.96, + "stderr": 0.0 + } + ], + "average_accuracy": 15.96, + "best_prompt": 16.96, + "prompt_id": "p1", + "CPS": 16.7904 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..a53b24df5563a56c9c9e66d930010cfa94f5240c --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 15.375161166666668, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 8.61, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 8.05, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.61, + "stderr": 0.0 + } + ], + "average_accuracy": 8.423333333333334, + "best_prompt": 8.61, + "prompt_id": "p1", + "CPS": 
8.593928 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 23.09, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.96, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 23.09, + "stderr": 0.0 + } + ], + "average_accuracy": 19.046666666666667, + "best_prompt": 23.09, + "prompt_id": "p1", + "CPS": 22.156394333333335 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..a9ca30e36d1c9ba1200906eb04a3c5ee7dc947b8 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 52.1037285, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.769999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.41, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 56.68, + "stderr": 0.0 + } + ], + "average_accuracy": 57.62, + "best_prompt": 58.41, + "prompt_id": "p2", + "CPS": 57.948561 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 34.82, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.080000000000005, + "stderr": 0.0 + } + ], + "average_accuracy": 42.45, + "best_prompt": 50.080000000000005, + "prompt_id": "p2", + "CPS": 46.258896 + } + } +} \ No newline at end of file diff --git 
a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..30ae9f124005ce43a9410b480b4fc21fe3693645 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 36.652986500000004, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.81, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 49.88, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 50.81, + "stderr": 0.0 + } + ], + "average_accuracy": 50.5, + "best_prompt": 50.81, + "prompt_id": "p1", + "CPS": 50.652489 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 20.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 22.96, + "stderr": 0.0 + } + ], + "average_accuracy": 21.625, + "best_prompt": 22.96, + "prompt_id": "p2", + "CPS": 22.653484000000002 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..ffeaed0aa24500189694adb019a652ddb50890d4 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 51.30399291666667, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "10", + "batch_size": 
1, + "LANG": "IT", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 64.3, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 64.37, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 64.57000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 64.41333333333334, + "best_prompt": 64.57000000000001, + "prompt_id": "p3", + "CPS": 64.46884033333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 27.08, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 40.99, + "stderr": 0.0 + } + ], + "average_accuracy": 34.035, + "best_prompt": 40.99, + "prompt_id": "p2", + "CPS": 38.139145500000005 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..3079ee525824488df6c027190e8557f78d768ea1 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 36.291591, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 53.52, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", 
+ "value": 54.21, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.52, + "stderr": 0.0 + } + ], + "average_accuracy": 53.75, + "best_prompt": 54.21, + "prompt_id": "p2", + "CPS": 53.960634 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 18.63, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 18.55, + "stderr": 0.0 + } + ], + "average_accuracy": 18.59, + "best_prompt": 18.63, + "prompt_id": "p1", + "CPS": 18.622548 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..f508f2cf049802baf4d56e0a4d146ea9590e3f40 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 35.501746499999996, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 50.24999999999999, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 50.4, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 50.24999999999999, + "stderr": 0.0 + } + ], + "average_accuracy": 50.29999999999999, + "best_prompt": 50.4, + "prompt_id": "p2", + "CPS": 50.349599999999995 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.370000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 21.66, + "stderr": 0.0 + } + ], + "average_accuracy": 17.015, + 
"best_prompt": 21.66, + "prompt_id": "p2", + "CPS": 20.653893 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..8058a5dff8de98a7efe1ce8f45ad024a463ca569 --- /dev/null +++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 36.59565525, + "config": { + "model_name": "mistralai/Mistral-Nemo-Instruct-2407", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "mistralai/Mistral-Nemo-Instruct-2407", + "base_model": "MistralForCausalLM", + "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3", + "submitted_time": "2024-07-17 17:26:49+00:00", + "num_params_billion": 12.2477824, + "language": "en_fr_de_es_it_pt_ru_zh_ja" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 53.23, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.349999999999994, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 53.23, + "stderr": 0.0 + } + ], + "average_accuracy": 53.27, + "best_prompt": 53.349999999999994, + "prompt_id": "p2", + "CPS": 53.30732 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 13.900000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 20.57, + "stderr": 0.0 + } + ], + "average_accuracy": 17.235, + "best_prompt": 20.57, + "prompt_id": "p2", + "CPS": 19.8839905 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_EN.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..d7fa5577a054e4c4d05d08e14f8ad47a129e0a2f --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 32.621343333333336, + 
"config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 22.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 27.089999999999996, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 29.959999999999997, + "stderr": 0.0 + } + ], + "average_accuracy": 26.583333333333332, + "best_prompt": 29.959999999999997, + "prompt_id": "p3", + "CPS": 28.948350666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 21.57, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 38.35, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.48, + "stderr": 0.0 + } + ], + "average_accuracy": 32.800000000000004, + "best_prompt": 38.48, + "prompt_id": "p3", + "CPS": 36.294336 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_GR.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..1cf988506780753209f92d40621e68cbff6f6bca --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 14.248081500000001, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + 
"metric": "f1", + "value": 21.3, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 4.95, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 21.3, + "stderr": 0.0 + } + ], + "average_accuracy": 15.850000000000001, + "best_prompt": 21.3, + "prompt_id": "p1", + "CPS": 20.13915 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 4.01, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 2.5, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 8.67, + "stderr": 0.0 + } + ], + "average_accuracy": 5.06, + "best_prompt": 8.67, + "prompt_id": "p3", + "CPS": 8.357013 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_IT.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..486acfd28b741977f1049f5415ac79b2550bb88b --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 23.062588666666667, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 12.61, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.27, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 24.44, + "stderr": 0.0 + } + ], + "average_accuracy": 20.106666666666666, + "best_prompt": 24.44, + "prompt_id": "p3", + "CPS": 23.380933333333335 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 
16.99, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 14.92, + "stderr": 0.0 + } + ], + "average_accuracy": 18.65, + "best_prompt": 24.04, + "prompt_id": "p1", + "CPS": 22.744244 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_PL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..1e782f919c922f0c0eec0ea68327fd937e493722 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 19.314392833333333, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 24.52, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 23.380000000000003, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 24.52, + "stderr": 0.0 + } + ], + "average_accuracy": 24.14, + "best_prompt": 24.52, + "prompt_id": "p1", + "CPS": 24.426824 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 15.010000000000002, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 1.23, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 12.64, + "stderr": 0.0 + } + ], + "average_accuracy": 9.626666666666667, + "best_prompt": 15.010000000000002, + "prompt_id": "p1", + "CPS": 14.201961666666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SK.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SK.json new file mode 100644 index 
0000000000000000000000000000000000000000..c750e1e3efa704f41a04658a66df1c01fe84f803 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 16.691507333333334, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 27.169999999999998, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 31.78, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 27.169999999999998, + "stderr": 0.0 + } + ], + "average_accuracy": 28.706666666666667, + "best_prompt": 31.78, + "prompt_id": "p2", + "CPS": 30.803294666666666 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 1.43, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 2.6, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 1.43, + "stderr": 0.0 + } + ], + "average_accuracy": 1.82, + "best_prompt": 2.6, + "prompt_id": "p2", + "CPS": 2.57972 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..de5e2ef6ac90f60b1dce06ad24bb425c268247a6 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 12.605178333333333, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": 
"8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 25.19, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 18.529999999999998, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 25.19, + "stderr": 0.0 + } + ], + "average_accuracy": 22.97, + "best_prompt": 25.19, + "prompt_id": "p1", + "CPS": 24.630782 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.47000000000000003, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.58, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.47000000000000003, + "stderr": 0.0 + } + ], + "average_accuracy": 0.5066666666666667, + "best_prompt": 0.58, + "prompt_id": "p2", + "CPS": 0.5795746666666666 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..b5e7b6262bac82ece4c4bbb2cd1cdc97089b24aa --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.2351185, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 58.4, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 54.21, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 59.28, + "stderr": 0.0 + } + ], + 
"average_accuracy": 57.29666666666666, + "best_prompt": 59.28, + "prompt_id": "p3", + "CPS": 58.104279999999996 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.35, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.86, + "stderr": 0.0 + } + ], + "average_accuracy": 49.605000000000004, + "best_prompt": 55.86, + "prompt_id": "p2", + "CPS": 52.365957 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..d4377d8728ba5a5ae908b22c1af752d7a49704d6 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 36.66668416666666, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 33.45, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 36.55, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 33.45, + "stderr": 0.0 + } + ], + "average_accuracy": 34.483333333333334, + "best_prompt": 36.55, + "prompt_id": "p2", + "CPS": 35.79463333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.49, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 37.55, + "stderr": 0.0 + } + ], + "average_accuracy": 37.519999999999996, + "best_prompt": 37.55, + "prompt_id": "p2", + "CPS": 37.538734999999996 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json 
b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..645b2d6b480189c228afb4c0be89cd567cb864e6 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 54.68382, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 58.209999999999994, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 54.32, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 56.220000000000006, + "stderr": 0.0 + } + ], + "average_accuracy": 56.25, + "best_prompt": 58.209999999999994, + "prompt_id": "p1", + "CPS": 57.069084 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 46.22, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 54.58, + "stderr": 0.0 + } + ], + "average_accuracy": 50.4, + "best_prompt": 54.58, + "prompt_id": "p2", + "CPS": 52.298556 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..91954f0c6a2f324b52b9097bb0303fc5211754cd --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 48.62956716666666, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": 
"8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 43.04, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.23, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 43.04, + "stderr": 0.0 + } + ], + "average_accuracy": 42.43666666666667, + "best_prompt": 43.04, + "prompt_id": "p1", + "CPS": 42.78032533333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.29, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.71, + "stderr": 0.0 + } + ], + "average_accuracy": 53.5, + "best_prompt": 55.71, + "prompt_id": "p2", + "CPS": 54.478809 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..a459171da36ed4654f83074129d6a78adb9f3393 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 44.76583875, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 45.45, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 41.160000000000004, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 45.45, + "stderr": 0.0 + } + ], + "average_accuracy": 44.02, + "best_prompt": 45.45, + "prompt_id": "p1", + "CPS": 44.800065000000004 + }, + "re": 
{ + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.5, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 46.949999999999996, + "stderr": 0.0 + } + ], + "average_accuracy": 42.224999999999994, + "best_prompt": 46.949999999999996, + "prompt_id": "p2", + "CPS": 44.7316125 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..e25214684f52f09b3d42a5e9c1c6ec948dd96485 --- /dev/null +++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 35.00057433333333, + "config": { + "model_name": "tiiuae/Falcon3-10B-Instruct", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "tiiuae/Falcon3-10B-Instruct", + "base_model": "LlamaForCausalLM", + "revision": "8799bc6aec0152757221dc6b272d824642db6202", + "submitted_time": "2024-12-14 05:17:25+00:00", + "num_params_billion": 10.30565376, + "language": "" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 41.21, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 39.09, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 41.21, + "stderr": 0.0 + } + ], + "average_accuracy": 40.50333333333334, + "best_prompt": 41.21, + "prompt_id": "p1", + "CPS": 40.918782666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 23.23, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 30.12, + "stderr": 0.0 + } + ], + "average_accuracy": 26.675, + "best_prompt": 30.12, + "prompt_id": "p2", + "CPS": 29.082366 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_0_EN.json b/e3c_llm_results/unsloth/phi-4_0_EN.json new file mode 100644 index 
0000000000000000000000000000000000000000..92f33f44529c6c1a4e1cd0b895f6c7d3b2a942ac --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_0_EN.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 23.598540333333336, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "EN", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 2.52, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 5.72, + "stderr": 0.0 + } + ], + "average_accuracy": 2.7466666666666666, + "best_prompt": 5.72, + "prompt_id": "p3", + "CPS": 5.549925333333333 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 40.22, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 42.19, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 40.300000000000004, + "stderr": 0.0 + } + ], + "average_accuracy": 40.903333333333336, + "best_prompt": 42.19, + "prompt_id": "p2", + "CPS": 41.64715533333334 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_0_GR.json b/e3c_llm_results/unsloth/phi-4_0_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..05d89337bff36f6b158e2f634552675b418a00a0 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_0_GR.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 13.214538500000002, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "GR", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + 
"language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + } + ], + "average_accuracy": 0.0, + "best_prompt": 0.0, + "prompt_id": "p1", + "CPS": 0.0 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 29.01, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 22.08, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 9.25, + "stderr": 0.0 + } + ], + "average_accuracy": 20.113333333333333, + "best_prompt": 29.01, + "prompt_id": "p1", + "CPS": 26.429077000000003 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_0_IT.json b/e3c_llm_results/unsloth/phi-4_0_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..6e26ae3093925c32149d6e2f3bf7f17d6433f196 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_0_IT.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 32.617002166666666, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "IT", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 0.0, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 17.24, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 34.28, + "stderr": 0.0 + } + ], + "average_accuracy": 17.173333333333332, + "best_prompt": 34.28, + "prompt_id": "p3", + "CPS": 28.415834666666665 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 33.54, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": 
"f1", + "value": 37.37, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 36.77, + "stderr": 0.0 + } + ], + "average_accuracy": 35.89333333333334, + "best_prompt": 37.37, + "prompt_id": "p2", + "CPS": 36.81816966666667 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_0_PL.json b/e3c_llm_results/unsloth/phi-4_0_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..aa90d501bcb8fd46119784ae5142abae8480fb81 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_0_PL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 20.92978433333333, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "PL", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 2.36, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 3.66, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 2.36, + "stderr": 0.0 + } + ], + "average_accuracy": 2.793333333333333, + "best_prompt": 3.66, + "prompt_id": "p2", + "CPS": 3.62828 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 37.99, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 38.29, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 38.129999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 38.13666666666666, + "best_prompt": 38.29, + "prompt_id": "p2", + "CPS": 38.231288666666664 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_0_SK.json b/e3c_llm_results/unsloth/phi-4_0_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..e019448e08233966028e131fc182396d832c926f --- /dev/null +++ 
b/e3c_llm_results/unsloth/phi-4_0_SK.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 21.629032, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SK", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 3.16, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 10.7, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 3.16, + "stderr": 0.0 + } + ], + "average_accuracy": 5.673333333333333, + "best_prompt": 10.7, + "prompt_id": "p2", + "CPS": 10.162146666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 32.519999999999996, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 33.26, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.519999999999996, + "stderr": 0.0 + } + ], + "average_accuracy": 32.76666666666666, + "best_prompt": 33.26, + "prompt_id": "p2", + "CPS": 33.09591733333333 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_0_SL.json b/e3c_llm_results/unsloth/phi-4_0_SL.json new file mode 100644 index 0000000000000000000000000000000000000000..e4f1d858f492b5afc6f070c32200b284266cd874 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_0_SL.json @@ -0,0 +1,69 @@ +{ + "average_CPS": 28.7078975, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "0", + "batch_size": 1, + "LANG": "SL", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + 
"value": 28.7, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 9.81, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 28.7, + "stderr": 0.0 + } + ], + "average_accuracy": 22.403333333333332, + "best_prompt": 28.7, + "prompt_id": "p1", + "CPS": 26.892856666666667 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 32.09, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 17.44, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 32.09, + "stderr": 0.0 + } + ], + "average_accuracy": 27.206666666666667, + "best_prompt": 32.09, + "prompt_id": "p1", + "CPS": 30.522938333333336 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_EN.json b/e3c_llm_results/unsloth/phi-4_10_EN.json new file mode 100644 index 0000000000000000000000000000000000000000..6fadb2859e48671f3d04006c3740f2243bf1be09 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_10_EN.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 57.3466435, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "EN", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 60.980000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 57.11000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 61.41, + "stderr": 0.0 + } + ], + "average_accuracy": 59.833333333333336, + "best_prompt": 61.41, + "prompt_id": "p3", + "CPS": 60.441769 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 49.120000000000005, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.26, + "stderr": 0.0 + } 
+ ], + "average_accuracy": 52.69, + "best_prompt": 56.26, + "prompt_id": "p2", + "CPS": 54.251518 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_GR.json b/e3c_llm_results/unsloth/phi-4_10_GR.json new file mode 100644 index 0000000000000000000000000000000000000000..1e8e520d343ada009bd322e341e8633e009a92e4 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_10_GR.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 54.36022816666667, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "GR", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 57.17, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 56.11000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 57.17, + "stderr": 0.0 + } + ], + "average_accuracy": 56.81666666666667, + "best_prompt": 57.17, + "prompt_id": "p1", + "CPS": 56.96799933333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 49.35, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.61, + "stderr": 0.0 + } + ], + "average_accuracy": 50.980000000000004, + "best_prompt": 52.61, + "prompt_id": "p2", + "CPS": 51.752457 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_IT.json b/e3c_llm_results/unsloth/phi-4_10_IT.json new file mode 100644 index 0000000000000000000000000000000000000000..fa0c532f79c35d1e6eba7397536cf25ae1c276d5 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_10_IT.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 62.7994975, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "IT", + "model": "unsloth/phi-4", + 
"base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 66.47, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 67.32000000000001, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 68.97, + "stderr": 0.0 + } + ], + "average_accuracy": 67.58666666666666, + "best_prompt": 68.97, + "prompt_id": "p3", + "CPS": 68.01591499999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 56.08, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 58.199999999999996, + "stderr": 0.0 + } + ], + "average_accuracy": 57.14, + "best_prompt": 58.199999999999996, + "prompt_id": "p2", + "CPS": 57.58308 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_PL.json b/e3c_llm_results/unsloth/phi-4_10_PL.json new file mode 100644 index 0000000000000000000000000000000000000000..900d801d6a7a0752b34896e5e05ce1991e8f738f --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_10_PL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 55.851632499999994, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "PL", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.489999999999995, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 53.239999999999995, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.489999999999995, + "stderr": 0.0 + } + ], + "average_accuracy": 54.73999999999999, + "best_prompt": 
55.489999999999995, + "prompt_id": "p1", + "CPS": 55.07382499999999 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 54.230000000000004, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 57.599999999999994, + "stderr": 0.0 + } + ], + "average_accuracy": 55.915, + "best_prompt": 57.599999999999994, + "prompt_id": "p2", + "CPS": 56.62944 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_SK.json b/e3c_llm_results/unsloth/phi-4_10_SK.json new file mode 100644 index 0000000000000000000000000000000000000000..5ae34425e6b791d353340c516375b3e919140c1b --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_10_SK.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 53.08822666666667, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SK", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.61000000000001, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 54.49, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.61000000000001, + "stderr": 0.0 + } + ], + "average_accuracy": 55.23666666666667, + "best_prompt": 55.61000000000001, + "prompt_id": "p1", + "CPS": 55.40238933333334 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.06, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 49.94, + "stderr": 0.0 + } + ], + "average_accuracy": 50.5, + "best_prompt": 51.06, + "prompt_id": "p1", + "CPS": 50.774064 + } + } +} \ No newline at end of file diff --git a/e3c_llm_results/unsloth/phi-4_10_SL.json b/e3c_llm_results/unsloth/phi-4_10_SL.json new file mode 100644 index 
0000000000000000000000000000000000000000..ab7e893b84d037203dcfcba7c1a6c2fef7f044f7 --- /dev/null +++ b/e3c_llm_results/unsloth/phi-4_10_SL.json @@ -0,0 +1,63 @@ +{ + "average_CPS": 53.913512000000004, + "config": { + "model_name": "unsloth/phi-4", + "num_fewshot": "10", + "batch_size": 1, + "LANG": "SL", + "model": "unsloth/phi-4", + "base_model": "LlamaForCausalLM", + "revision": "c6220bde10fff762dbd72c3331894aa4cade249d", + "submitted_time": "2025-01-08 21:56:16+00:00", + "num_params_billion": 14.6595072, + "language": "en" + }, + "tasks": { + "ner": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 55.86, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 55.58, + "stderr": 0.0 + }, + { + "prompt": "p3", + "metric": "f1", + "value": 55.86, + "stderr": 0.0 + } + ], + "average_accuracy": 55.76666666666667, + "best_prompt": 55.86, + "prompt_id": "p1", + "CPS": 55.807864 + }, + "re": { + "prompts": [ + { + "prompt": "p1", + "metric": "f1", + "value": 51.17, + "stderr": 0.0 + }, + { + "prompt": "p2", + "metric": "f1", + "value": 52.32, + "stderr": 0.0 + } + ], + "average_accuracy": 51.745000000000005, + "best_prompt": 52.32, + "prompt_id": "p2", + "CPS": 52.01916000000001 + } + } +} \ No newline at end of file diff --git a/get_model_info.py b/get_model_info.py index bc5c6dd630dc417dac307eb6530d74cc7867d77a..cc0f02daf051e15a0aa09da5a43e4ff6154fb172 100644 --- a/get_model_info.py +++ b/get_model_info.py @@ -43,9 +43,9 @@ api = HfApi() # Directory paths # input_folder: Directory containing the output files of the lm-eval-harness library, including model accuracy metrics. #input_folder = "../evalita_llm_models_output/" -input_folder = "/home/sfarzi/leaderboard/evalita_llm_leaderboard/task_result/" +input_folder = "/home/sfarzi/leaderboard/MediLingua_Leaderboard/csv_files/outputs/" # output_folder: Directory where JSON files with model characteristics will be saved. 
-output_folder = "/home/sfarzi/leaderboard/evalita_llm_leaderboard/e3c_llm_requests/" +output_folder = "/home/sfarzi/leaderboard/MediLingua_Leaderboard/e3c_llm_requests/" # Creates the output folder if it doesn't exist os.makedirs(output_folder, exist_ok=True) @@ -55,7 +55,7 @@ model_pattern = re.compile(r"pretrained=([\w\-./]+)") # Scans files in the input folder for filename in os.listdir(input_folder): - if filename.endswith('.out'): + if filename.endswith('.txt'): file_path = os.path.join(input_folder, filename) # Reads the file content diff --git a/preprocess_models_output.py b/preprocess_models_output.py index a5d666145eaa39d0fcd6cc411b9264996bd8da57..6deb5a9843594a2d9794de166aa290a5d56ac63a 100644 --- a/preprocess_models_output.py +++ b/preprocess_models_output.py @@ -102,6 +102,16 @@ def calculate_task_metrics(task_info): def extract_data_from_file(file_path): """Extracts task and prompt data from a specified file.""" + LANG="" + if file_path.find ("__en__")!=-1 : LANG="EN" + if file_path.find ("__sl__")!=-1 : LANG="SL" + if file_path.find ("__it__")!=-1 : LANG="IT" + if file_path.find ("__gr__")!=-1 : LANG="GR" + if file_path.find ("__sk__")!=-1 : LANG="SK" + if file_path.find ("__pl__")!=-1 : LANG="PL" + if LANG=="" : + print ("ERROR: ",file_path) + with open(file_path, 'r') as file: lines = file.readlines() @@ -116,13 +126,13 @@ def extract_data_from_file(file_path): continue # Skips header lines - if line.startswith("| Tasks"): + if line.startswith("| Tasks") or line.startswith(" | Task"): continue # Extracts model configuration details - if line.startswith("hf (pretrained="): + if line.startswith("hf (pretrained=") or line.startswith("hf(pretrained="): start = line.find("pretrained=") + len("pretrained=") - end = line.find(",", start) + end = line.find(" )", start) pretrained_model = line[start:end] num_fewshot_match = re.search(r"num_fewshot:\s*([\w\d]+)", line) @@ -136,7 +146,7 @@ def extract_data_from_file(file_path): columns = line.split('|') if 
len(columns) != 11: continue - + print (columns) task_name = columns[1] metric = columns[5].strip() value = safe_float(columns[7]) @@ -154,7 +164,7 @@ def extract_data_from_file(file_path): {'prompts': [], 'average_accuracy': 0, 'best_prompt': None, 'prompt_id': None, 'CPS': None}) - elif task_name.startswith(" - ") and current_task: + elif task_name.startswith(" - ") and current_task: prompt_name = task_name[4:].strip() prompt_data = {'prompt': prompt_name, 'metric': metric, 'value': value * 100, 'stderr': stderr} @@ -193,7 +203,8 @@ def extract_data_from_file(file_path): config = { "model_name": pretrained_model, "num_fewshot": num_fewshot, - "batch_size": batch_size + "batch_size": batch_size, + "LANG": LANG } return {'average_CPS': average_CPS, 'config': config, 'tasks': tasks_data} @@ -218,18 +229,18 @@ This script executes the complete evaluation data processing workflow: - Organized by model organization/name - Contains complete evaluation results with metadata """ -directory_in_path = '/home/sfarzi/leaderboard/evalita_llm_leaderboard/task_result/' -directory_in_requests_path = '/home/sfarzi/leaderboard/evalita_llm_leaderboard/evalita_llm_requests/' -directory_out_results_path = '/home/sfarzi/leaderboard/evalita_llm_leaderboard/evalita_llm_results/' +directory_in_path = '/home/sfarzi/leaderboard/MediLingua_Leaderboard/csv_files/outputs/' +directory_in_requests_path = '/home/sfarzi/leaderboard/MediLingua_Leaderboard/e3c_llm_requests/' +directory_out_results_path = '/home/sfarzi/leaderboard/MediLingua_Leaderboard/e3c_llm_results/' for filename in os.listdir(directory_in_path): - if filename.endswith('.out'): + if filename.endswith('.txt'): file_path = os.path.join(directory_in_path, filename) json_output = extract_data_from_file(file_path) model_org_name, model_name = json_output['config']['model_name'].split('/') - + config_file_path = os.path.join(directory_in_requests_path, model_org_name, f"{model_name}.json") if os.path.exists(config_file_path): @@ -241,7 
+252,7 @@ for filename in os.listdir(directory_in_path): org_folder_path = os.path.join(directory_out_results_path, model_org_name) os.makedirs(org_folder_path, exist_ok=True) - file_suffix = f"{json_output['config']['num_fewshot']}" + file_suffix = f"{json_output['config']['num_fewshot']}" +"_"+ f"{json_output['config']['LANG']}" output_file_path = os.path.join(org_folder_path, f"{model_name}_{file_suffix}.json") with open(output_file_path, 'w', newline="\n") as outfile: diff --git a/src/display/.ipynb_checkpoints/css_html_js-checkpoint.py b/src/display/.ipynb_checkpoints/css_html_js-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..225e9d4131c29ae147d488630b1f8052f3173afc --- /dev/null +++ b/src/display/.ipynb_checkpoints/css_html_js-checkpoint.py @@ -0,0 +1,106 @@ +custom_css = """ + +.markdown-text { + font-size: 16px !important; +} + +#models-to-add-text { + font-size: 18px !important; +} + +#citation-button span { + font-size: 16px !important; +} + +#citation-button textarea { + font-size: 16px !important; +} + +#citation-button > label > button { + margin: 6px; + transform: scale(1.3); +} + +#leaderboard-table { + margin-top: 15px +} + +#leaderboard-table-lite { + margin-top: 15px +} + +#search-bar-table-box > div:first-child { + background: none; + border: none; +} + +#search-bar { + padding: 0px; +} + +/* Limit the width of the first AutoEvalColumn so that names don't expand too much */ +#leaderboard-table td:nth-child(2), +#leaderboard-table th:nth-child(2) { + max-width: 400px; + overflow: auto; + white-space: nowrap; +} + +.tab-buttons button { + font-size: 20px; +} + +#scale-logo { + border-style: none !important; + box-shadow: none; + display: block; + margin-left: auto; + margin-right: auto; + max-width: 600px; +} + +#scale-logo .download { + display: none; +} +#filter_type{ + border: 0; + padding-left: 0; + padding-top: 0; +} +#filter_type label { + display: flex; +} +#filter_type label > span{ + margin-top: 
var(--spacing-lg); + margin-right: 0.5em; +} +#filter_type label > .wrap{ + width: 103px; +} +#filter_type label > .wrap .wrap-inner{ + padding: 2px; +} +#filter_type label > .wrap .wrap-inner input{ + width: 1px +} +#filter-columns-type{ + border:0; + padding:0.5; +} +#filter-columns-size{ + border:0; + padding:0.5; +} +#box-filter > .form{ + border: 0 +} + +""" + +get_window_url_params = """ + function(url_params) { + const params = new URLSearchParams(window.location.search); + url_params = Object.fromEntries(params); + return url_params; + } + """ diff --git a/src/display/.ipynb_checkpoints/formatting-checkpoint.py b/src/display/.ipynb_checkpoints/formatting-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..b46d29c9dba71be80866bfe46c5a77acd0dc50ce --- /dev/null +++ b/src/display/.ipynb_checkpoints/formatting-checkpoint.py @@ -0,0 +1,27 @@ +def model_hyperlink(link, model_name): + return f'{model_name}' + + +def make_clickable_model(model_name): + link = f"https://huggingface.co/{model_name}" + return model_hyperlink(link, model_name) + + +def styled_error(error): + return f"

{error}

" + + +def styled_warning(warn): + return f"

{warn}

" + + +def styled_message(message): + return f"

{message}

" + + +def has_no_nan_values(df, columns): + return df[columns].notna().all(axis=1) + + +def has_nan_values(df, columns): + return df[columns].isna().any(axis=1) diff --git a/src/submission/.ipynb_checkpoints/check_validity-checkpoint.py b/src/submission/.ipynb_checkpoints/check_validity-checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..3c3ce45c4dacd2d600544c87584ee72c81d3b956 --- /dev/null +++ b/src/submission/.ipynb_checkpoints/check_validity-checkpoint.py @@ -0,0 +1,99 @@ +import json +import os +import re +from collections import defaultdict +from datetime import datetime, timedelta, timezone + +import huggingface_hub +from huggingface_hub import ModelCard +from huggingface_hub.hf_api import ModelInfo +from transformers import AutoConfig +from transformers.models.auto.tokenization_auto import AutoTokenizer + +def check_model_card(repo_id: str) -> tuple[bool, str]: + """Checks if the model card and license exist and have been filled""" + try: + card = ModelCard.load(repo_id) + except huggingface_hub.utils.EntryNotFoundError: + return False, "Please add a model card to your model to explain how you trained/fine-tuned it." + + # Enforce license metadata + if card.data.license is None: + if not ("license_name" in card.data and "license_link" in card.data): + return False, ( + "License not found. Please add a license to your model card using the `license` metadata or a" + " `license_name`/`license_link` pair." + ) + + # Enforce card content + if len(card.text) < 200: + return False, "Please add a description to your model card, it is too short." 
+ + return True, "" + +def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]: + """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses.""" + try: + config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) + if test_tokenizer: + try: + tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token) + except ValueError as e: + return ( + False, + f"uses a tokenizer which is not in a transformers release: {e}", + None + ) + except Exception as e: + return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None) + return True, None, config + + except ValueError: + return ( + False, + "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.", + None + ) + + except Exception as e: + return False, "was not found on hub!", None + + +def get_model_size(model_info: ModelInfo, precision: str): + """Gets the model size from the configuration, or the model name if the configuration does not contain the information.""" + try: + model_size = round(model_info.safetensors["total"] / 1e9, 3) + except (AttributeError, TypeError): + return 0 # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in example_app.py + + size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1 + model_size = size_factor * model_size + return model_size + +def get_model_arch(model_info: ModelInfo): + """Gets the model architecture from the configuration""" + return model_info.config.get("architectures", "Unknown") + +def already_submitted_models(requested_models_dir: str) -> set[str]: + """Gather a list of already submitted models to avoid 
duplicates""" + depth = 1 + file_names = [] + users_to_submission_dates = defaultdict(list) + + for root, _, files in os.walk(requested_models_dir): + current_depth = root.count(os.sep) - requested_models_dir.count(os.sep) + if current_depth == depth: + for file in files: + if not file.endswith(".json"): + continue + with open(os.path.join(root, file), "r") as f: + info = json.load(f) + file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}") + + # Select organisation + if info["model"].count("/") == 0 or "submitted_time" not in info: + continue + organisation, _ = info["model"].split("/") + users_to_submission_dates[organisation].append(info["submitted_time"]) + + return set(file_names), users_to_submission_dates