| | """Generate json file for webpage.""" |
| | import json |
| | import os |
| | import re |
| |
|
# Model names written under the "models" key of the generated JSON; the same
# names key each record's "evaluations" and "scores" entries.
models = ["alpaca", "llama", "gpt35", "bard"]
| |
|
| |
|
def read_jsonl(path: str, key: str = None):
    """Load a JSON Lines file as a list, or as a dict indexed by ``key``.

    Args:
        path: Location of the file; a leading ``~`` is expanded.
        key: Optional field name. When given, the records are sorted by that
            field and returned as a dict mapping each record's ``key`` value
            to the record. If several records share a value, the last one in
            file order wins.

    Returns:
        A list of parsed records in file order when ``key`` is ``None``,
        otherwise a dict whose insertion order follows ascending ``key``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If ``key`` is given and a record (assumed to be a JSON
            object) lacks that field.
    """
    records = []
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(os.path.expanduser(path), encoding="utf-8") as f:
        for line in f:
            # Lines read from a file keep their trailing newline, so a blank
            # line is "\n" rather than ""; strip before testing for emptiness.
            line = line.strip()
            if not line:
                continue
            records.append(json.loads(line))
    if key is not None:
        records.sort(key=lambda item: item[key])
        return {item[key]: item for item in records}
    return records
| |
|
| |
|
def trim_hanging_lines(s: str, n: int) -> str:
    """Drop the first ``n`` lines of ``s``, trimming whitespace as it goes.

    The text is stripped first, and again after each line is removed, so any
    blank lines that follow a removed line are discarded with it.

    Args:
        s: The text to trim.
        n: How many leading lines to remove.

    Returns:
        The remaining text with surrounding whitespace stripped, or an empty
        string when the text has no more than ``n`` lines.
    """
    s = s.strip()
    for _ in range(n):
        _, newline, remainder = s.partition("\n")
        if not newline:
            # Nothing follows the current line: the whole text is consumed.
            return ""
        s = remainder.strip()
    return s
| |
|
| |
|
def _clean_review(text: str) -> str:
    """Prepare one raw review text for display.

    The text is stripped; if its first line starts with two integers separated
    by commas and/or spaces (presumably the score pair), that line is dropped;
    finally every "Assistant 1" / "Assistant 2" is wrapped in ``**``.
    """
    body = text.strip()
    head, _, tail = body.partition("\n")
    if re.match(r"\d+[, ]+\d+", head):
        body = tail
    body = body.replace("Assistant 1", "**Assistant 1**")
    return body.replace("Assistant 2", "**Assistant 2**")


def _reorder_id(qid):
    """Renumber a question id by applying the three reordering steps in turn."""
    # Step 1: ids up to 20 move up by 60, every other id moves down by 20.
    if qid <= 20:
        qid += 60
    else:
        qid -= 20
    # Step 2: ids up to 50 move up by 10, ids in (50, 60] move down by 50.
    if qid <= 50:
        qid += 10
    elif 50 < qid <= 60:
        qid -= 50
    # Step 3: id 7 becomes 1 and ids below 7 move up by 1.
    if qid == 7:
        qid = 1
    elif qid < 7:
        qid += 1
    return qid


if __name__ == "__main__":
    questions = read_jsonl("table/question.jsonl", key="question_id")

    # Answer tables, one per model, each indexed by question id.
    answer_paths = {
        "alpaca": "table/answer/answer_alpaca-13b.jsonl",
        "bard": "table/answer/answer_bard.jsonl",
        "gpt35": "table/answer/answer_gpt35.jsonl",
        "llama": "table/answer/answer_llama-13b.jsonl",
        "vicuna": "table/answer/answer_vicuna-13b.jsonl",
    }
    answers = {
        model: read_jsonl(path, key="question_id")
        for model, path in answer_paths.items()
    }

    # Review tables for each model paired with vicuna-13b, indexed by
    # question id.
    review_paths = {
        "alpaca": "table/review/review_alpaca-13b_vicuna-13b.jsonl",
        "bard": "table/review/review_bard_vicuna-13b.jsonl",
        "gpt35": "table/review/review_gpt35_vicuna-13b.jsonl",
        "llama": "table/review/review_llama-13b_vicuna-13b.jsonl",
    }
    reviews = {
        model: read_jsonl(path, key="question_id")
        for model, path in review_paths.items()
    }

    # Key order inside each record of the emitted JSON.
    answer_models = ("alpaca", "llama", "bard", "gpt35", "vicuna")
    review_models = ("alpaca", "llama", "bard", "gpt35")

    records = []
    for qid in questions:
        entry = questions[qid]
        record = {
            "id": qid,
            "category": entry["category"],
            "question": entry["text"],
            "answers": {m: answers[m][qid]["text"] for m in answer_models},
            "evaluations": {m: reviews[m][qid]["text"] for m in review_models},
            "scores": {m: reviews[m][qid]["score"] for m in review_models},
        }
        # Swap the raw review texts for their cleaned-up versions.
        record["evaluations"] = {
            m: _clean_review(raw) for m, raw in record["evaluations"].items()
        }
        records.append(record)

    # Renumber the records, then emit them in ascending id order.
    for record in records:
        record["id"] = _reorder_id(record["id"])
    records = sorted(records, key=lambda rec: rec["id"])

    with open("webpage/data.json", "w") as out:
        json.dump({"questions": records, "models": models}, out, indent=2)
| |
|