# codescripts / rougecal.py
# f541119578's picture
# Upload folder using huggingface_hub
# fdf190d verified
import json
import random
from rouge import Rouge
# Hard-coded dataset locations used by this script.
REFERENCE_PATH = "/home/aiscuser/fhw/data/code_alpaca_20k.json"
CANDIDATE_PATH = "/home/aiscuser/fhw/data/llama_python_7w.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/rouge.json"

# Number of randomly chosen candidate records that get scored.
SAMPLE_SIZE = 500


def best_rouge_l_match(scorer, candidate, references):
    """Return the score dict of the reference that best matches *candidate*.

    Each reference is scored against the candidate individually and the
    result with the highest ``["rouge-l"]["f"]`` value is kept.

    Args:
        scorer: Object exposing ``get_scores(hyps=..., refs=..., avg=...)``
            that returns a dict containing ``["rouge-l"]["f"]`` (the script
            passes a ``rouge.Rouge`` instance).
        candidate: Hypothesis text to compare.
        references: Iterable of reference texts.

    Returns:
        The full score dict returned by ``scorer`` for the best reference.
        When no reference scores strictly above zero (including when
        ``references`` is empty) the placeholder
        ``{"rouge-l": {"f": 0.0}}`` is returned instead.
    """
    best = {"rouge-l": {"f": 0.0}}
    hyps = [candidate]
    for reference in references:
        score = scorer.get_scores(hyps=hyps, refs=[reference], avg=True)
        # Strict comparison: on ties the earliest reference wins.
        if score["rouge-l"]["f"] > best["rouge-l"]["f"]:
            best = score
    return best


def main():
    """Score a random sample of candidate instructions against the references.

    Reads the reference file (a JSON document whose items each carry an
    ``instruction`` key) and the candidate file (one JSON object per line,
    each with an ``instruction`` key), shuffles the candidate lines, and for
    the first ``SAMPLE_SIZE`` of them records the best-matching score dict.
    The collected results are written to ``OUTPUT_PATH`` as
    ``{"final": [...]}``.
    """
    # Only the script run needs the progress bar; importing it here keeps
    # best_rouge_l_match importable without tqdm installed.
    from tqdm import tqdm

    # Inputs are only read, so open them read-only and close them promptly.
    with open(REFERENCE_PATH, encoding="utf-8") as reference_file:
        reference_items = json.load(reference_file)
    with open(CANDIDATE_PATH, encoding="utf-8") as candidate_file:
        # Blank lines are not valid JSON records and would crash json.loads.
        lines = [line for line in candidate_file if line.strip()]

    # Extract the reference texts once instead of once per candidate.
    references = [item['instruction'] for item in reference_items]

    random.shuffle(lines)
    rouge = Rouge()
    final = []
    for line in tqdm(lines[:SAMPLE_SIZE]):
        record = json.loads(line)
        best = best_rouge_l_match(rouge, record['instruction'], references)
        print(best)
        final.append(best)

    # Open the output only once the results exist, so a failure during
    # scoring does not leave a truncated result file behind.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as output_file:
        output_file.write(json.dumps({"final": final}))


if __name__ == "__main__":
    main()