Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from datasets import load_dataset
|
|
| 8 |
import accelerate
|
| 9 |
|
| 10 |
# 환경 변수에서 토큰 가져오기
|
| 11 |
-
hf_token = os.environ.get("HF_TOKEN",
|
| 12 |
|
| 13 |
# Hugging Face 로그인
|
| 14 |
if hf_token:
|
|
@@ -40,13 +40,12 @@ def evaluate_model(question, choices):
|
|
| 40 |
answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
|
| 41 |
return answer
|
| 42 |
|
| 43 |
-
def run_kmmlu_test(subject):
|
| 44 |
-
subject_df = df[df['subject'] == subject]
|
| 45 |
correct = 0
|
| 46 |
-
total = len(subject_df)
|
| 47 |
|
| 48 |
results = []
|
| 49 |
-
for _, row in subject_df.iterrows():
|
| 50 |
question = row['question']
|
| 51 |
choices = [row['A'], row['B'], row['C'], row['D']]
|
| 52 |
correct_answer = row['answer']
|
|
@@ -60,18 +59,17 @@ def run_kmmlu_test(subject):
|
|
| 60 |
results.append(f"질문: {question}\n모델 답변: {model_answer}\n정답: {correct_answer}\n정확도: {'맞음' if is_correct else '틀림'}\n")
|
| 61 |
|
| 62 |
accuracy = correct / total
|
| 63 |
-
summary = f"
|
| 64 |
return summary + "\n".join(results)
|
| 65 |
|
| 66 |
-
subjects=df['subject'].unique().tolist()
|
| 67 |
|
| 68 |
iface = gr.Interface(
|
| 69 |
fn=run_kmmlu_test,
|
| 70 |
-
|
| 71 |
-
inputs=gr.Dropdown(choices=subjects, label="주제 선택"),
|
| 72 |
outputs="text",
|
| 73 |
title="Llama 3를 이용한 KMMLU 테스트",
|
| 74 |
-
description="
|
| 75 |
)
|
| 76 |
|
| 77 |
iface.launch()
|
|
|
|
| 8 |
import accelerate
|
| 9 |
|
| 10 |
# 환경 변수에서 토큰 가져오기
|
| 11 |
+
hf_token = os.environ.get("HF_TOKEN", "Accounting")
|
| 12 |
|
| 13 |
# Hugging Face 로그인
|
| 14 |
if hf_token:
|
|
|
|
| 40 |
answer = tokenizer.decode(outputs[0][-1:], skip_special_tokens=True).strip()
|
| 41 |
return answer
|
| 42 |
|
| 43 |
+
def run_kmmlu_test():
|
|
|
|
| 44 |
correct = 0
|
| 45 |
+
total = len(df)
|
| 46 |
|
| 47 |
results = []
|
| 48 |
+
for _, row in df.iterrows():
|
| 49 |
question = row['question']
|
| 50 |
choices = [row['A'], row['B'], row['C'], row['D']]
|
| 51 |
correct_answer = row['answer']
|
|
|
|
| 59 |
results.append(f"질문: {question}\n모델 답변: {model_answer}\n정답: {correct_answer}\n정확도: {'맞음' if is_correct else '틀림'}\n")
|
| 60 |
|
| 61 |
accuracy = correct / total
|
| 62 |
+
summary = f"전체 테스트 결과\n정확도: {accuracy:.2%} ({correct}/{total})\n\n"
|
| 63 |
return summary + "\n".join(results)
|
| 64 |
|
|
|
|
| 65 |
|
| 66 |
iface = gr.Interface(
|
| 67 |
fn=run_kmmlu_test,
|
| 68 |
+
inputs=None,
|
| 69 |
+
#inputs=gr.Dropdown(choices=subjects, label="주제 선택"),
|
| 70 |
outputs="text",
|
| 71 |
title="Llama 3를 이용한 KMMLU 테스트",
|
| 72 |
+
description="Accounting 영역에 대한 KMMLU 테스트 수행"
|
| 73 |
)
|
| 74 |
|
| 75 |
iface.launch()
|