wabang committed on
Commit
6c9234d
·
verified ·
1 Parent(s): 2566659

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
+ import torch
5
+
6
# Load the tokenizer and the causal language model that will be evaluated.
model_name = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half-precision weights
    device_map="auto",
)

# Load the KMMLU dataset (example file).
df = pd.read_csv("kmmlu_sample.csv")
13
+
14
def evaluate_model(question, choices):
    """Ask the model a multiple-choice question and return its one-token answer.

    The prompt lists the choices labelled "A", "B", "C", ... (in the order
    given) and ends with an answer cue, after which exactly one new token is
    generated greedily.

    Args:
        question: The question text.
        choices: Iterable of answer options, in label order.

    Returns:
        The decoded generated token with surrounding whitespace stripped.
        This is whatever the model emitted; it is not validated against the
        set of choice labels.
    """
    options = "".join(
        f"{chr(65 + i)}. {choice}\n" for i, choice in enumerate(choices)
    )
    prompt = f"질문: {question}\n\n선택지:\n{options}\n답변:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[1]
    with torch.no_grad():
        # Greedy decoding is requested explicitly; `temperature=0.0` is not a
        # valid sampling temperature and is ignored when sampling is off.
        outputs = model.generate(**inputs, max_new_tokens=1, do_sample=False)

    # The returned sequence contains the prompt followed by the new token(s);
    # decode only the part after the prompt.
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
26
+
27
def run_kmmlu_test(subject):
    """Evaluate the model on every question of one subject and build a report.

    Rows of the module-level ``df`` whose ``subject`` column equals *subject*
    are passed one by one to ``evaluate_model``; the model's answer is
    compared with the row's ``answer`` value.

    Args:
        subject: Value to match against the ``subject`` column.

    Returns:
        A text report: a summary line with the accuracy and the
        correct/total counts, followed by one entry per question. When no
        row matches (for example, nothing was selected), the accuracy is
        reported as 0.00% (0/0) instead of raising ``ZeroDivisionError``.
    """
    subject_df = df[df['subject'] == subject]
    total = len(subject_df)
    correct = 0

    results = []
    for _, row in subject_df.iterrows():
        question = row['question']
        choices = [row['A'], row['B'], row['C'], row['D']]
        correct_answer = row['answer']

        model_answer = evaluate_model(question, choices)
        # Compare as stripped text so stray whitespace in the answer key does
        # not cause a false mismatch.
        # NOTE(review): this assumes the answer key stores the choice letter
        # ("A".."D"); a numeric key would never match - confirm against the CSV.
        is_correct = model_answer == str(correct_answer).strip()
        if is_correct:
            correct += 1

        results.append(
            f"질문: {question}\n"
            f"모델 답변: {model_answer}\n"
            f"정답: {correct_answer}\n"
            f"정확도: {'맞음' if is_correct else '틀림'}\n"
        )

    # Guard the empty-subject case; dividing by zero would crash the UI call.
    accuracy = correct / total if total else 0.0
    summary = f"주제: {subject}\n정확도: {accuracy:.2%} ({correct}/{total})\n\n"
    return summary + "\n".join(results)
49
+
50
+ subjects = df['subject'].unique().tolist()
51
+
52
+ iface = gr.Interface(
53
+ fn=run_kmmlu_test,
54
+ inputs=gr.Dropdown(choices=subjects, label="์ฃผ์ œ ์„ ํƒ"),
55
+ outputs="text",
56
+ title="Llama 3.1์„ ์ด์šฉํ•œ