Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,15 +13,56 @@ except ImportError:
|
|
| 13 |
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
| 14 |
groq_client = Groq(api_key=os.getenv('groq_key'))
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
OPENAI_MODELS = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
GROQ_MODELS = ["llama3-8b-8192", "gemma2-9b-it"]
|
| 19 |
|
| 20 |
MODEL_CONFIGS = {
|
| 21 |
"openai": {
|
| 22 |
-
|
| 23 |
-
"gpt-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
},
|
| 26 |
"groq": {
|
| 27 |
"llama3-8b-8192": {"max_tokens": 4090, "temperature": 0.7},
|
|
@@ -30,31 +71,45 @@ MODEL_CONFIGS = {
|
|
| 30 |
}
|
| 31 |
|
| 32 |
def get_llm_response(prompt, provider, model):
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
def evaluate_essay(message, additional_prompt, provider, model):
|
|
|
|
| 58 |
if not message.strip():
|
| 59 |
return [], gr.Markdown("### 請輸入作文內容進行評分")
|
| 60 |
|
|
@@ -88,7 +143,7 @@ def evaluate_essay(message, additional_prompt, provider, model):
|
|
| 88 |
|
| 89 |
{additional_prompt if additional_prompt else ''}
|
| 90 |
|
| 91 |
-
請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C
|
| 92 |
如果是缺考、未作答、完全文不對題或作答內容完全照抄試題,請給予0分。
|
| 93 |
|
| 94 |
請按以下格式回覆:
|
|
@@ -118,9 +173,22 @@ def evaluate_essay(message, additional_prompt, provider, model):
|
|
| 118 |
|
| 119 |
all_feedback[criterion] = feedback
|
| 120 |
|
|
|
|
| 121 |
for criterion in criteria:
|
| 122 |
history.append(("", all_feedback[criterion]))
|
| 123 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
total_score_display = f"""
|
| 125 |
# 總評分結果
|
| 126 |
## 使用模型:{provider} ({model})
|
|
@@ -132,10 +200,10 @@ def evaluate_essay(message, additional_prompt, provider, model):
|
|
| 132 |
except Exception as e:
|
| 133 |
return [("", f"評分過程發生錯誤:{str(e)}")], gr.Markdown("### ❌ 評分失敗")
|
| 134 |
|
| 135 |
-
# 模型選擇切換函數
|
| 136 |
def update_model_choices(provider):
|
|
|
|
| 137 |
if provider == "openai":
|
| 138 |
-
return gr.Dropdown(choices=OPENAI_MODELS, value="gpt-
|
| 139 |
else:
|
| 140 |
return gr.Dropdown(choices=GROQ_MODELS, value="llama3-8b-8192")
|
| 141 |
|
|
@@ -145,25 +213,26 @@ with gr.Blocks(title="國文作文自動評分系統") as demo:
|
|
| 145 |
# 國文作文自動評分系統
|
| 146 |
## 評分標準說明
|
| 147 |
本系統採用以下四個面向進行評分:
|
| 148 |
-
- 題旨發揮 (40%)
|
| 149 |
-
- 資料掌握 (20%)
|
| 150 |
-
- 結構安排 (20%)
|
| 151 |
-
- 字句運用 (20%)
|
| 152 |
-
|
| 153 |
評分採用三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)
|
| 154 |
""")
|
| 155 |
|
| 156 |
# 輸入區塊
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
| 167 |
|
| 168 |
# 模型選擇
|
| 169 |
with gr.Row():
|
|
@@ -175,7 +244,7 @@ with gr.Blocks(title="國文作文自動評分系統") as demo:
|
|
| 175 |
model = gr.Dropdown(
|
| 176 |
choices=OPENAI_MODELS,
|
| 177 |
label="選擇模型",
|
| 178 |
-
value="gpt-
|
| 179 |
interactive=True
|
| 180 |
)
|
| 181 |
|
|
@@ -212,9 +281,10 @@ with gr.Blocks(title="國文作文自動評分系統") as demo:
|
|
| 212 |
outputs=[chatbot, score_display]
|
| 213 |
)
|
| 214 |
clear.click(
|
| 215 |
-
lambda: [None, None, "openai", "gpt-
|
| 216 |
outputs=[msg, additional_prompt, provider, model, chatbot, score_display]
|
| 217 |
)
|
| 218 |
|
| 219 |
# 啟動應用
|
| 220 |
-
|
|
|
|
|
|
| 13 |
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
| 14 |
groq_client = Groq(api_key=os.getenv('groq_key'))
|
| 15 |
|
| 16 |
+
# 更新後的模型設定,包含最新的模型
|
| 17 |
+
# OpenAI model names offered in the model dropdown. Each name listed here is
# also a key of MODEL_CONFIGS["openai"], which get_llm_response indexes by
# model name.
OPENAI_MODELS = [
    # GPT-4o family
    "gpt-4o",  # flagship model
    "gpt-4o-2024-08-06",  # dated snapshot; noted as supporting structured outputs
    "gpt-4o-mini",  # lightweight, fast variant
    "chatgpt-4o-latest",  # continuously updated version used by ChatGPT

    # GPT-4 Turbo and GPT-3.5 Turbo
    "gpt-4-turbo",  # GPT-4 Turbo
    "gpt-3.5-turbo",  # GPT-3.5 Turbo
]
|
| 28 |
+
|
| 29 |
GROQ_MODELS = ["llama3-8b-8192", "gemma2-9b-it"]
|
| 30 |
|
| 31 |
MODEL_CONFIGS = {
|
| 32 |
"openai": {
|
| 33 |
+
# GPT-4o 系列配置
|
| 34 |
+
"gpt-4o": {
|
| 35 |
+
"max_tokens": 16384,
|
| 36 |
+
"temperature": 0.7,
|
| 37 |
+
"context_window": 128000
|
| 38 |
+
},
|
| 39 |
+
"gpt-4o-2024-08-06": {
|
| 40 |
+
"max_tokens": 16384,
|
| 41 |
+
"temperature": 0.7,
|
| 42 |
+
"context_window": 128000
|
| 43 |
+
},
|
| 44 |
+
"gpt-4o-mini": {
|
| 45 |
+
"max_tokens": 16384,
|
| 46 |
+
"temperature": 0.7,
|
| 47 |
+
"context_window": 128000
|
| 48 |
+
},
|
| 49 |
+
"chatgpt-4o-latest": {
|
| 50 |
+
"max_tokens": 16384,
|
| 51 |
+
"temperature": 0.7,
|
| 52 |
+
"context_window": 128000
|
| 53 |
+
},
|
| 54 |
+
|
| 55 |
+
# GPT-4 Turbo 系列配置
|
| 56 |
+
"gpt-4-turbo": {
|
| 57 |
+
"max_tokens": 4096,
|
| 58 |
+
"temperature": 0.7,
|
| 59 |
+
"context_window": 128000
|
| 60 |
+
},
|
| 61 |
+
"gpt-3.5-turbo": {
|
| 62 |
+
"max_tokens": 4096,
|
| 63 |
+
"temperature": 0.7,
|
| 64 |
+
"context_window": 16385
|
| 65 |
+
}
|
| 66 |
},
|
| 67 |
"groq": {
|
| 68 |
"llama3-8b-8192": {"max_tokens": 4090, "temperature": 0.7},
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
def get_llm_response(prompt, provider, model):
    """Send a grading prompt to the selected LLM provider and return its reply.

    Args:
        prompt: Text sent as the user message.
        provider: ``"openai"`` or ``"groq"``; selects both the client and the
            sub-table of ``MODEL_CONFIGS`` that is consulted.
        model: Model name; must be a key of ``MODEL_CONFIGS[provider]``.

    Returns:
        The message content of the first completion choice. Exceptions are
        never propagated: on any failure (unsupported provider, unknown model,
        client/API error) a string beginning with ``"評分過程發生錯誤:"`` and
        followed by the exception text is returned instead.
    """
    # NOTE(review): the source this was recovered from had lost the leading
    # whitespace inside the multi-line prompt literal; the lines are reproduced
    # flush-left here -- confirm against the deployed app.py.
    openai_system_prompt = (
        "你是一位資深的國文作文評閱委員,請依據以下評分規準進行評分:\n"
        "1. 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
        "2. 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
        "3. 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
        "4. 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
        "\n"
        "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分。\n"
        "若有缺考、未作答、完全文不對題或作答內容完全照抄試題者,則給予0分。\n"
    )
    groq_system_prompt = "你是一位資深的國文作文評閱委員,請依據提供的評分規準進行評分。"

    try:
        if provider == "openai":
            settings = MODEL_CONFIGS["openai"][model]
            response = openai_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": openai_system_prompt},
                    {"role": "user", "content": prompt},
                ],
                # Only these two config keys are request parameters;
                # "context_window" is descriptive metadata.
                temperature=settings["temperature"],
                max_tokens=settings["max_tokens"],
            )
            return response.choices[0].message.content

        if provider == "groq":
            # Forward the per-model options, but drop the metadata key so that
            # adding "context_window" to a Groq entry (as the OpenAI entries
            # already have) cannot turn into an unexpected keyword argument.
            request_options = {
                key: value
                for key, value in MODEL_CONFIGS["groq"][model].items()
                if key != "context_window"
            }
            completion = groq_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": groq_system_prompt},
                    {"role": "user", "content": prompt},
                ],
                **request_options,
                stream=False,
                top_p=1,
                stop=None,
            )
            return completion.choices[0].message.content

        # Previously every non-"openai" value fell through to the Groq client;
        # fail explicitly instead, through the same error-string contract.
        raise ValueError(f"不支援的模型提供者:{provider}")
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return f"評分過程發生錯誤:{str(e)}"
|
| 110 |
|
| 111 |
def evaluate_essay(message, additional_prompt, provider, model):
|
| 112 |
+
"""評估作文的主函數"""
|
| 113 |
if not message.strip():
|
| 114 |
return [], gr.Markdown("### 請輸入作文內容進行評分")
|
| 115 |
|
|
|
|
| 143 |
|
| 144 |
{additional_prompt if additional_prompt else ''}
|
| 145 |
|
| 146 |
+
請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分,並提供詳細評語。
|
| 147 |
如果是缺考、未作答、完全文不對題或作答內容完全照抄試題,請給予0分。
|
| 148 |
|
| 149 |
請按以下格式回覆:
|
|
|
|
| 173 |
|
| 174 |
all_feedback[criterion] = feedback
|
| 175 |
|
| 176 |
+
# 添加各項評分到歷史記錄
|
| 177 |
for criterion in criteria:
|
| 178 |
history.append(("", all_feedback[criterion]))
|
| 179 |
|
| 180 |
+
# 生成總評
|
| 181 |
+
total_evaluation = """
|
| 182 |
+
### 綜合評語
|
| 183 |
+
本作文各項得分如下:
|
| 184 |
+
"""
|
| 185 |
+
for criterion, details in criteria.items():
|
| 186 |
+
total_evaluation += f"- {criterion}:{all_feedback[criterion].split('得分**:')[1].split('/')[0]}/{details['max_score']}\n"
|
| 187 |
+
|
| 188 |
+
total_evaluation += f"\n### 總分:{total_score:.1f}/100"
|
| 189 |
+
|
| 190 |
+
history.append(("", total_evaluation))
|
| 191 |
+
|
| 192 |
total_score_display = f"""
|
| 193 |
# 總評分結果
|
| 194 |
## 使用模型:{provider} ({model})
|
|
|
|
| 200 |
except Exception as e:
|
| 201 |
return [("", f"評分過程發生錯誤:{str(e)}")], gr.Markdown("### ❌ 評分失敗")
|
| 202 |
|
|
|
|
| 203 |
def update_model_choices(provider):
    """Build the model dropdown that matches the chosen provider.

    Args:
        provider: Provider name. ``"openai"`` selects the OpenAI model list;
            any other value selects the Groq model list.

    Returns:
        A ``gr.Dropdown`` whose choices are the provider's model list and
        whose value is that provider's default model.
    """
    is_openai = provider == "openai"
    model_names = OPENAI_MODELS if is_openai else GROQ_MODELS
    default_model = "gpt-4o" if is_openai else "llama3-8b-8192"
    return gr.Dropdown(choices=model_names, value=default_model)
|
| 209 |
|
|
|
|
| 213 |
# 國文作文自動評分系統
|
| 214 |
## 評分標準說明
|
| 215 |
本系統採用以下四個面向進行評分:
|
| 216 |
+
- 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻
|
| 217 |
+
- 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實
|
| 218 |
+
- 結構安排 (20%): 審視文章結構是否完整、條理是否分明
|
| 219 |
+
- 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美
|
| 220 |
+
|
| 221 |
評分採用三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)
|
| 222 |
""")
|
| 223 |
|
| 224 |
# 輸入區塊
|
| 225 |
+
with gr.Column():
|
| 226 |
+
msg = gr.Textbox(
|
| 227 |
+
label="請輸入作文內容",
|
| 228 |
+
placeholder="在此輸入作文...",
|
| 229 |
+
lines=10
|
| 230 |
+
)
|
| 231 |
+
additional_prompt = gr.Textbox(
|
| 232 |
+
label="額外評分提示(選填)",
|
| 233 |
+
placeholder="可輸入額外的評分要求或提示...",
|
| 234 |
+
lines=2
|
| 235 |
+
)
|
| 236 |
|
| 237 |
# 模型選擇
|
| 238 |
with gr.Row():
|
|
|
|
| 244 |
model = gr.Dropdown(
|
| 245 |
choices=OPENAI_MODELS,
|
| 246 |
label="選擇模型",
|
| 247 |
+
value="gpt-4o",
|
| 248 |
interactive=True
|
| 249 |
)
|
| 250 |
|
|
|
|
| 281 |
outputs=[chatbot, score_display]
|
| 282 |
)
|
| 283 |
clear.click(
|
| 284 |
+
lambda: [None, None, "openai", "gpt-4o", [], gr.Markdown("### 等待評分...")],
|
| 285 |
outputs=[msg, additional_prompt, provider, model, chatbot, score_display]
|
| 286 |
)
|
| 287 |
|
| 288 |
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|