# WritingScore / app.py
# Uploaded by Browen0311 -- commit 53e2dbe ("Update app.py", verified)
import os
import re
import subprocess
import sys

import gradio as gr
from openai import OpenAI
# Import Groq, installing the package on first run if it is missing.
try:
    from groq import Groq
except ImportError:
    # Run pip through the interpreter executing this script so the package is
    # installed into the active environment. Passing an argument list avoids
    # the shell, and check_call raises if the install fails instead of
    # letting the retry below die with an unexplained ImportError.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "groq"])
    from groq import Groq
# Create one client per provider; each API key is read from the environment
# (OPENAI_API_KEY for OpenAI, groq_key for Groq) and is None when unset.
openai_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
groq_client = Groq(api_key=os.environ.get('groq_key'))
# Model catalogue: the names offered in the UI for each provider.
OPENAI_MODELS = [
    # GPT-4o family
    "gpt-4o",
    "gpt-4o-2024-08-06",
    "gpt-4o-mini",
    "chatgpt-4o-latest",
    # GPT-4 Turbo family and GPT-3.5 Turbo
    "gpt-4-turbo",
    "gpt-3.5-turbo",
]

GROQ_MODELS = ["llama3-8b-8192", "gemma2-9b-it"]

# Per-provider, per-model request settings. Every model uses temperature 0.7;
# OpenAI entries additionally record the model's context window.
MODEL_CONFIGS = {
    "openai": {
        name: {
            "max_tokens": max_tokens,
            "temperature": 0.7,
            "context_window": context_window,
        }
        for name, max_tokens, context_window in (
            ("gpt-4o", 16384, 128000),
            ("gpt-4o-2024-08-06", 16384, 128000),
            ("gpt-4o-mini", 16384, 128000),
            ("chatgpt-4o-latest", 16384, 128000),
            ("gpt-4-turbo", 4096, 128000),
            ("gpt-3.5-turbo", 4096, 16385),
        )
    },
    "groq": {
        name: {"max_tokens": max_tokens, "temperature": 0.7}
        for name, max_tokens in (
            ("llama3-8b-8192", 4090),
            ("gemma2-9b-it", 1024),
        )
    },
}
# Grading rubric sent as the system message to every provider, so that both
# back ends grade against the same criteria.
_SYSTEM_PROMPT = (
    "你是一位資深的國文作文評閱委員,請依據以下評分規準進行評分:\n"
    "1. 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
    "2. 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
    "3. 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
    "4. 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
    "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分。\n"
    "若有缺考、未作答、完全文不對題或作答內容完全照抄試題者,則給予0分。\n"
)


def get_llm_response(prompt, provider, model):
    """Send a grading prompt to the selected provider and return its reply.

    Args:
        prompt: The user message to send (essay plus grading instructions).
        provider: Either "openai" or "groq".
        model: A model name that has an entry in MODEL_CONFIGS[provider].

    Returns:
        The text of the first completion choice. This function never raises:
        on any failure (unknown provider or model, API error, empty reply) it
        returns a string that starts with "評分過程發生錯誤".
    """
    # Reject unknown providers explicitly rather than silently routing every
    # non-"openai" value to Groq.
    if provider not in ("openai", "groq"):
        return f"評分過程發生錯誤:不支援的服務提供者 {provider}"

    try:
        settings = MODEL_CONFIGS[provider].get(model)
        if settings is None:
            return f"評分過程發生錯誤:不支援的模型 {model}"

        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]

        if provider == "openai":
            # OpenAI entries also carry "context_window", which is not a
            # request parameter, so only the two sampling settings are passed.
            response = openai_client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=settings["temperature"],
                max_tokens=settings["max_tokens"],
            )
        else:
            # Groq entries hold only request parameters and are unpacked whole.
            response = groq_client.chat.completions.create(
                model=model,
                messages=messages,
                **settings,
                stream=False,
                top_p=1,
                stop=None,
            )
        content = response.choices[0].message.content
    except Exception as e:
        return f"評分過程發生錯誤:{str(e)}"

    # The content field may be empty or None; report that as an error string
    # instead of handing None to callers that expect text.
    if not content:
        return "評分過程發生錯誤:模型未回傳任何內容"
    return content
# Labels the model is asked to reply with. Markdown emphasis around the label
# is tolerated, and the colon may be ASCII (:) or full-width (U+FF1A).
_GRADE_LABEL = re.compile(r'等第[\s*_`]*[:\uff1a](.*)')
_COMMENT_LABEL = re.compile(r'評語[\s*_`]*[:\uff1a](.*)')
# A grade token (A+ ... C-, or 0) that is not part of a longer word or number.
_GRADE_TOKEN = re.compile(r'(?<![0-9A-Z])(?:[ABC][+-]?|0)(?![0-9A-Z])')
# Full-width plus/minus and the Unicode minus sign, mapped to ASCII.
_SIGN_TABLE = str.maketrans({'\uff0b': '+', '\uff0d': '-', '\u2212': '-'})


def _parse_grading_reply(reply, grade_scores):
    """Extract ``(grade, comment)`` from one model reply.

    The grade defaults to '0' when no recognised grade follows a grade label.
    The comment is everything after the comment label, including following
    non-blank lines, with its original letter case preserved. If the reply
    contains neither label, the whole reply is returned as the comment so the
    user can see what the model actually said.
    """
    grade = '0'
    comment_parts = []
    in_comment = False
    saw_label = False

    for line in reply.splitlines():
        grade_match = _GRADE_LABEL.search(line)
        comment_match = None if grade_match else _COMMENT_LABEL.search(line)

        if grade_match:
            saw_label = True
            in_comment = False
            normalized = grade_match.group(1).upper().translate(_SIGN_TABLE)
            token = _GRADE_TOKEN.search(normalized)
            if token and token.group(0) in grade_scores:
                grade = token.group(0)
        elif comment_match:
            saw_label = True
            in_comment = True
            # Drop Markdown emphasis left over from a label like "**評語:**".
            opening = comment_match.group(1).strip().lstrip('*_`').strip()
            comment_parts = [opening] if opening else []
        elif in_comment and line.strip():
            comment_parts.append(line.strip())

    if not saw_label:
        return grade, reply.strip()
    return grade, '\n'.join(comment_parts)


def evaluate_essay(message, additional_prompt, provider, model):
    """Grade an essay on four weighted criteria and build the UI outputs.

    Each criterion is sent to the model as a separate request via
    get_llm_response; the returned grade is converted to points and summed.

    Args:
        message: The essay text. None or blank text yields a prompt to enter
            an essay and no model call is made.
        additional_prompt: Optional extra grading instructions inserted into
            every request; may be empty or None.
        provider: Provider name forwarded to get_llm_response.
        model: Model name forwarded to get_llm_response.

    Returns:
        A ``(history, score_markdown)`` pair: ``history`` is a list of
        ``(user_text, bot_text)`` tuples for the chatbot component, and
        ``score_markdown`` is a ``gr.Markdown`` holding the total score (or a
        failure notice).
    """
    # Treat None the same as blank text so a cleared textbox cannot crash on
    # .strip() before the try block is entered.
    if not message or not message.strip():
        return [], gr.Markdown("### 請輸入作文內容進行評分")

    criteria = {
        '題旨發揮': {'weight': 0.4, 'max_score': 40},
        '資料掌握': {'weight': 0.2, 'max_score': 20},
        '結構安排': {'weight': 0.2, 'max_score': 20},
        '字句運用': {'weight': 0.2, 'max_score': 20},
    }
    grade_scores = {
        'A+': 95, 'A': 90, 'A-': 85,
        'B+': 80, 'B': 75, 'B-': 70,
        'C+': 65, 'C': 60, 'C-': 55,
        '0': 0,
    }

    try:
        history = [
            ("作文內容:", message),
            ("", f"正在使用 {provider} ({model}) 進行評分分析..."),
        ]
        total_score = 0
        all_feedback = {}
        weighted_scores = {}

        for criterion, details in criteria.items():
            prompt = (
                f"評估以下作文的{criterion}(權重{details['weight']*100}%):\n"
                "作文內容:\n"
                f"{message}\n"
                f"{additional_prompt if additional_prompt else ''}\n"
                "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分,並提供詳細評語。\n"
                "如果是缺考、未作答、完全文不對題或作答內容完全照抄試題,請給予0分。\n"
                "請按以下格式回覆:\n"
                "等第:\n"
                "評語:"
            )
            result = get_llm_response(prompt, provider, model)

            # get_llm_response reports failures as text with this prefix;
            # surface them instead of silently scoring the criterion as 0.
            if result.startswith("評分過程發生錯誤"):
                return [("", result)], gr.Markdown("### ❌ 評分失敗")

            grade, comment = _parse_grading_reply(result, grade_scores)

            weighted_score = (grade_scores[grade] / 100) * details['max_score']
            weighted_scores[criterion] = weighted_score
            total_score += weighted_score

            all_feedback[criterion] = (
                f"### {criterion}\n"
                f"- **等第**:{grade}\n"
                f"- **得分**:{weighted_score:.1f}/{details['max_score']}\n"
                f"- **評語**:{comment}\n"
            )

        # Append the per-criterion feedback to the chat history.
        for criterion in criteria:
            history.append(("", all_feedback[criterion]))

        # Build the overall summary from the stored numeric scores rather
        # than re-parsing the formatted feedback text.
        summary_lines = [
            f"- {criterion}:{weighted_scores[criterion]:.1f}/{details['max_score']}\n"
            for criterion, details in criteria.items()
        ]
        total_evaluation = (
            "\n### 綜合評語\n本作文各項得分如下:\n"
            + "".join(summary_lines)
            + f"\n### 總分:{total_score:.1f}/100"
        )
        history.append(("", total_evaluation))

        total_score_display = (
            "\n# 總評分結果\n"
            f"## 使用模型:{provider} ({model})\n"
            f"## 總分:{total_score:.1f}/100\n"
        )
        return history, gr.Markdown(total_score_display)
    except Exception as e:
        return [("", f"評分過程發生錯誤:{str(e)}")], gr.Markdown("### ❌ 評分失敗")
def update_model_choices(provider):
    """Return a Dropdown listing the models of the chosen provider.

    "openai" selects the OpenAI model list with "gpt-4o" preselected; any
    other value selects the Groq list with "llama3-8b-8192" preselected.
    """
    is_openai = provider == "openai"
    model_names = OPENAI_MODELS if is_openai else GROQ_MODELS
    preselected = "gpt-4o" if is_openai else "llama3-8b-8192"
    return gr.Dropdown(choices=model_names, value=preselected)
# Build the Gradio interface.
# NOTE(review): the nesting of rows/columns below was reconstructed from the
# section comments -- confirm against the deployed layout.
with gr.Blocks(title="國文作文自動評分系統") as demo:
    gr.Markdown(
        "\n"
        "# 國文作文自動評分系統\n"
        "## 評分標準說明\n"
        "本系統採用以下四個面向進行評分:\n"
        "- 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
        "- 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
        "- 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
        "- 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
        "評分採用三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)\n"
    )

    # Input area: essay text, optional extra instructions, model selection.
    with gr.Column():
        msg = gr.Textbox(
            label="請輸入作文內容",
            placeholder="在此輸入作文...",
            lines=10,
        )
        additional_prompt = gr.Textbox(
            label="額外評分提示(選填)",
            placeholder="可輸入額外的評分要求或提示...",
            lines=2,
        )

        # Provider and model selection.
        with gr.Row():
            provider = gr.Radio(
                choices=["openai", "groq"],
                label="選擇服務提供者",
                value="openai",
            )
            model = gr.Dropdown(
                choices=OPENAI_MODELS,
                label="選擇模型",
                value="gpt-4o",
                interactive=True,
            )

        # Refresh the model list whenever the provider changes.
        provider.change(
            fn=update_model_choices,
            inputs=provider,
            outputs=model,
        )

        # Action buttons.
        with gr.Row():
            submit = gr.Button("開始評分", variant="primary", size="lg")
            clear = gr.Button("清除內容", size="lg")

    # Output area.
    with gr.Row():
        # Left: per-criterion grading details.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                show_copy_button=True,
                render_markdown=True,
                height=600,
                label="評分詳情",
            )
        # Right: total score.
        with gr.Column(scale=1):
            score_display = gr.Markdown("### 等待評分...")

    def _reset_form():
        """Return the initial state of every component the clear button resets."""
        return [
            # Empty strings rather than None, so a later submit receives text.
            "",
            "",
            "openai",
            # Restore the OpenAI choice list together with the value, so that
            # "gpt-4o" is a valid choice even if Groq was selected before.
            gr.Dropdown(choices=OPENAI_MODELS, value="gpt-4o"),
            [],
            gr.Markdown("### 等待評分..."),
        ]

    # Event wiring.
    submit.click(
        evaluate_essay,
        inputs=[msg, additional_prompt, provider, model],
        outputs=[chatbot, score_display],
    )
    clear.click(
        _reset_form,
        outputs=[msg, additional_prompt, provider, model, chatbot, score_display],
    )

# Launch the app when run as a script.
if __name__ == "__main__":
    demo.launch()