Spaces:

Browen0311
/

WritingScore

Sleeping

App Files Files Community

WritingScore / app.py

Browen0311

Update app.py

53e2dbe verified about 1 year ago

raw

history blame contribute delete

10 kB

	import os
	import re
	import subprocess
	import sys

	import gradio as gr
	from openai import OpenAI

	# Import Groq, installing the package on first run if it is missing.
	try:
	    from groq import Groq
	except ImportError:
	    # Use the pip that belongs to the running interpreter and fail loudly
	    # when the install does not succeed; os.system() ignored the exit
	    # status and could invoke a pip tied to a different Python.
	    subprocess.check_call([sys.executable, "-m", "pip", "install", "groq"])
	    from groq import Groq

	# Create one API client per provider; each key is read from the environment
	# (OPENAI_API_KEY for OpenAI, groq_key for Groq).
	openai_client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
	groq_client = Groq(api_key=os.environ.get('groq_key'))

	# Model names offered for each provider.
	OPENAI_MODELS = [
	    # GPT-4o family
	    "gpt-4o",  # latest flagship model
	    "gpt-4o-2024-08-06",  # version supporting structured outputs
	    "gpt-4o-mini",  # lightweight, fast version
	    "chatgpt-4o-latest",  # continuously updated version used by ChatGPT
	    # GPT-4 Turbo family
	    "gpt-4-turbo",  # latest GPT-4 Turbo
	    "gpt-3.5-turbo",  # GPT-3.5 Turbo
	]

	GROQ_MODELS = [
	    "llama3-8b-8192",
	    "gemma2-9b-it",
	]

	# Per-model request settings, keyed by provider and then by model name.
	# Every model gets its own dict so entries can be changed independently.
	MODEL_CONFIGS = {
	    "openai": {
	        model_name: {
	            "max_tokens": output_limit,
	            "temperature": 0.7,
	            "context_window": window,
	        }
	        for model_name, output_limit, window in (
	            # GPT-4o family
	            ("gpt-4o", 16384, 128000),
	            ("gpt-4o-2024-08-06", 16384, 128000),
	            ("gpt-4o-mini", 16384, 128000),
	            ("chatgpt-4o-latest", 16384, 128000),
	            # GPT-4 Turbo family
	            ("gpt-4-turbo", 4096, 128000),
	            ("gpt-3.5-turbo", 4096, 16385),
	        )
	    },
	    "groq": {
	        "llama3-8b-8192": dict(max_tokens=4090, temperature=0.7),
	        "gemma2-9b-it": dict(max_tokens=1024, temperature=0.7),
	    },
	}

	# Rubric sent as the system message to BOTH providers. Previously the Groq
	# branch told the model to grade "by the provided rubric" without ever
	# providing one. Built line by line so no source indentation leaks into it.
	_GRADER_SYSTEM_PROMPT = "\n".join([
	    "你是一位資深的國文作文評閱委員，請依據以下評分規準進行評分：",
	    "1. 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻",
	    "2. 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實",
	    "3. 結構安排 (20%): 審視文章結構是否完整、條理是否分明",
	    "4. 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美",
	    "",
	    "請依三等九級制（A+、A、A-、B+、B、B-、C+、C、C-）評分。",
	    "若有缺考、未作答、完全文不對題或作答內容完全照抄試題者，則給予0分。",
	    "",
	])


	def get_llm_response(prompt, provider, model):
	    """Send one grading prompt to the selected provider and return the reply.

	    Args:
	        prompt: Text sent as the user message.
	        provider: "openai" selects the OpenAI client; any other value
	            selects the Groq client.
	        model: Model name; must be configured in MODEL_CONFIGS for the
	            selected provider.

	    Returns:
	        The reply text. Failures are never raised: they are returned as a
	        string starting with "評分過程發生錯誤：".
	    """
	    provider_key = "openai" if provider == "openai" else "groq"
	    settings = MODEL_CONFIGS[provider_key].get(model)
	    if settings is None:
	        # Happens when provider and model are out of sync; report it
	        # clearly instead of surfacing a bare KeyError message.
	        return f"評分過程發生錯誤：{provider_key} 不支援模型 {model}"

	    messages = [
	        {"role": "system", "content": _GRADER_SYSTEM_PROMPT},
	        {"role": "user", "content": prompt},
	    ]

	    try:
	        if provider_key == "openai":
	            response = openai_client.chat.completions.create(
	                model=model,
	                messages=messages,
	                temperature=settings["temperature"],
	                max_tokens=settings["max_tokens"],
	            )
	        else:  # groq
	            response = groq_client.chat.completions.create(
	                model=model,
	                messages=messages,
	                **settings,
	                stream=False,
	                top_p=1,
	                stop=None,
	            )
	        content = response.choices[0].message.content
	    except Exception as e:
	        # Boundary handler: callers expect a string, not an exception.
	        return f"評分過程發生錯誤：{str(e)}"

	    if content is None:
	        # NOTE(review): the SDKs appear to type message.content as optional;
	        # keep the "always returns a string" contract in that case.
	        return "評分過程發生錯誤：模型未回傳內容"
	    return content

	# Rubric dimensions; the max_score values add up to 100.
	_CRITERIA = {
	    '題旨發揮': {'weight': 0.4, 'max_score': 40},
	    '資料掌握': {'weight': 0.2, 'max_score': 20},
	    '結構安排': {'weight': 0.2, 'max_score': 20},
	    '字句運用': {'weight': 0.2, 'max_score': 20},
	}

	# Percentage value of each grade; '0' is used when no grade can be parsed.
	_GRADE_SCORES = {
	    'A+': 95, 'A': 90, 'A-': 85,
	    'B+': 80, 'B': 75, 'B-': 70,
	    'C+': 65, 'C': 60, 'C-': 55,
	    '0': 0,
	}

	# Labels may be followed by a full-width or an ASCII colon.
	_GRADE_LABEL = re.compile(r"等第\s*[：:]\s*(.*)")
	_COMMENT_LABEL = re.compile(r"評語\s*[：:]\s*(.*)")
	_GRADE_TOKEN = re.compile(r"[ABC][+-]?")
	# Leading characters a model may put before a label (bullets, headings, bold).
	_LABEL_DECORATION = "*#->• \t"


	def _parse_grade_reply(reply):
	    """Extract (grade, comment) from a reply in the 等第/評語 format."""
	    grade = '0'
	    comment_lines = []
	    in_comment = False

	    for raw_line in reply.splitlines():
	        stripped = raw_line.strip()
	        # Remove markdown emphasis so "**等第**：A" matches like "等第：A".
	        label_text = stripped.lstrip(_LABEL_DECORATION).replace('*', '')

	        grade_match = _GRADE_LABEL.match(label_text)
	        if grade_match:
	            value = grade_match.group(1).upper()
	            # Accept full-width / typographic plus and minus signs.
	            for variant, ascii_sign in (('＋', '+'), ('－', '-'), ('−', '-')):
	                value = value.replace(variant, ascii_sign)
	            token = _GRADE_TOKEN.search(value)
	            grade = token.group(0) if token else '0'
	            in_comment = False
	            continue

	        comment_match = _COMMENT_LABEL.match(label_text)
	        if comment_match:
	            comment_lines = [comment_match.group(1).strip()]
	            in_comment = True
	            continue

	        if in_comment:
	            # Keep continuation lines: comments are usually multi-line.
	            comment_lines.append(stripped)

	    comment = "\n".join(comment_lines).strip()
	    # Without a labelled comment, show the raw reply so free-form answers
	    # and API error messages stay visible instead of an empty comment.
	    return grade, comment or reply.strip()


	def evaluate_essay(message, additional_prompt, provider, model):
	    """Grade an essay on each rubric dimension and build the displayed result.

	    Args:
	        message: Essay text entered by the user.
	        additional_prompt: Optional extra grading instructions; may be empty.
	        provider: Provider name passed through to get_llm_response.
	        model: Model name passed through to get_llm_response.

	    Returns:
	        A pair (history, score_markdown): history is a list of
	        (user, assistant) tuples for the chatbot, score_markdown is a
	        gr.Markdown holding the total score. Errors are reported through the
	        same pair instead of being raised.
	    """
	    if not message or not message.strip():
	        return [], gr.Markdown("### 請輸入作文內容進行評分")

	    try:
	        history = [
	            ("作文內容：", message),
	            ("", f"正在使用 {provider} ({model}) 進行評分分析..."),
	        ]
	        scores = {}

	        for criterion, details in _CRITERIA.items():
	            # Assembled from explicit pieces so no source indentation ends
	            # up inside the prompt.
	            prompt = (
	                f"評估以下作文的{criterion}（權重{details['weight']:.0%}）：\n\n"
	                f"作文內容：\n{message}\n\n"
	                f"{additional_prompt or ''}\n\n"
	                "請依三等九級制（A+、A、A-、B+、B、B-、C+、C、C-）評分，並提供詳細評語。\n"
	                "如果是缺考、未作答、完全文不對題或作答內容完全照抄試題，請給予0分。\n\n"
	                "請按以下格式回覆：\n等第：\n評語："
	            )

	            reply = get_llm_response(prompt, provider, model)
	            grade, comment = _parse_grade_reply(reply)

	            weighted_score = (_GRADE_SCORES[grade] / 100) * details['max_score']
	            scores[criterion] = weighted_score

	            feedback = f"### {criterion}\n"
	            feedback += f"- 等第：{grade}\n"
	            feedback += f"- 得分：{weighted_score:.1f}/{details['max_score']}\n"
	            feedback += f"- 評語：{comment}\n"
	            history.append(("", feedback))

	        total_score = sum(scores.values())

	        # Overall summary. Scores come from the stored numbers; re-parsing
	        # the feedback text raised IndexError because the searched marker
	        # never occurred in it.
	        summary_lines = ["### 綜合評語", "本作文各項得分如下："]
	        for criterion, details in _CRITERIA.items():
	            summary_lines.append(
	                f"- {criterion}：{scores[criterion]:.1f}/{details['max_score']}"
	            )
	        summary_lines.append("")
	        summary_lines.append(f"### 總分：{total_score:.1f}/100")
	        history.append(("", "\n".join(summary_lines)))

	        total_score_display = "\n".join([
	            "# 總評分結果",
	            f"## 使用模型：{provider} ({model})",
	            f"## 總分：{total_score:.1f}/100",
	        ])

	        return history, gr.Markdown(total_score_display)

	    except Exception as e:
	        # UI boundary: show the failure in the chat instead of crashing.
	        return [("", f"評分過程發生錯誤：{str(e)}")], gr.Markdown("### ❌ 評分失敗")

	def update_model_choices(provider):
	    """Return a model dropdown matching the chosen provider.

	    "openai" yields the OpenAI model list with "gpt-4o" selected; any other
	    value yields the Groq model list with "llama3-8b-8192" selected.
	    """
	    if provider != "openai":
	        return gr.Dropdown(choices=GROQ_MODELS, value="llama3-8b-8192")
	    return gr.Dropdown(choices=OPENAI_MODELS, value="gpt-4o")

	# Introductory markdown rendered at the top of the page.
	_INTRO_MARKDOWN = """
	# 國文作文自動評分系統
	## 評分標準說明
	本系統採用以下四個面向進行評分：
	- 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻
	- 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實
	- 結構安排 (20%): 審視文章結構是否完整、條理是否分明
	- 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美

	評分採用三等九級制（A+、A、A-、B+、B、B-、C+、C、C-）
	"""

	# Build the Gradio interface. Components are created in display order.
	with gr.Blocks(title="國文作文自動評分系統") as demo:
	    gr.Markdown(_INTRO_MARKDOWN)

	    # Input area: the essay plus optional extra grading instructions.
	    with gr.Column():
	        msg = gr.Textbox(
	            lines=10,
	            label="請輸入作文內容",
	            placeholder="在此輸入作文...",
	        )
	        additional_prompt = gr.Textbox(
	            lines=2,
	            label="額外評分提示（選填）",
	            placeholder="可輸入額外的評分要求或提示...",
	        )

	    # Provider and model selection.
	    with gr.Row():
	        provider = gr.Radio(
	            label="選擇服務提供者",
	            choices=["openai", "groq"],
	            value="openai",
	        )
	        model = gr.Dropdown(
	            label="選擇模型",
	            choices=OPENAI_MODELS,
	            value="gpt-4o",
	            interactive=True,
	        )

	    # Refresh the model list whenever the provider changes.
	    provider.change(
	        fn=update_model_choices,
	        inputs=provider,
	        outputs=model,
	    )

	    # Action buttons.
	    with gr.Row():
	        submit = gr.Button("開始評分", variant="primary", size="lg")
	        clear = gr.Button("清除內容", size="lg")

	    # Result area.
	    with gr.Row():
	        # Left: per-criterion grading details.
	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(
	                label="評分詳情",
	                height=600,
	                render_markdown=True,
	                show_copy_button=True,
	            )
	        # Right: total score.
	        with gr.Column(scale=1):
	            score_display = gr.Markdown("### 等待評分...")

	    # Event wiring.
	    submit.click(
	        fn=evaluate_essay,
	        inputs=[msg, additional_prompt, provider, model],
	        outputs=[chatbot, score_display],
	    )
	    # Reset every input and output to its initial value.
	    clear.click(
	        fn=lambda: [None, None, "openai", "gpt-4o", [], gr.Markdown("### 等待評分...")],
	        outputs=[msg, additional_prompt, provider, model, chatbot, score_display],
	    )

	# Launch the app when run as a script.
	if __name__ == "__main__":
	    demo.launch()