"""Gradio web app that grades Chinese-language essays with an LLM.

An essay is scored on four rubric criteria by an OpenAI or Groq chat model.
Each criterion receives a letter grade on a nine-level scale (A+ .. C-, or 0),
which is converted to points and summed into a total out of 100.
"""
import os
import re
import subprocess
import sys
import unicodedata

import gradio as gr
from openai import OpenAI

# Install and import Groq. The install goes through the running interpreter's
# own pip (no shell), so the package lands in the environment that imports it.
# If the install fails, the second import raises ImportError as before.
try:
    from groq import Groq
except ImportError:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "groq"],
        check=False,
    )
    from groq import Groq

# Initialise the API clients from environment variables.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
groq_client = Groq(api_key=os.getenv('groq_key'))

# Model names offered in the UI.
OPENAI_MODELS = [
    # GPT-4o family
    "gpt-4o",
    "gpt-4o-2024-08-06",
    "gpt-4o-mini",
    "chatgpt-4o-latest",
    # GPT-4 Turbo / GPT-3.5 family
    "gpt-4-turbo",
    "gpt-3.5-turbo",
]

GROQ_MODELS = ["llama3-8b-8192", "gemma2-9b-it"]

# Per-model request settings. For OpenAI only "temperature" and "max_tokens"
# are sent with the request; "context_window" is informational. Groq entries
# are passed to the API call as keyword arguments verbatim.
MODEL_CONFIGS = {
    "openai": {
        # GPT-4o family
        "gpt-4o": {
            "max_tokens": 16384,
            "temperature": 0.7,
            "context_window": 128000,
        },
        "gpt-4o-2024-08-06": {
            "max_tokens": 16384,
            "temperature": 0.7,
            "context_window": 128000,
        },
        "gpt-4o-mini": {
            "max_tokens": 16384,
            "temperature": 0.7,
            "context_window": 128000,
        },
        "chatgpt-4o-latest": {
            "max_tokens": 16384,
            "temperature": 0.7,
            "context_window": 128000,
        },
        # GPT-4 Turbo / GPT-3.5 family
        "gpt-4-turbo": {
            "max_tokens": 4096,
            "temperature": 0.7,
            "context_window": 128000,
        },
        "gpt-3.5-turbo": {
            "max_tokens": 4096,
            "temperature": 0.7,
            "context_window": 16385,
        },
    },
    "groq": {
        "llama3-8b-8192": {"max_tokens": 4090, "temperature": 0.7},
        "gemma2-9b-it": {"max_tokens": 1024, "temperature": 0.7},
    },
}

# System prompt carrying the full rubric. It is shared by both providers so
# that every model actually receives the rubric it is asked to apply.
SYSTEM_PROMPT = (
    "你是一位資深的國文作文評閱委員,請依據以下評分規準進行評分:\n"
    "1. 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
    "2. 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
    "3. 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
    "4. 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
    "\n"
    "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分。\n"
    "若有缺考、未作答、完全文不對題或作答內容完全照抄試題者,則給予0分。\n"
)

# Prefix of every error message shown to the user; also used to recognise a
# failed LLM call in evaluate_essay.
ERROR_PREFIX = "評分過程發生錯誤:"

# Rubric criteria with their weight and the points each contributes.
CRITERIA = {
    '題旨發揮': {'weight': 0.4, 'max_score': 40},
    '資料掌握': {'weight': 0.2, 'max_score': 20},
    '結構安排': {'weight': 0.2, 'max_score': 20},
    '字句運用': {'weight': 0.2, 'max_score': 20},
}

# Percentage value of each letter grade.
GRADE_SCORES = {
    'A+': 95, 'A': 90, 'A-': 85,
    'B+': 80, 'B': 75, 'B-': 70,
    'C+': 65, 'C': 60, 'C-': 55,
    '0': 0,
}

# Reply labels, tolerating Markdown bold markers and either an ASCII or a
# fullwidth (U+FF1A) colon after the label.
_GRADE_LABEL_RE = re.compile(r"等第[\s*]*[:\uff1a]")
_COMMENT_LABEL_RE = re.compile(r"評語[\s*]*[:\uff1a]")
# A grade token that is not part of a longer word or number.
_GRADE_TOKEN_RE = re.compile(r"(?<![A-Z0-9])(?:[ABC][+-]?|0)(?![A-Z0-9])")


def get_llm_response(prompt, provider, model):
    """Send *prompt* to the selected chat model and return its reply text.

    Args:
        prompt: User message to send.
        provider: "openai" selects the OpenAI client; any other value
            selects the Groq client.
        model: Model name; must be a key of MODEL_CONFIGS[provider].

    Returns:
        The reply text. On any failure (API error, unknown model, empty
        reply) a string starting with ERROR_PREFIX is returned instead of
        raising.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]
    try:
        if provider == "openai":
            config = MODEL_CONFIGS["openai"][model]
            response = openai_client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=config["temperature"],
                max_tokens=config["max_tokens"],
            )
        else:  # groq
            response = groq_client.chat.completions.create(
                model=model,
                messages=messages,
                **MODEL_CONFIGS["groq"][model],
                stream=False,
                top_p=1,
                stop=None,
            )
        content = response.choices[0].message.content
        if not content:
            # A missing reply would otherwise crash the parser downstream.
            return f"{ERROR_PREFIX}模型未回傳任何內容"
        return content
    except Exception as e:
        return f"{ERROR_PREFIX}{e}"


def _find_grade(text):
    """Return the first grade token (a GRADE_SCORES key) in *text*, or None."""
    # NFKC folds fullwidth letters/signs to ASCII; U+2212 (minus sign) is
    # not covered by NFKC, so it is mapped to a hyphen explicitly.
    normalized = unicodedata.normalize("NFKC", text).replace("\u2212", "-")
    match = _GRADE_TOKEN_RE.search(normalized.upper())
    return match.group(0) if match else None


def _parse_grading_reply(reply):
    """Extract ``(grade, comment)`` from a reply in the 等第/評語 format.

    The grade defaults to '0' when no valid grade is found. The comment is
    everything after the 評語 label, including following lines up to the
    next 等第 label, with the original letter case preserved.
    """
    grade = '0'
    comment_parts = []
    in_comment = False
    for line in reply.splitlines():
        grade_label = _GRADE_LABEL_RE.search(line)
        comment_label = _COMMENT_LABEL_RE.search(line)
        # A grade label counts only when it precedes the comment label;
        # anything after the comment label is comment text.
        if grade_label and (
            comment_label is None
            or grade_label.start() < comment_label.start()
        ):
            end = comment_label.start() if comment_label else len(line)
            found = _find_grade(line[grade_label.end():end])
            if found is not None:
                grade = found
            in_comment = False
        if comment_label:
            comment_parts = [line[comment_label.end():]]
            in_comment = True
        elif in_comment:
            comment_parts.append(line)
    stripped = (part.strip() for part in comment_parts)
    comment = " ".join(part for part in stripped if part)
    return grade, comment


def _build_prompt(criterion, details, message, additional_prompt):
    """Build the user prompt asking the model to grade one criterion."""
    return (
        f"評估以下作文的{criterion}(權重{details['weight']*100}%):\n"
        "\n"
        "作文內容:\n"
        f"{message}\n"
        "\n"
        f"{additional_prompt if additional_prompt else ''}\n"
        "\n"
        "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分,並提供詳細評語。\n"
        "如果是缺考、未作答、完全文不對題或作答內容完全照抄試題,請給予0分。\n"
        "\n"
        "請按以下格式回覆:\n"
        "等第:\n"
        "評語:"
    )


def evaluate_essay(message, additional_prompt, provider, model):
    """Grade an essay on every rubric criterion and build the UI output.

    Args:
        message: Essay text; None or blank text yields a prompt to enter one.
        additional_prompt: Optional extra grading instructions.
        provider: "openai" or "groq".
        model: Model name for the chosen provider.

    Returns:
        A ``(history, markdown)`` pair: the chat history as a list of
        ``(user, bot)`` tuples for the Chatbot, and a gr.Markdown carrying
        the total score (or a status / failure message).
    """
    # The textbox may hand over None after the form has been cleared.
    if not message or not message.strip():
        return [], gr.Markdown("### 請輸入作文內容進行評分")

    try:
        history = [
            ("作文內容:", message),
            ("", f"正在使用 {provider} ({model}) 進行評分分析..."),
        ]
        total_score = 0
        all_feedback = {}
        score_texts = {}

        for criterion, details in CRITERIA.items():
            prompt = _build_prompt(
                criterion, details, message, additional_prompt
            )
            result = get_llm_response(prompt, provider, model)

            if result.startswith(ERROR_PREFIX):
                # Surface the failure instead of a silent zero.
                grade, comment = '0', result
            else:
                grade, comment = _parse_grading_reply(result)

            weighted_score = (
                (GRADE_SCORES[grade] / 100) * details['max_score']
            )
            total_score += weighted_score
            score_texts[criterion] = f"{weighted_score:.1f}"

            all_feedback[criterion] = (
                f"### {criterion}\n"
                f"- **等第**:{grade}\n"
                f"- **得分**:{score_texts[criterion]}/{details['max_score']}\n"
                f"- **評語**:{comment}\n"
            )

        # Add each criterion's feedback to the chat history.
        for criterion in CRITERIA:
            history.append(("", all_feedback[criterion]))

        # Build the overall summary.
        total_evaluation = "\n### 綜合評語\n本作文各項得分如下:\n"
        for criterion, details in CRITERIA.items():
            total_evaluation += (
                f"- {criterion}:{score_texts[criterion]}"
                f"/{details['max_score']}\n"
            )
        total_evaluation += f"\n### 總分:{total_score:.1f}/100"
        history.append(("", total_evaluation))

        total_score_display = (
            "\n# 總評分結果\n"
            f"## 使用模型:{provider} ({model})\n"
            f"## 總分:{total_score:.1f}/100\n"
        )
        return history, gr.Markdown(total_score_display)

    except Exception as e:
        return (
            [("", f"{ERROR_PREFIX}{e}")],
            gr.Markdown("### ❌ 評分失敗"),
        )


def update_model_choices(provider):
    """Return a Dropdown update listing the models of *provider*.

    "openai" yields the OpenAI list with "gpt-4o" selected; any other value
    yields the Groq list with "llama3-8b-8192" selected.
    """
    if provider == "openai":
        return gr.Dropdown(choices=OPENAI_MODELS, value="gpt-4o")
    else:
        return gr.Dropdown(choices=GROQ_MODELS, value="llama3-8b-8192")


def _reset_form():
    """Return default values for every input and output component."""
    return [
        "",
        "",
        "openai",
        # Restore the OpenAI choice list together with the default value, so
        # the dropdown never holds a value outside its current choices.
        update_model_choices("openai"),
        [],
        gr.Markdown("### 等待評分..."),
    ]


# Build the Gradio interface.
with gr.Blocks(title="國文作文自動評分系統") as demo:
    gr.Markdown(
        "\n"
        "# 國文作文自動評分系統\n"
        "\n"
        "## 評分標準說明\n"
        "本系統採用以下四個面向進行評分:\n"
        "- 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
        "- 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
        "- 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
        "- 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
        "\n"
        "評分採用三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)\n"
    )

    # Input area.
    with gr.Column():
        msg = gr.Textbox(
            label="請輸入作文內容",
            placeholder="在此輸入作文...",
            lines=10,
        )
        additional_prompt = gr.Textbox(
            label="額外評分提示(選填)",
            placeholder="可輸入額外的評分要求或提示...",
            lines=2,
        )

    # Provider / model selection.
    with gr.Row():
        provider = gr.Radio(
            choices=["openai", "groq"],
            label="選擇服務提供者",
            value="openai",
        )
        model = gr.Dropdown(
            choices=OPENAI_MODELS,
            label="選擇模型",
            value="gpt-4o",
            interactive=True,
        )

    # Refresh the model choices whenever the provider changes.
    provider.change(
        fn=update_model_choices,
        inputs=provider,
        outputs=model,
    )

    # Buttons.
    with gr.Row():
        submit = gr.Button("開始評分", variant="primary", size="lg")
        clear = gr.Button("清除內容", size="lg")

    # Output area.
    with gr.Row():
        # Left: detailed feedback.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                show_copy_button=True,
                render_markdown=True,
                height=600,
                label="評分詳情",
            )
        # Right: total score.
        with gr.Column(scale=1):
            score_display = gr.Markdown("### 等待評分...")

    # Event handlers.
    submit.click(
        evaluate_essay,
        inputs=[msg, additional_prompt, provider, model],
        outputs=[chatbot, score_display],
    )

    clear.click(
        _reset_form,
        outputs=[
            msg, additional_prompt, provider, model, chatbot, score_display,
        ],
    )

# Launch the app.
if __name__ == "__main__":
    demo.launch()