Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from openai import OpenAI | |
# Import Groq, installing the package on first run if it is missing.
try:
    from groq import Groq
except ImportError:
    # Install with the interpreter that is running this script so the package
    # lands in the active environment. check_call raises if pip fails, instead
    # of letting the retry below die with a second, less informative
    # ImportError.
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "groq"])
    from groq import Groq
# Create one API client per provider. Keys come from the environment;
# os.environ.get returns None for an unset variable, exactly like os.getenv.
openai_client = OpenAI(
    api_key=os.environ.get('OPENAI_API_KEY'),
)
groq_client = Groq(
    api_key=os.environ.get('groq_key'),
)
# Model identifiers offered for each provider (used as dropdown choices).
OPENAI_MODELS = [
    # GPT-4o family
    "gpt-4o",              # latest flagship model
    "gpt-4o-2024-08-06",   # version with structured-output support
    "gpt-4o-mini",         # lightweight, fast version
    "chatgpt-4o-latest",   # continuously updated version used by ChatGPT
    # GPT-4 Turbo and GPT-3.5 Turbo
    "gpt-4-turbo",         # latest GPT-4 Turbo
    "gpt-3.5-turbo",       # GPT-3.5 Turbo
]
GROQ_MODELS = [
    "llama3-8b-8192",
    "gemma2-9b-it",
]
# Generation settings, keyed by provider and then by model identifier.
# The "openai" entries are read field by field by get_llm_response, while the
# "groq" entries are unpacked directly as keyword arguments of the Groq
# chat-completion call, so they must hold only parameters that call accepts.
MODEL_CONFIGS = {
    "openai": {
        # GPT-4o family: every variant shares the same limits. The
        # comprehension builds a separate dict per model.
        **{
            name: {
                "max_tokens": 16384,
                "temperature": 0.7,
                "context_window": 128000,
            }
            for name in (
                "gpt-4o",
                "gpt-4o-2024-08-06",
                "gpt-4o-mini",
                "chatgpt-4o-latest",
            )
        },
        # GPT-4 Turbo and GPT-3.5 Turbo
        "gpt-4-turbo": {
            "max_tokens": 4096,
            "temperature": 0.7,
            "context_window": 128000,
        },
        "gpt-3.5-turbo": {
            "max_tokens": 4096,
            "temperature": 0.7,
            "context_window": 16385,
        },
    },
    "groq": {
        "llama3-8b-8192": {"max_tokens": 4090, "temperature": 0.7},
        "gemma2-9b-it": {"max_tokens": 1024, "temperature": 0.7},
    },
}
def get_llm_response(prompt, provider, model):
    """Send ``prompt`` to the selected chat model and return its reply text.

    Args:
        prompt: Text sent as the user message.
        provider: ``"openai"`` selects the OpenAI client; any other value is
            routed to the Groq client.
        model: Model identifier. It is looked up in
            ``MODEL_CONFIGS["openai"]`` for OpenAI and in
            ``MODEL_CONFIGS["groq"]`` otherwise.

    Returns:
        The text of the first choice, or ``""`` when the reply carries no
        text content. This function never raises: on any exception it returns
        a string made of the prefix ``評分過程發生錯誤:`` followed by the
        exception message.
    """
    # System prompt for OpenAI: grader persona plus the four-part rubric.
    openai_system_prompt = (
        "你是一位資深的國文作文評閱委員,請依據以下評分規準進行評分:\n"
        "1. 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
        "2. 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
        "3. 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
        "4. 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
        "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分。\n"
        "若有缺考、未作答、完全文不對題或作答內容完全照抄試題者,則給予0分。\n"
    )
    try:
        if provider == "openai":
            settings = MODEL_CONFIGS["openai"][model]
            response = openai_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": openai_system_prompt},
                    {"role": "user", "content": prompt},
                ],
                # Only these two fields are forwarded; "context_window" in the
                # config is not passed to the API.
                temperature=settings["temperature"],
                max_tokens=settings["max_tokens"],
            )
        else:  # groq
            response = groq_client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "你是一位資深的國文作文評閱委員,請依據提供的評分規準進行評分。"},
                    {"role": "user", "content": prompt},
                ],
                # The whole config entry is forwarded as keyword arguments.
                **MODEL_CONFIGS["groq"][model],
                stream=False,
                top_p=1,
                stop=None,
            )
        # The message content may be None; normalise it so callers that
        # immediately run string methods on the result do not crash.
        return response.choices[0].message.content or ""
    except Exception as e:
        # Deliberate best-effort boundary: the error is reported as text.
        return f"評分過程發生錯誤:{str(e)}"
def _parse_grade_response(result, valid_grades):
    """Extract ``(grade, comment)`` from a reply in the 等第/評語 format.

    A label counts when it is followed by an ASCII or full-width (U+FF1A)
    colon, optionally wrapped in Markdown ``*`` markers. The grade is the
    longest key of ``valid_grades`` that the text after the 等第 label starts
    with, so decorated answers such as ``A+(優)`` still resolve to ``A+``.
    The comment is everything after the first colon of the 評語 label plus any
    following non-empty lines, so colons inside the comment and multi-line
    comments are kept.

    Returns ``("0", comment)`` when no valid grade is found. If neither a
    grade nor a comment is recognised, the stripped raw reply is returned as
    the comment so that failures are visible instead of a silent zero.
    """
    colons = (":", "\uff1a")  # ASCII colon and full-width colon
    grades_longest_first = sorted(valid_grades, key=len, reverse=True)

    def value_after(line, label):
        # Text following "<label><colon>" on this line, or None if absent.
        start = line.find(label)
        if start == -1:
            return None
        rest = line[start + len(label):].lstrip(" *")
        if rest[:1] not in colons:
            return None
        return rest[1:].lstrip(" *").rstrip()

    grade = None
    comment_parts = []
    in_comment = False
    for line in (result or "").split("\n"):
        grade_text = value_after(line, "等第")
        if grade_text is not None:
            in_comment = False
            # Fold full-width plus/minus so "A+" style grades still match.
            token = (
                grade_text.upper()
                .replace("\uff0b", "+")
                .replace("\uff0d", "-")
            )
            for candidate in grades_longest_first:
                if token.startswith(candidate):
                    grade = candidate
                    break
            continue
        comment_text = value_after(line, "評語")
        if comment_text is not None:
            # A later 評語 label replaces an earlier comment.
            in_comment = True
            comment_parts = [comment_text] if comment_text else []
            continue
        if in_comment and line.strip():
            comment_parts.append(line.strip())

    if grade is None and not comment_parts:
        return "0", (result or "").strip()
    return grade or "0", "\n".join(comment_parts)


def evaluate_essay(message, additional_prompt, provider, model):
    """Grade an essay on four weighted criteria and build the chat output.

    One LLM request is made per criterion through ``get_llm_response``. Each
    reply is parsed for a letter grade and a comment, the grade is converted
    to points, and the points are scaled by the criterion's maximum score.

    Args:
        message: Essay text. ``None``, empty, or whitespace-only input
            short-circuits with a prompt to enter an essay.
        additional_prompt: Optional extra grading instructions appended to
            every criterion prompt; may be empty or ``None``.
        provider: Provider name forwarded to ``get_llm_response``.
        model: Model identifier forwarded to ``get_llm_response``.

    Returns:
        A ``(history, markdown)`` pair: ``history`` is a list of
        ``(user_text, bot_text)`` tuples for the chatbot, and ``markdown`` is
        a ``gr.Markdown`` holding the total-score summary. Exceptions raised
        while grading are caught and reported in the same shape.
    """
    if not message or not message.strip():
        return [], gr.Markdown("### 請輸入作文內容進行評分")
    criteria = {
        '題旨發揮': {'weight': 0.4, 'max_score': 40},
        '資料掌握': {'weight': 0.2, 'max_score': 20},
        '結構安排': {'weight': 0.2, 'max_score': 20},
        '字句運用': {'weight': 0.2, 'max_score': 20},
    }
    # Points (out of 100) per grade; '0' doubles as the fallback when no
    # grade can be parsed.
    grade_scores = {
        'A+': 95, 'A': 90, 'A-': 85,
        'B+': 80, 'B': 75, 'B-': 70,
        'C+': 65, 'C': 60, 'C-': 55,
        '0': 0,
    }
    try:
        history = [
            ("作文內容:", message),
            ("", f"正在使用 {provider} ({model}) 進行評分分析..."),
        ]
        total_score = 0
        feedback_blocks = []
        score_lines = []
        for criterion, details in criteria.items():
            prompt = (
                f"評估以下作文的{criterion}(權重{details['weight']*100}%):\n"
                "作文內容:\n"
                f"{message}\n"
                f"{additional_prompt if additional_prompt else ''}\n"
                "請依三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)評分,並提供詳細評語。\n"
                "如果是缺考、未作答、完全文不對題或作答內容完全照抄試題,請給予0分。\n"
                "請按以下格式回覆:\n"
                "等第:\n"
                "評語:"
            )
            result = get_llm_response(prompt, provider, model)
            grade, comment = _parse_grade_response(result, grade_scores)
            weighted_score = (grade_scores[grade] / 100) * details['max_score']
            total_score += weighted_score
            feedback_blocks.append(
                f"### {criterion}\n"
                f"- **等第**:{grade}\n"
                f"- **得分**:{weighted_score:.1f}/{details['max_score']}\n"
                f"- **評語**:{comment}\n"
            )
            # Keep the summary line here rather than re-parsing the Markdown.
            score_lines.append(
                f"- {criterion}:{weighted_score:.1f}/{details['max_score']}\n"
            )
        # Per-criterion feedback first, then the overall summary.
        history.extend(("", block) for block in feedback_blocks)
        total_evaluation = (
            "\n### 綜合評語\n"
            "本作文各項得分如下:\n"
            + "".join(score_lines)
            + f"\n### 總分:{total_score:.1f}/100"
        )
        history.append(("", total_evaluation))
        total_score_display = (
            "\n# 總評分結果\n"
            f"## 使用模型:{provider} ({model})\n"
            f"## 總分:{total_score:.1f}/100\n"
        )
        return history, gr.Markdown(total_score_display)
    except Exception as e:
        return [("", f"評分過程發生錯誤:{str(e)}")], gr.Markdown("### ❌ 評分失敗")
def update_model_choices(provider):
    """Return a Dropdown update listing the models of ``provider``.

    ``"openai"`` yields the OpenAI list with "gpt-4o" selected; any other
    value yields the Groq list with "llama3-8b-8192" selected.
    """
    if provider != "openai":
        return gr.Dropdown(choices=GROQ_MODELS, value="llama3-8b-8192")
    return gr.Dropdown(choices=OPENAI_MODELS, value="gpt-4o")
# Build the Gradio interface.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a copy of the file that had lost its indentation; confirm the
# Row/Column structure against the deployed app.
with gr.Blocks(title="國文作文自動評分系統") as demo:
    gr.Markdown(
        "\n"
        "# 國文作文自動評分系統\n"
        "## 評分標準說明\n"
        "本系統採用以下四個面向進行評分:\n"
        "- 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻\n"
        "- 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實\n"
        "- 結構安排 (20%): 審視文章結構是否完整、條理是否分明\n"
        "- 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美\n"
        "評分採用三等九級制(A+、A、A-、B+、B、B-、C+、C、C-)\n"
    )
    # Input area
    with gr.Column():
        msg = gr.Textbox(
            label="請輸入作文內容",
            placeholder="在此輸入作文...",
            lines=10
        )
        additional_prompt = gr.Textbox(
            label="額外評分提示(選填)",
            placeholder="可輸入額外的評分要求或提示...",
            lines=2
        )
    # Provider and model selection
    with gr.Row():
        provider = gr.Radio(
            choices=["openai", "groq"],
            label="選擇服務提供者",
            value="openai"
        )
        model = gr.Dropdown(
            choices=OPENAI_MODELS,
            label="選擇模型",
            value="gpt-4o",
            interactive=True
        )
    # Refresh the model list whenever the provider changes.
    provider.change(
        fn=update_model_choices,
        inputs=provider,
        outputs=model
    )
    # Buttons
    with gr.Row():
        submit = gr.Button("開始評分", variant="primary", size="lg")
        clear = gr.Button("清除內容", size="lg")
    # Output area
    with gr.Row():
        # Left: per-criterion grading details
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                show_copy_button=True,
                render_markdown=True,
                height=600,
                label="評分詳情"
            )
        # Right: total score
        with gr.Column(scale=1):
            score_display = gr.Markdown("### 等待評分...")
    # Event handlers
    submit.click(
        evaluate_essay,
        inputs=[msg, additional_prompt, provider, model],
        outputs=[chatbot, score_display]
    )
    clear.click(
        # Reset text fields to "" rather than None so a later submit always
        # hands evaluate_essay a string. Restore the OpenAI choice list along
        # with the "gpt-4o" value, because the dropdown may still be showing
        # the Groq models when the form is cleared.
        lambda: [
            "",
            "",
            "openai",
            gr.Dropdown(choices=OPENAI_MODELS, value="gpt-4o"),
            [],
            gr.Markdown("### 等待評分..."),
        ],
        outputs=[msg, additional_prompt, provider, model, chatbot, score_display]
    )
# Launch the app when run as a script.
if __name__ == "__main__":
    demo.launch()