# Hugging Face Space: Browen0311/WritingScore (status: Sleeping)
# File size: 10,006 Bytes

import importlib
import os
import re
import subprocess
import sys

import gradio as gr
from openai import OpenAI

# Import Groq, installing the package on first run if it is missing.
try:
    from groq import Groq
except ImportError:
    # Install through the running interpreter's own pip so the package lands in
    # the environment this process imports from; a bare `pip` on PATH may belong
    # to a different Python. The argument list avoids going through a shell.
    # check=False: a failed install surfaces as ImportError on the retry below.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "groq"],
        check=False,
    )
    # Make the import system notice packages installed after startup.
    importlib.invalidate_caches()
    from groq import Groq

# Create the API clients; both keys are read from environment variables.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
groq_client = Groq(api_key=os.getenv('groq_key'))

# Model catalogue: the two lists are the selectable model names per provider,
# and MODEL_CONFIGS holds the request settings for each model.
OPENAI_MODELS = [
    # GPT-4o family
    "gpt-4o",             # latest flagship model
    "gpt-4o-2024-08-06",  # version that supports structured outputs
    "gpt-4o-mini",        # lightweight, fast version
    "chatgpt-4o-latest",  # continuously updated version used by ChatGPT
    # GPT-4 Turbo and GPT-3.5 Turbo
    "gpt-4-turbo",
    "gpt-3.5-turbo",
]

GROQ_MODELS = ["llama3-8b-8192", "gemma2-9b-it"]

# (model, max_tokens, context_window) for every OpenAI model.
_OPENAI_LIMITS = (
    ("gpt-4o", 16384, 128000),
    ("gpt-4o-2024-08-06", 16384, 128000),
    ("gpt-4o-mini", 16384, 128000),
    ("chatgpt-4o-latest", 16384, 128000),
    ("gpt-4-turbo", 4096, 128000),
    ("gpt-3.5-turbo", 4096, 16385),
)

# (model, max_tokens) for every Groq model.
_GROQ_LIMITS = (
    ("llama3-8b-8192", 4090),
    ("gemma2-9b-it", 1024),
)

# Each model gets its own settings dict (no sharing between entries).
MODEL_CONFIGS = {
    "openai": {
        name: {
            "max_tokens": max_tokens,
            "temperature": 0.7,
            "context_window": context_window,
        }
        for name, max_tokens, context_window in _OPENAI_LIMITS
    },
    "groq": {
        name: {"max_tokens": max_tokens, "temperature": 0.7}
        for name, max_tokens in _GROQ_LIMITS
    },
}

def get_llm_response(prompt, provider, model):
    """Send one grading prompt to the chosen provider and return the reply text.

    Args:
        prompt: Content of the user-role message.
        provider: "openai" uses the OpenAI client; any other value is sent
            to the Groq client.
        model: Model name, looked up in MODEL_CONFIGS for the provider.

    Returns:
        The message content of the first choice. If anything raises, the
        exception is not propagated; a string starting with
        "評分過程發生錯誤：" followed by the exception text is returned instead.
    """
    try:
        if provider != "openai":
            # Groq path: the per-model settings are passed through as keyword
            # arguments, followed by the fixed sampling options.
            groq_messages = [
                {"role": "system", "content": "你是一位資深的國文作文評閱委員，請依據提供的評分規準進行評分。"},
                {"role": "user", "content": prompt},
            ]
            groq_reply = groq_client.chat.completions.create(
                model=model,
                messages=groq_messages,
                **MODEL_CONFIGS["groq"][model],
                stream=False,
                top_p=1,
                stop=None,
            )
            return groq_reply.choices[0].message.content

        # OpenAI path: the system message carries the full grading rubric.
        settings = MODEL_CONFIGS["openai"][model]
        rubric_prompt = """你是一位資深的國文作文評閱委員，請依據以下評分規準進行評分：
                    1. 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻
                    2. 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實
                    3. 結構安排 (20%): 審視文章結構是否完整、條理是否分明
                    4. 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美

                    請依三等九級制（A+、A、A-、B+、B、B-、C+、C、C-）評分。
                    若有缺考、未作答、完全文不對題或作答內容完全照抄試題者，則給予0分。
                    """
        openai_reply = openai_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": rubric_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=settings["temperature"],
            max_tokens=settings["max_tokens"],
        )
        return openai_reply.choices[0].message.content
    except Exception as err:
        return f"評分過程發生錯誤：{err}"

# Prefix of the string get_llm_response() returns when the API call fails.
_LLM_ERROR_PREFIX = "評分過程發生錯誤："

# "等第" label, optional markdown emphasis, either colon form, then the grade
# token (A/B/C with an optional sign, or 0). The lookahead rejects ordinary
# words that merely start with A, B or C.
_GRADE_PATTERN = re.compile(
    r'等第[\s*]*[:：][\s*]*([A-Ca-c][+\-＋－−]?(?![A-Za-z])|0)'
)
# "評語" label, optional markdown emphasis, either colon form, rest of the line.
_COMMENT_PATTERN = re.compile(r'評語[\s*]*[:：][\s*]*(.*)')
# Full-width / typographic signs that may appear instead of ASCII "+" and "-".
_SIGN_TABLE = str.maketrans({"＋": "+", "－": "-", "−": "-"})


def _parse_grade_response(result, valid_grades):
    """Extract ``(grade, comment)`` from a reply in the "等第：/評語：" format.

    The grade defaults to '0' when no valid grade line is found; when several
    lines carry a valid grade, the last one wins. The comment starts at the
    text after the "評語" label and continues over the following non-empty
    lines, joined with single spaces. Its original letter case is kept.
    """
    grade = '0'
    comment_parts = None  # None until a "評語" label has been seen
    for raw_line in result.splitlines():
        line = raw_line.strip()

        grade_match = _GRADE_PATTERN.search(line)
        if grade_match:
            candidate = grade_match.group(1).upper().translate(_SIGN_TABLE)
            if candidate in valid_grades:
                grade = candidate

        comment_match = _COMMENT_PATTERN.search(line)
        if comment_match:
            comment_parts = [comment_match.group(1).strip()]
        elif comment_parts is not None and line and not grade_match:
            # Continuation line of a multi-line comment.
            comment_parts.append(line)

    comment = " ".join(part for part in (comment_parts or []) if part)
    return grade, comment


def evaluate_essay(message, additional_prompt, provider, model):
    """Grade an essay on four weighted criteria and build the UI outputs.

    One LLM request is made per criterion. Each reply is parsed into a grade
    (A+ … C-, or 0) and a comment; the grade is converted to points out of the
    criterion's maximum, and the points are summed to a score out of 100.

    Args:
        message: Essay text. ``None``, empty or whitespace-only input is
            rejected with a prompt to enter an essay.
        additional_prompt: Optional extra grading instructions inserted into
            every criterion prompt.
        provider: Provider name passed to get_llm_response.
        model: Model name passed to get_llm_response.

    Returns:
        A ``(history, markdown)`` pair: a list of ``(user, bot)`` tuples for
        the chatbot and a ``gr.Markdown`` for the score panel. If an LLM call
        reports a failure, or anything raises, the history holds the single
        error message and the panel shows "評分失敗".
    """
    # The textbox value can be None (the clear button sets it to None).
    if not message or not message.strip():
        return [], gr.Markdown("### 請輸入作文內容進行評分")

    criteria = {
        '題旨發揮': {'weight': 0.4, 'max_score': 40},
        '資料掌握': {'weight': 0.2, 'max_score': 20},
        '結構安排': {'weight': 0.2, 'max_score': 20},
        '字句運用': {'weight': 0.2, 'max_score': 20}
    }

    grade_scores = {
        'A+': 95, 'A': 90, 'A-': 85,
        'B+': 80, 'B': 75, 'B-': 70,
        'C+': 65, 'C': 60, 'C-': 55,
        '0': 0
    }

    try:
        history = [
            ("作文內容：", message),
            ("", f"正在使用 {provider} ({model}) 進行評分分析..."),
        ]
        scores = {}  # criterion -> weighted points

        for criterion, details in criteria.items():
            prompt = f"""評估以下作文的{criterion}（權重{details['weight']*100}%）：

作文內容：
{message}

{additional_prompt if additional_prompt else ''}

請依三等九級制（A+、A、A-、B+、B、B-、C+、C、C-）評分，並提供詳細評語。
如果是缺考、未作答、完全文不對題或作答內容完全照抄試題，請給予0分。

請按以下格式回覆：
等第：
評語："""

            result = get_llm_response(prompt, provider, model)

            # A failed API call comes back as an error string; report it
            # instead of silently grading the criterion as 0.
            if result.startswith(_LLM_ERROR_PREFIX):
                return [("", result)], gr.Markdown("### ❌ 評分失敗")

            grade, comment = _parse_grade_response(result, grade_scores)
            weighted_score = (grade_scores[grade] / 100) * details['max_score']
            scores[criterion] = weighted_score

            feedback = f"### {criterion}\n"
            feedback += f"- **等第**：{grade}\n"
            feedback += f"- **得分**：{weighted_score:.1f}/{details['max_score']}\n"
            feedback += f"- **評語**：{comment}\n"
            history.append(("", feedback))

        total_score = sum(scores.values())

        # Overall summary: one line per criterion, then the total.
        total_evaluation = """
### 綜合評語
本作文各項得分如下：
"""
        for criterion, details in criteria.items():
            total_evaluation += f"- {criterion}：{scores[criterion]:.1f}/{details['max_score']}\n"

        total_evaluation += f"\n### 總分：{total_score:.1f}/100"

        history.append(("", total_evaluation))

        total_score_display = f"""
# 總評分結果
## 使用模型：{provider} ({model})
## 總分：{total_score:.1f}/100
"""

        return history, gr.Markdown(total_score_display)

    except Exception as e:
        return [("", f"評分過程發生錯誤：{str(e)}")], gr.Markdown("### ❌ 評分失敗")

def update_model_choices(provider):
    """Return a Dropdown holding the model list for the given provider.

    "openai" yields OPENAI_MODELS with "gpt-4o" selected; every other value
    yields GROQ_MODELS with "llama3-8b-8192" selected.
    """
    if provider == "openai":
        model_names, selected = OPENAI_MODELS, "gpt-4o"
    else:
        model_names, selected = GROQ_MODELS, "llama3-8b-8192"
    return gr.Dropdown(choices=model_names, value=selected)

# Build the Gradio interface: rubric description, inputs, provider/model
# selectors, action buttons, and the two result panels.
with gr.Blocks(title="國文作文自動評分系統") as demo:
    # Static header describing the four grading criteria and the grade scale.
    gr.Markdown("""
    # 國文作文自動評分系統
    ## 評分標準說明
    本系統採用以下四個面向進行評分：
    - 題旨發揮 (40%): 檢視作文是否切合題目要求、思路清晰、感發得宜、體悟深刻
    - 資料掌握 (20%): 評估材料運用是否恰當、例證是否充實
    - 結構安排 (20%): 審視文章結構是否完整、條理是否分明
    - 字句運用 (20%): 考察遣詞造句是否精確、修辭是否優美

    評分採用三等九級制（A+、A、A-、B+、B、B-、C+、C、C-）
    """)
    
    # Input area: the essay text and optional extra grading instructions.
    with gr.Column():
        msg = gr.Textbox(
            label="請輸入作文內容",
            placeholder="在此輸入作文...",
            lines=10
        )
        additional_prompt = gr.Textbox(
            label="額外評分提示（選填）",
            placeholder="可輸入額外的評分要求或提示...",
            lines=2
        )
    
    # Model selection: provider radio plus a model dropdown that starts on
    # the OpenAI list.
    with gr.Row():
        provider = gr.Radio(
            choices=["openai", "groq"],
            label="選擇服務提供者",
            value="openai"
        )
        model = gr.Dropdown(
            choices=OPENAI_MODELS,
            label="選擇模型",
            value="gpt-4o",
            interactive=True
        )
    
    # Refresh the model choices whenever the provider changes.
    provider.change(
        fn=update_model_choices,
        inputs=provider,
        outputs=model
    )
    
    # Buttons
    with gr.Row():
        submit = gr.Button("開始評分", variant="primary", size="lg")
        clear = gr.Button("清除內容", size="lg")
    
    # Output area
    with gr.Row():
        # Left: per-criterion grading details
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                show_copy_button=True,
                render_markdown=True,
                height=600,
                label="評分詳情"
            )
        # Right: total score
        with gr.Column(scale=1):
            score_display = gr.Markdown("### 等待評分...")
    
    # Event wiring
    # Submit runs the grading and fills both result panels.
    submit.click(
        evaluate_essay,
        inputs=[msg, additional_prompt, provider, model],
        outputs=[chatbot, score_display]
    )
    # Clear sets both text boxes to None, empties the chat log, and restores
    # provider, model and score panel to the values they are constructed with.
    clear.click(
        lambda: [None, None, "openai", "gpt-4o", [], gr.Markdown("### 等待評分...")],
        outputs=[msg, additional_prompt, provider, model, chatbot, score_display]
    )

# Launch the app when the file is run as a script.
if __name__ == "__main__":
    demo.launch()